Last active
July 10, 2020 04:15
-
-
Save dmentipl/fb3ae46b42f7e3b773d906574a3e7631 to your computer and use it in GitHub Desktop.
Make a paper bibtex from citations contained within the .tex of the paper
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
r"""Generate bibtex from citations in tex using a master bibtex.

Citations are located in the tex file with the regular expression compiled
in find_citations_in_texfile.
This regex matches citations like \cite{}, \citet{}, \citep{}, | |
\citeauthor{}, \citet*{}, and so on, and returns the captured citekey. | |
This script requires the bibtool program. Install with, for example, | |
Homebrew or APT. | |
Daniel Mentiplay, 2019. | |
""" | |
import argparse | |
import pathlib | |
import re | |
import subprocess | |
import tempfile | |
# Resource file passed to bibtool through its ``-r`` option. The object is kept
# at module level so the file exists for as long as the module is loaded; it is
# deleted automatically when the object is closed or garbage collected.
BIBTOOL_OPTIONS = tempfile.NamedTemporaryFile(mode='w', delete=True)
# Write through the handle we already hold instead of reopening the file by
# name, which is not possible on every platform while it is still open.
BIBTOOL_OPTIONS.write(
    '\n'.join(
        [
            # Drop the bdsk-* and date-* fields from the extracted entries.
            'delete.field = "bdsk-file-1"',
            'delete.field = "bdsk-url-1"',
            'delete.field = "bdsk-url-2"',
            'delete.field = "bdsk-url-3"',
            'delete.field = "date-added"',
            'delete.field = "date-modified"',
            # Key handling and output formatting.
            'preserve.key.case = on',
            'print.align.key = 0',
            'print.indent = 2',
            'print.align = 0',
            'print.line.length = 9999',
            'print.wide.equal = on',
            'print.equal.right = off',
            'print.use.tab = off',
        ]
    )
    + '\n'
)
# bibtool opens the file by name, so the buffered text must be on disk first.
BIBTOOL_OPTIONS.flush()
class CitationNotFoundError(Exception):
    """Raised when a citation key yields no entry from the master bibtex file."""
def find_citations_in_texfile(input_tex_file):
    r"""Return the sorted, unique citation keys used in a LaTeX file.

    Any command whose name contains ``cite`` is recognised, for example
    ``\cite{}``, ``\citet{}``, ``\citep{}``, ``\citeauthor{}``, ``\citet*{}``,
    ``\nocite{}`` and ``\parencite{}``. Optional arguments such as
    ``\citep[see][p. 3]{key}`` are skipped over, and a comma-separated key
    list may wrap across several lines.

    Parameters
    ----------
    input_tex_file
        Path of the ``.tex`` file to scan.

    Returns
    -------
    list of str
        The citation keys, de-duplicated and sorted. Empty keys and the
        ``*`` wildcard of ``\nocite{*}`` are left out because they do not
        name a single bibtex entry.

    Notes
    -----
    Lines commented out with ``%`` are not filtered, so citations inside
    comments are returned as well.
    """
    # The command must start with a backslash; the previous pattern was a
    # top-level alternation that also matched every lone "\" and "no".
    regex = re.compile(
        r'\\[A-Za-z]*[Cc]ite[A-Za-z]*\*?\s*'  # command name, optional star
        r'(?:\[[^\]]*\]\s*)*'  # any number of [optional] arguments
        r'\{([^}]*)\}'  # the key list, which may contain newlines
    )
    # Scan the whole text at once so key lists broken across lines are found.
    with open(input_tex_file, 'r') as tex_file:
        text = tex_file.read()

    keys = set()
    for key_list in regex.findall(text):
        keys.update(key.strip() for key in key_list.split(','))
    keys -= {'', '*'}
    return sorted(keys)
def _bibtool_literal(key):
    """Return a regular expression that matches ``key`` literally."""
    pieces = []
    for char in key:
        if char in '.*+?$[]':
            # A one-character bracket expression is literal in both POSIX
            # and Emacs-style regex syntaxes, unlike a backslash escape.
            pieces.append('[' + char + ']')
        elif char in '^\\':
            pieces.append('\\' + char)
        else:
            pieces.append(char)
    return ''.join(pieces)


def write_reduced_bibtex(citations, input_bibtex_file, output_bibtex_file=None):
    """Write the bibtex entries for ``citations`` to a new ``.bib`` file.

    bibtool is run once per citation key to extract the matching entry from
    the master file, using the resource file named by BIBTOOL_OPTIONS.

    Parameters
    ----------
    citations
        Iterable of citation keys to extract.
    input_bibtex_file
        Path of the master ``.bib`` file to extract from.
    output_bibtex_file
        Path of the ``.bib`` file to write. Defaults to ``references.bib``.

    Raises
    ------
    CitationNotFoundError
        If bibtool prints nothing for a key. The output file is not touched
        in that case, because it is only written once every key has been
        resolved.
    """
    if output_bibtex_file is None:
        output_bibtex_file = 'references.bib'

    entries = []
    for citation in citations:
        print(f'Adding {citation}')
        result = subprocess.run(
            [
                'bibtool',
                '-q',
                '-r',
                BIBTOOL_OPTIONS.name,
                '-X',
                # Anchor the pattern so only the exact key matches; the key
                # is escaped so that e.g. "+" or "." are not regex operators.
                '^' + _bibtool_literal(citation) + '$',
                input_bibtex_file,
            ],
            encoding='utf-8',
            stdout=subprocess.PIPE,
        )
        if not result.stdout:
            raise CitationNotFoundError(f'Cannot find citation key: {citation}')
        entries.append(result.stdout)

    # Write only after all lookups succeeded so a missing key cannot leave a
    # truncated or partial output file behind.
    with open(output_bibtex_file, 'w') as bibtex_file:
        bibtex_file.writelines(entries)
def main(argv=None):
    """Run the command-line interface.

    Parameters
    ----------
    argv
        List of command-line arguments. ``None`` (the default) makes
        argparse read them from ``sys.argv``.

    Raises
    ------
    FileNotFoundError
        If the input ``.tex`` file or the master ``.bib`` file does not exist.
    """
    parser = argparse.ArgumentParser(description='Make .bib from .tex citations')
    parser.add_argument(
        '-t',
        '--tex-file',
        required=True,
        type=pathlib.Path,
        help='input ".tex" file name',
    )
    parser.add_argument(
        '-b',
        '--bib-file',
        required=True,
        type=pathlib.Path,
        help='input ".bib" file name',
    )
    parser.add_argument(
        '-o',
        '--out-file',
        required=False,
        type=pathlib.Path,
        default=pathlib.Path('references.bib'),
        help='output ".bib" file name',
    )
    args = parser.parse_args(argv)

    # Check both inputs up front: a missing master .bib would otherwise only
    # show up later as a misleading "citation not found" error.
    for input_path in (args.tex_file, args.bib_file):
        if not input_path.exists():
            raise FileNotFoundError(f'{input_path} not found')

    # Get citations from .tex file.
    print(f'Finding citations in {args.tex_file}')
    citations = find_citations_in_texfile(args.tex_file)

    # Write reduced .bib file.
    print(f'Writing {args.out_file} file with citations from {args.bib_file}')
    write_reduced_bibtex(citations, args.bib_file, args.out_file)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment