Skip to content

Instantly share code, notes, and snippets.

@dmentipl
Last active July 10, 2020 04:15
Show Gist options
  • Save dmentipl/fb3ae46b42f7e3b773d906574a3e7631 to your computer and use it in GitHub Desktop.
Save dmentipl/fb3ae46b42f7e3b773d906574a3e7631 to your computer and use it in GitHub Desktop.
Make a paper bibtex from citations contained within the .tex of the paper
#!/usr/bin/env python
r"""Generate bibtex from citations in tex using a master bibtex.

Citations are found in the tex file with a regular expression (see
find_citations_in_texfile) that matches commands like \cite{}, \citet{},
\citep{}, \citeauthor{}, \citet*{}, and so on, and returns the captured
citekey.

This script requires the bibtool program. Install with, for example,
Homebrew or APT.

Daniel Mentiplay, 2019.
"""
import argparse
import pathlib
import re
import subprocess
import tempfile
# bibtool resource file, created once at import time and passed to every
# bibtool invocation by name (``BIBTOOL_OPTIONS.name``).  The handle is kept
# open on purpose: with ``delete=True`` the file is removed as soon as the
# handle is closed.
BIBTOOL_OPTIONS = tempfile.NamedTemporaryFile(mode='w', delete=True)
BIBTOOL_OPTIONS.write(
    '\n'.join(
        [
            # Bookkeeping fields to drop from the extracted entries.
            'delete.field = "bdsk-file-1"',
            'delete.field = "bdsk-url-1"',
            'delete.field = "bdsk-url-2"',
            'delete.field = "bdsk-url-3"',
            'delete.field = "date-added"',
            'delete.field = "date-modified"',
            # Key handling and output formatting.
            'preserve.key.case = on',
            'print.align.key = 0',
            'print.indent = 2',
            'print.align = 0',
            'print.line.length = 9999',
            'print.wide.equal = on',
            'print.equal.right = off',
            'print.use.tab = off',
        ]
    )
    + '\n'
)
# bibtool opens the file by name, so the buffered content must be on disk
# before the first subprocess call.
BIBTOOL_OPTIONS.flush()
class CitationNotFoundError(Exception):
    """Raised when a citation key yields no entry from the master bibtex."""
def find_citations_in_texfile(input_tex_file):
    r"""Return the sorted, unique citation keys used in a tex file.

    A citation command is a backslash, a command name containing ``cite``
    (``\cite``, ``\citet``, ``\citep``, ``\citeauthor``, ``\nocite``,
    ``\Citet``, ``\parencite``, ...), an optional ``*``, any number of
    optional ``[...]`` arguments, and a braced, comma-separated key list.
    The whole file is searched at once so that a key list wrapped over
    several lines is still found.

    Parameters
    ----------
    input_tex_file
        Path to the tex file to scan.

    Returns
    -------
    list of str
        The citation keys, stripped of surrounding whitespace, without
        duplicates or empty entries, in sorted order.
    """
    citation_command = re.compile(
        r'\\[A-Za-z]*[Cc]ite[A-Za-z]*\*?\s*'  # command name, optional star
        r'(?:\[[^\]]*\]\s*)*'  # optional [pre][post] note arguments
        r'\{([^{}]*)\}'  # braced key list (captured)
    )
    with open(input_tex_file, 'r') as tex_file:
        text = tex_file.read()

    keys = {
        key.strip()
        for key_list in citation_command.findall(text)
        for key in key_list.split(',')
    }
    # An empty ``\cite{}`` or a trailing comma leaves an empty key behind.
    keys.discard('')
    return sorted(keys)
def _bibtool_key_pattern(citation):
    """Return an anchored bibtool regex matching exactly the key ``citation``.

    Regex operators that can appear in citation keys (``.``, ``*``, ``+``,
    ``?``, ``$``, ``[``) are wrapped in a one-character bracket expression so
    that they match themselves instead of acting as operators.
    """
    literal = ''.join(
        '[' + char + ']' if char in '.*+?$[' else char for char in citation
    )
    return '^' + literal + '$'


def write_reduced_bibtex(citations, input_bibtex_file, output_bibtex_file=None):
    """Write the bibtex entries for ``citations`` to a new bibtex file.

    bibtool is run once per citation key, with the module-level
    ``BIBTOOL_OPTIONS`` resource file, to extract the matching entry from
    ``input_bibtex_file``; whatever it prints is appended to the output.

    Parameters
    ----------
    citations
        Iterable of citation keys to extract.
    input_bibtex_file
        Path to the master bibtex file.
    output_bibtex_file
        Path of the file to write. Defaults to 'references.bib'.

    Raises
    ------
    CitationNotFoundError
        If bibtool prints nothing for a key. Entries written before the
        failing key remain in the output file.
    """
    if output_bibtex_file is None:
        output_bibtex_file = 'references.bib'

    # bibtool output is decoded as UTF-8 below, so write it back as UTF-8
    # rather than in the locale's default encoding.
    with open(output_bibtex_file, 'w', encoding='utf-8') as bibtex_file:
        for citation in citations:
            print(f'Adding {citation}')
            result = subprocess.run(
                [
                    'bibtool',
                    '-q',
                    '-r',
                    BIBTOOL_OPTIONS.name,
                    '-X',
                    _bibtool_key_pattern(citation),
                    str(input_bibtex_file),
                ],
                encoding='utf-8',
                stdout=subprocess.PIPE,
            )
            if not result.stdout:
                message = f'Cannot find citation key: {citation}'
                print(message)
                raise CitationNotFoundError(message)
            bibtex_file.write(result.stdout)
if __name__ == '__main__':
    # Command-line entry point: read the citation keys from a .tex file and
    # write the matching entries of a master .bib file to a reduced .bib file.
    parser = argparse.ArgumentParser(description='Make .bib from .tex citations')
    parser.add_argument(
        '-t', '--tex-file', required=True, type=str, help='input ".tex" file name'
    )
    parser.add_argument(
        '-b', '--bib-file', required=True, type=str, help='input ".bib" file name'
    )
    parser.add_argument(
        '-o', '--out-file', required=False, type=str, help='output ".bib" file name'
    )
    args = parser.parse_args()

    input_texfile = pathlib.Path(args.tex_file)
    input_bibtex = pathlib.Path(args.bib_file)
    output_bibtex = pathlib.Path(
        args.out_file if args.out_file is not None else 'references.bib'
    )

    # Check both inputs up front: a missing .bib file would otherwise only
    # show up later as a "Cannot find citation key" failure.
    for required_input in (input_texfile, input_bibtex):
        if not required_input.exists():
            raise FileNotFoundError(f'{required_input} not found')

    # Get citations from .tex file.
    print(f'Finding citations in {input_texfile}')
    citations = find_citations_in_texfile(input_texfile)

    # Write reduced .bib file.
    print(f'Writing {output_bibtex} file with citations from {input_bibtex}')
    write_reduced_bibtex(citations, input_bibtex, output_bibtex)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment