#!/usr/bin/env python3
# -*- coding: utf-8 -*-
r"""Script to facilitate the import of a Readcube Papers 3 library into Zotero

__Purpose of this script__

If you export your Readcube (Mekentosj) Papers3 library as a BibTeX file, the
file paths to the PDFs are not formatted correctly for Zotero to import them.

The specific issues include that:
* Papers3 does not export the file paths in a way that Zotero can understand.
* Papers3 does not export the paths to supplementary files, so only the
  primary PDF is imported into Zotero.
* Papers3 will export the primary PDF multiple times so you'll end up with
  multiple copies of the same PDF in Zotero.
* Papers3 includes superfluous supplementary files that you typically don't
  want to import into Zotero (e.g. *.html and *.webarchive files).

This script will take the BibTeX file you exported from Papers3 and modify the
file paths so that they can be imported into Zotero.

__Usage__

This script takes as input a BibTeX library exported from readcube/mekentosj
Papers3 and outputs a BibTeX library for Zotero to import.

The script preserves your Papers citekeys, adds supplementary files from the
Papers3 Library, removes duplicate links to PDFs, and removes extraneous
*.html and *.webarchive files that are often created by importing articles
into Paper from a web browser.

__Instructions__

* Make sure to have Better BibTeX pre-installed to Zotero if you want to
  preserve the Papers citekeys.
* Export your Papers3 library as a *.bib file.
  Export > BibTeX Library
  Make sure to set the "BibTex Record" option to "Complete". This will cause
  papers to include the paths to the main PDF (or whatever) file in the *.bib
  export
* Run this script with python 3.7 or higher to generate the file,
  'zotero_import.bib', in the same location as the BibTeX library export.
* You can pass the script the paths to the Papers3 library and the BibTeX
  library export as command line arguments, e.g.:
      python Papers3_to_Zotero.py --papers "~/Documents/Library.papers3" --bibtex "~/Desktop/Library.bib"
* Or you can modify the script by updating the 'papers_lib_hardcoded' and
  'bibtex_lib_hardcoded' variables with the paths to your Papers3 library and
  the BibTeX library that you just exported. E.g.:
      papers_lib_hardcoded = "~/Documents/User Library/Library.papers3"  ### Path to Papers3 Library
      bibtex_lib_hardcoded = "~/Desktop/full_library_export.bib"  ### Path to Papers BibTeX library export
* Running the script will generate a new BibTeX file, 'zotero_import.bib', in
  the same location as the BibTeX library export.
* Import the 'zotero_import.bib' file that gets generated with Zotero.
* Be sure to check the 'Import errors found:' file if Zotero generates one
  (if it exists, it will be in whatever folder you imported the library to;
  sort by title to find it).
* Also check that special characters in titles and journal names were
  imported correctly. Sometimes '{\&}' in the zotero_import.bib will be
  imported as '<span class="nocase">&</span>'. I'm not sure why or when this
  happens. You can search for "</span>" to check.

__NOTE__

The Collections groupings are not preserved with this method. This is one way
to manually get your Papers3 Collections into Zotero after following the above
instructions:

* Export each collection as a BibTex library ("Export" set to "Selected
  Collection" and "BibTex Record" set to "Standard"). This will prevent any
  file paths from being included in the *.bib file.
* Import that *.bib file directly to Zotero with the option to "Place imported
  collections and items into new collection" selected.
* Then merge the duplicate records. That will give you a new collection with
  links to right papers from your Zotero library.
* In this strategy, you have to do that for each one of your Papers3
  Collections. Not ideal but maybe tolerable.

__Author__

Dae Houlihan

__Source__

https://gist.github.com/daeh/abc6d46d897b58a657699fa1a408573e
"""

import argparse
import re
import sys
from pathlib import Path
from warnings import warn

# Sub-directories, next to a record's primary file, that are scanned for
# additional attachments.
_SUPPLEMENT_DIRS = ("Supplemental", "Media")

# Attachment types that are skipped when collecting supplementary files.
_IGNORED_SUFFIXES = (".html", ".webarchive")


def _supplementary_files(primary_file_path):
    """Return the extra attachments stored alongside *primary_file_path*.

    Looks in each of the ``_SUPPLEMENT_DIRS`` directories that sit next to the
    primary file and returns every regular file found there, except files
    with an ignored suffix and the primary file itself. Entries are sorted
    within each directory so the generated BibTeX is reproducible.
    """
    primary = str(primary_file_path)
    found = []
    for dir_extra in _SUPPLEMENT_DIRS:
        supp_dir = primary_file_path.parent / dir_extra
        if not supp_dir.is_dir():
            continue
        found.extend(
            x
            for x in sorted(supp_dir.iterdir())
            if x.is_file() and x.suffix not in _IGNORED_SUFFIXES and str(x) != primary
        )
    return found


def _attachment_entry(path):
    """Format *path* as one ``title:path:mimetype`` item of a BibTeX file field."""
    # Path.suffix includes the leading dot, which must not end up in the MIME
    # subtype ("application/pdf", not "application/.pdf").
    subtype = path.suffix[1:] or "octet-stream"
    return f"{path.stem} Supp{path.suffix}:{path}:application/{subtype}"


def main(papers=None, bibtex=None):
    """Rewrite a Papers3 BibTeX export so that Zotero can import its files.

    Every line starting with ``file = {`` is normalised: doubled braces are
    removed, repeated links to the same primary file are collapsed, and any
    supplementary files found in the Papers3 library next to the primary file
    are appended. All other lines are copied unchanged. The result is written
    to 'zotero_import.bib' next to the input BibTeX file.

    Args:
        papers: Path to the Papers3 library (must end in '.papers3'). When
            None, the hardcoded path below is used.
        bibtex: Path to the '.bib' file exported from Papers3. When None, the
            hardcoded path below is used.

    Returns:
        0 once the output file has been written (used as the exit status).

    Raises:
        Exception: If either path fails validation.
        AssertionError: If a file entry still references several distinct
            files after de-duplication, or cannot be matched against the
            Papers3 library path.
    """
    ################################################
    ### Update these paths or pass via command line:
    ################################################

    ### Path to Papers3 Library ###
    papers_lib_hardcoded = "~/Documents/Library.papers3"

    ### Path to the BibTeX export of the Papers3 Library ###
    bibtex_lib_hardcoded = "~/Desktop/library.bib"

    ################################################

    papers_lib = papers_lib_hardcoded if papers is None else papers
    bibtex_lib = bibtex_lib_hardcoded if bibtex is None else bibtex

    papers_library = Path(papers_lib).expanduser()
    bibtex_library = Path(bibtex_lib).expanduser()

    if not str(papers_library).endswith(".papers3"):
        raise Exception(
            f"The variable 'papers_library' should end in with '.papers3' but is rather: \n\t{str(papers_library)}"
        )

    if not papers_library.is_dir():
        raise Exception(
            f"The path you provided to the Papers3 library does not seem to exist or is not a directory: \n\t{str(papers_library)}"
        )

    if not (bibtex_library.is_file() and bibtex_library.suffix == ".bib"):
        raise Exception(
            f"The path you provided to the BibTeX Library file you exported from Papers3 does not seem to exist or is not '.bib' file: \n\t{str(bibtex_library)}"
        )

    # The library path is matched literally, so every regex metacharacter in
    # it (not only parentheses) has to be escaped.
    papers_library_string = re.escape(str(papers_library)) + r"/"

    # Patterns are loop-invariant; compile them once.
    double_brace_re = re.compile(r"^file = {{(.*?)}},?", flags=re.M)
    # Collapses any number of repeated links to the same file into one.
    duplicate_re = re.compile(r"^file = {(.*?)(?:;\1)+},?", flags=re.M)
    primary_re = re.compile(r"^file = {.*?:" + papers_library_string + r"(.*?\..*?):(.*?/.*?)},?")
    primary_prefix_re = re.compile(
        r"(^file = {.*?:" + papers_library_string + r".*?\..*?:application/.*?)},?"
    )

    out, missing = [], []

    # Read with the same encoding that is used for writing the result.
    with open(bibtex_library, "r", encoding="utf-8") as btlib:
        for line in btlib:
            if not line.startswith("file = {"):
                out.append(line)
                continue

            templine = double_brace_re.sub(r"file = {\1},", line)
            newline = duplicate_re.sub(r"file = {\1},", templine)

            # Explicit raises instead of `assert`, so that the checks are
            # still performed when Python runs with -O.
            if ";" in newline:
                raise AssertionError(
                    f"Entry references more than one distinct file: \n{newline}"
                )

            filepath_relative = primary_re.search(newline)
            if filepath_relative is None:
                raise AssertionError(
                    f"Unable to match regex expression:: \n{primary_re.pattern} \nwith entry from BibTex:: \n{newline}"
                )

            primary_file_path = papers_library / filepath_relative.group(1)
            if not primary_file_path.is_file():
                warn(f"The linked file was not found: {primary_file_path}", UserWarning)
                missing.append(primary_file_path)

            supp_files = _supplementary_files(primary_file_path)
            if supp_files:
                primary_line = primary_prefix_re.search(newline)
                if primary_line is None:
                    raise AssertionError(
                        f"Unable to match regex expression:: \n{primary_prefix_re.pattern} \nwith entry from BibTex:: \n{newline}"
                    )

                entries = [primary_line.group(1)]
                for x in supp_files:
                    print(f"adding supplementary file for {x.name}")
                    entries.append(_attachment_entry(x))
                newline = ";".join(entries) + "},\n"

            out.append(newline)

    ### New BibTeX record to import into Zotero
    modified_lib = bibtex_library.parent / "zotero_import.bib"

    with open(modified_lib, "w", encoding="utf-8") as outfile:
        outfile.writelines(out)

    if missing:
        print("\n\nList of missing files::\n")
        for mf in missing:
            print(mf)
        print(
            f"\n\nScript completed but {len(missing)} files referenced in the BibTeX library were not located. They are listed above."
        )
    else:
        print(
            f"\n\nScript appears to have completed successfully. You can now import this file into Zotero (make sure Better BibTeX is already installed): \n\t{str(modified_lib)}"
        )

    return 0


def _cli():
    """Parse the command line and return the supplied options as a dict.

    Options that were not given are omitted from the dict (argparse.SUPPRESS),
    so that ``main(**_cli())`` falls back to the hardcoded paths for them.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        argument_default=argparse.SUPPRESS,
    )
    parser.add_argument("-p", "--papers", help="Path to Papers3 Library")
    parser.add_argument("-b", "--bibtex", help="Path to the BibTeX export")
    args = parser.parse_args()
    return vars(args)


if __name__ == "__main__":
    sys.exit(main(**_cli()))