Created
February 12, 2025 19:11
-
-
Save egonw/9a8f8f0266658be6a616d84f7e2a11d5 to your computer and use it in GitHub Desktop.
Extract chemical from a Beilstein Journal of Organic Chemistry article
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright (c) 2022-2024 Egon Willighagen <[email protected]>
//
// How to cite: https://joss.theoj.org/papers/10.21105/joss.02558
//
// GPL v3
@Grab(group='io.github.egonw.bacting', module='managers-rdf', version='1.0.4')
@Grab(group='io.github.egonw.bacting', module='managers-ui', version='1.0.4')
@Grab(group='io.github.egonw.bacting', module='net.bioclipse.managers.jsoup', version='1.0.4')
bioclipse = new net.bioclipse.managers.BioclipseManager(".");
rdf = new net.bioclipse.managers.RDFManager(".");
jsoup = new net.bioclipse.managers.JSoupManager(".");

// Purpose: download one or more article pages, harvest the JSON-LD embedded
// in their <script type="application/ld+json"> elements into a single
// in-memory RDF store, and print the InChIKey and SMILES of every
// schema:MolecularEntity found, one tab-separated line per entity.
//
// Arguments: one or more article URLs. Passing a single URL behaves exactly
// as before; additional URLs are aggregated into the same store.

// Fail with a readable message instead of an ArrayIndexOutOfBoundsException
// when the script is started without any article URL.
if (args.length == 0) {
  System.err.println "Usage: pass at least one article URL as command line argument"
  System.exit(1)
}

articles = args.toList()

kg = rdf.createInMemoryStore()

for (article in articles) {
  htmlContent = bioclipse.download(article)
  htmlDom = jsoup.parseString(htmlContent)

  // Every application/ld+json script element is imported as JSON-LD;
  // all sections of all articles end up in the same store.
  bioschemasSections = jsoup.select(htmlDom, "script[type='application/ld+json']");
  for (section in bioschemasSections) {
    bioschemasJSON = section.html()
    rdf.importFromString(kg, bioschemasJSON, "JSON-LD")
  }
}

println "#" + rdf.size(kg) + " triples detected in the JSON-LD"
// Debugging aid: uncomment to dump the harvested triples as Turtle.
// println rdf.asTurtle(kg)

// Both properties are OPTIONAL, so an entity is still listed when it only
// has one (or neither) of them.
sparql = """
PREFIX schema: <http://schema.org/>
SELECT ?entity ?inchikey ?smiles WHERE {
  ?entity a schema:MolecularEntity .
  OPTIONAL { ?entity schema:inChIKey ?inchikey }
  OPTIONAL { ?entity schema:smiles ?smiles }
}
"""
results = rdf.sparql(kg, sparql)

// Rows of the result table are addressed starting at 1, hence the
// inclusive upper bound on rowCount.
for (i=1;i<=results.rowCount;i++) {
  println "${results.get(i, "inchikey")}\t${results.get(i, "smiles")}"
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment