Created
August 5, 2021 16:07
-
-
Save jermnelson/116b8923bd148e4e509852b199df34c5 to your computer and use it in GitHub Desktop.
Using the GNRD API to send PDFs or full text and return a list of matches.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"id": "c63e4345-190d-420f-b8d4-c6d5335f1371", | |
"metadata": {}, | |
"source": [ | |
"# Using Global Names Recognition and Discovery API\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"id": "b7873594-fd7e-46e6-8657-f77dced0484a", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"%reload_ext autoreload\n", | |
"%autoreload 2\n", | |
"%matplotlib inline\n", | |
"\n", | |
"import csv\n", | |
"import datetime\n", | |
"import pathlib\n", | |
"import requests\n", | |
"\n", | |
"import lxml.etree as etree\n", | |
"\n", | |
"gnrd_api_url = \"http://gnrd.globalnames.org/name_finder.json\"\n", | |
"papers_pdf = pathlib.Path(\"/Volumes/GoogleDrive/Shared drives/SUL AI 2020-2021/Project - Species Occurrences/papers_pdf\")\n", | |
"papers_tei = pathlib.Path(\"/Volumes/GoogleDrive/Shared drives/SUL AI 2020-2021/Project - Species Occurrences/papers_tei\")\n", | |
"TEI = {\"tei\": \"http://www.tei-c.org/ns/1.0\"}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 85, | |
"id": "97b4b723-66c8-4ecb-8724-61934f3467f4", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"result1 = requests.post(gnrd_api_url, \n", | |
" files={ \"file\": first_paper.read_bytes()})" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 86, | |
"id": "aa763b2a-ec55-49ff-b4d6-725720d20dda", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"200" | |
] | |
}, | |
"execution_count": 86, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result1.status_code" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 93, | |
"id": "a6113365-8218-4376-b63c-a6d4edb69897", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'verbatim': '(Myoxocephalus polyacanthocephalus)',\n", | |
" 'scientificName': 'Myoxocephalus polyacanthocephalus',\n", | |
" 'offsetStart': 93,\n", | |
" 'offsetEnd': 128},\n", | |
" {'verbatim': 'Myoxocephalus',\n", | |
" 'scientificName': 'Myoxocephalus',\n", | |
" 'offsetStart': 519,\n", | |
" 'offsetEnd': 532},\n", | |
" {'verbatim': '(M.\\npolyacanthocephalus)',\n", | |
" 'scientificName': 'M. polyacanthocephalus',\n", | |
" 'offsetStart': 1347,\n", | |
" 'offsetEnd': 1371},\n", | |
" {'verbatim': 'Leonardo',\n", | |
" 'scientificName': 'Leonardo',\n", | |
" 'offsetStart': 1590,\n", | |
" 'offsetEnd': 1598},\n", | |
" {'verbatim': 'Myoxocephalus polyacanthocephalus.',\n", | |
" 'scientificName': 'Myoxocephalus polyacanthocephalus',\n", | |
" 'offsetStart': 5667,\n", | |
" 'offsetEnd': 5701},\n", | |
" {'verbatim': '(Myoxocephalus polyacanthocephalus',\n", | |
" 'scientificName': 'Myoxocephalus polyacanthocephalus',\n", | |
" 'offsetStart': 5757,\n", | |
" 'offsetEnd': 5791},\n", | |
" {'verbatim': 'Leonardo',\n", | |
" 'scientificName': 'Leonardo',\n", | |
" 'offsetStart': 6354,\n", | |
" 'offsetEnd': 6362},\n", | |
" {'verbatim': 'M. polyacanthocephalus',\n", | |
" 'scientificName': 'M. polyacanthocephalus',\n", | |
" 'offsetStart': 7410,\n", | |
" 'offsetEnd': 7432},\n", | |
" {'verbatim': 'Leonardo',\n", | |
" 'scientificName': 'Leonardo',\n", | |
" 'offsetStart': 7491,\n", | |
" 'offsetEnd': 7499},\n", | |
" {'verbatim': '(Ammodytes',\n", | |
" 'scientificName': 'Ammodytes',\n", | |
" 'offsetStart': 8639,\n", | |
" 'offsetEnd': 8649},\n", | |
" {'verbatim': 'M.\\npolyacanthocephalus',\n", | |
" 'scientificName': 'M. polyacanthocephalus',\n", | |
" 'offsetStart': 18396,\n", | |
" 'offsetEnd': 18418},\n", | |
" {'verbatim': 'Esox lucius.',\n", | |
" 'scientificName': 'Esox lucius',\n", | |
" 'offsetStart': 33892,\n", | |
" 'offsetEnd': 33904},\n", | |
" {'verbatim': 'Paralichthys\\n\\nolivaceus',\n", | |
" 'scientificName': 'Paralichthys olivaceus',\n", | |
" 'offsetStart': 34092,\n", | |
" 'offsetEnd': 34115},\n", | |
" {'verbatim': '(Albula vulpes)',\n", | |
" 'scientificName': 'Albula vulpes',\n", | |
" 'offsetStart': 34272,\n", | |
" 'offsetEnd': 34287},\n", | |
" {'verbatim': 'Oncorhynchus\\n\\nketa',\n", | |
" 'scientificName': 'Oncorhynchus keta',\n", | |
" 'offsetStart': 34557,\n", | |
" 'offsetEnd': 34575},\n", | |
" {'verbatim': 'Quintana,',\n", | |
" 'scientificName': 'Quintana',\n", | |
" 'offsetStart': 35086,\n", | |
" 'offsetEnd': 35095}]" | |
] | |
}, | |
"execution_count": 93, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result1.json()['names']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 97, | |
"id": "a4908b5a-e900-4825-b15a-0f097021cea4", | |
"metadata": { | |
"collapsed": true, | |
"jupyter": { | |
"outputs_hidden": true | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Started processing PDF reports with GNRD web services at 2021-06-04 15:43:59.930634\n", | |
"0...E34..50....E99.100.....150.....200.....250.....300.....350.....400.....450.....500.....550.....600.....650.....700.....750.....800.....850.....900.....950.....1000E1007.....1050.....1100.....1150.....1200...." | |
] | |
}, | |
{ | |
"ename": "KeyboardInterrupt", | |
"evalue": "", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-97-174075bb9b97>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mfilepath\u001b[0m \u001b[0;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpdf_directory\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0miterdir\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6\u001b[0m \u001b[0mreport_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfilepath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\".\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 7\u001b[0;31m gnrd_result = requests.post(gnrd_api_url,\n\u001b[0m\u001b[1;32m 8\u001b[0m files={ \"file\": filepath.read_bytes() })\n\u001b[1;32m 9\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/requests/api.py\u001b[0m in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 117\u001b[0m \"\"\"\n\u001b[1;32m 118\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'post'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/requests/api.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 62\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 540\u001b[0m }\n\u001b[1;32m 541\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 542\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 543\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 544\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 653\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 655\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 656\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 657\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mchunked\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 439\u001b[0;31m resp = conn.urlopen(\n\u001b[0m\u001b[1;32m 440\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 441\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 697\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 698\u001b[0m \u001b[0;31m# Make the request on the httplib connection object.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 699\u001b[0;31m httplib_response = self._make_request(\n\u001b[0m\u001b[1;32m 700\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 701\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 392\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest_chunked\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mhttplib_request_kw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 393\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 394\u001b[0;31m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mhttplib_request_kw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 395\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0;31m# We are swallowing BrokenPipeError (errno.EPIPE) since the server is\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/urllib3/connection.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, body, headers)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"user-agent\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlower\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"User-Agent\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_default_user_agent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 234\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mHTTPConnection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 235\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 236\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mrequest_chunked\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1228\u001b[0m encode_chunked=False):\n\u001b[1;32m 1229\u001b[0m \u001b[0;34m\"\"\"Send a complete request to the server.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1230\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1232\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_send_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py\u001b[0m in \u001b[0;36m_send_request\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1274\u001b[0m \u001b[0;31m# default charset of iso-8859-1.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1275\u001b[0m \u001b[0mbody\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'body'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1276\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mendheaders\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1277\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1278\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py\u001b[0m in \u001b[0;36mendheaders\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1223\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1224\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mCannotSendHeader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1225\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage_body\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1226\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1227\u001b[0m def request(self, method, url, body=None, headers={}, *,\n", | |
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py\u001b[0m in \u001b[0;36m_send_output\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[0mchunk\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'{len(chunk):X}\\r\\n'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'ascii'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mchunk\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1042\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34mb'\\r\\n'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1043\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1044\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1045\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mencode_chunked\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_http_vsn\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m11\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 963\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 964\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 965\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msendall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 966\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 967\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcollections\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mabc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIterable\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
] | |
} | |
], | |
"source": [ | |
"gnrd_names_papers = []\n", | |
"errors = []\n", | |
"start = datetime.datetime.utcnow()\n", | |
"print(f\"Started processing PDF reports with GNRD web services at {start}\")\n", | |
"for i,filepath in enumerate(pdf_directory.iterdir()):\n", | |
" report_name = filepath.name.split(\".\")[:-1][0]\n", | |
" gnrd_result = requests.post(gnrd_api_url,\n", | |
" files={ \"file\": filepath.read_bytes() })\n", | |
"\n", | |
" if gnrd_result.status_code < 400:\n", | |
" retrieved_on = datetime.datetime.utcnow().isoformat()\n", | |
" for name in gnrd_result.json().get('names', []):\n", | |
" gnrd_names_papers.append( {\"report\": report_name, \n", | |
" \"scientificName\": name['scientificName'],\n", | |
" \"retrieved\": retrieved_on})\n", | |
" else:\n", | |
" errors.append({ \"report\": report_name, \"http_status\": gnrd_result.status_code})\n", | |
" print(f\"E{i}\", end=\"\")\n", | |
" if not i%10 and i > 0:\n", | |
" print(\".\", end=\"\")\n", | |
" if not i%50:\n", | |
" print(f\"{i}\", end=\"\")\n", | |
"end = datetime.datetime.utcnow()\n", | |
"print(f\"Finished processing at {end}, total time {(end-start).seconds / 60.} for {len(gnrd_names_papers)}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 99, | |
"id": "494c15aa-6129-43f3-b5d6-5b5a8743a856", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"43272" | |
] | |
}, | |
"execution_count": 99, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(gnrd_names_papers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 100, | |
"id": "1ff40074-d4b8-42e3-b07f-446533da396c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'report': ['fhl_2011_Ho_26600'], 'http_status': 500},\n", | |
" {'report': ['fhl_2011_Witt_25966'], 'http_status': 500},\n", | |
" {'report': ['hms_hg072nm6762'], 'http_status': 413}]" | |
] | |
}, | |
"execution_count": 100, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"errors" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 101, | |
"id": "c51e0d86-1799-4e6a-b8fd-0650c12f1d3c", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['hms_gr619jn4381']" | |
] | |
}, | |
"execution_count": 101, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"report_name" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 104, | |
"id": "bd393635-74a1-4a0f-b345-291583a03616", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'report': 'hms_hd008gc7852',\n", | |
" 'scientificName': 'Chthamalus dalli',\n", | |
" 'retrieved': '2021-06-04T23:06:25.344051'}" | |
] | |
}, | |
"execution_count": 104, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gnrd_names_papers[-1]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 103, | |
"id": "3c241f59-6615-4fb4-9675-6b2b73380a0f", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for row in gnrd_names_papers:\n", | |
" row['report'] = row['report'][0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 107, | |
"id": "4aeca82c-ae29-436e-b613-8172754f3c44", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import csv\n", | |
"with open(\"/Volumes/GoogleDrive/Shared drives/SUL AI 2020-2021/Project - Species Occurrences/data/gnrd_report.csv\",\n", | |
" 'w+', newline='') as fo:\n", | |
" field_names = ['report', 'scientificName', 'retrieved']\n", | |
" csv_writer = csv.DictWriter(fo, field_names)\n", | |
" \n", | |
" csv_writer.writeheader()\n", | |
" for row in gnrd_names_papers:\n", | |
" csv_writer.writerow(row)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 108, | |
"id": "63fde6aa-ce1c-452a-9ec9-a8f4e3d52a53", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1243" | |
] | |
}, | |
"execution_count": 108, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"i" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "d34a5761-5913-4b0d-8b4c-66b4200934eb", | |
"metadata": { | |
"collapsed": true, | |
"jupyter": { | |
"outputs_hidden": true | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Started processing PDF reports with GNRD web services at 2021-06-25 15:04:16.417841\n" | |
] | |
}, | |
{ | |
"ename": "KeyboardInterrupt", | |
"evalue": "", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", | |
"\u001b[0;32m<ipython-input-3-df05c57cec09>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;32mcontinue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mreport_name\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfilepath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\".\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m gnrd_result = requests.post(gnrd_api_url,\n\u001b[0m\u001b[1;32m 10\u001b[0m files={ \"file\": filepath.read_bytes() })\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/requests/api.py\u001b[0m in \u001b[0;36mpost\u001b[0;34m(url, data, json, **kwargs)\u001b[0m\n\u001b[1;32m 117\u001b[0m \"\"\"\n\u001b[1;32m 118\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 119\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'post'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mjson\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mjson\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 120\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 121\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/requests/api.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(method, url, **kwargs)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;31m# cases, and look like a memory leak in others.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0msessions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSession\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0msession\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 62\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)\u001b[0m\n\u001b[1;32m 540\u001b[0m }\n\u001b[1;32m 541\u001b[0m \u001b[0msend_kwargs\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msettings\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 542\u001b[0;31m \u001b[0mresp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mprep\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msend_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 543\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 544\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresp\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/requests/sessions.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, **kwargs)\u001b[0m\n\u001b[1;32m 653\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 654\u001b[0m \u001b[0;31m# Send the request\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 655\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0madapter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 656\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 657\u001b[0m \u001b[0;31m# Total elapsed time of the request (approximately)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/requests/adapters.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, request, stream, timeout, verify, cert, proxies)\u001b[0m\n\u001b[1;32m 437\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mchunked\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 439\u001b[0;31m resp = conn.urlopen(\n\u001b[0m\u001b[1;32m 440\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 441\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36murlopen\u001b[0;34m(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)\u001b[0m\n\u001b[1;32m 697\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 698\u001b[0m \u001b[0;31m# Make the request on the httplib connection object.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 699\u001b[0;31m httplib_response = self._make_request(\n\u001b[0m\u001b[1;32m 700\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 701\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/urllib3/connectionpool.py\u001b[0m in \u001b[0;36m_make_request\u001b[0;34m(self, conn, method, url, timeout, chunked, **httplib_request_kw)\u001b[0m\n\u001b[1;32m 392\u001b[0m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest_chunked\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mhttplib_request_kw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 393\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 394\u001b[0;31m \u001b[0mconn\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mhttplib_request_kw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 395\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 396\u001b[0m \u001b[0;31m# We are swallowing BrokenPipeError (errno.EPIPE) since the server is\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m~/02021/sul-dlss/labs/ml-env/lib/python3.8/site-packages/urllib3/connection.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, body, headers)\u001b[0m\n\u001b[1;32m 232\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"user-agent\"\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0msix\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_str\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mlower\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 233\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"User-Agent\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_get_default_user_agent\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 234\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mHTTPConnection\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrequest\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mheaders\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 235\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 236\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mrequest_chunked\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py\u001b[0m in \u001b[0;36mrequest\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1228\u001b[0m encode_chunked=False):\n\u001b[1;32m 1229\u001b[0m \u001b[0;34m\"\"\"Send a complete request to the server.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1230\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1231\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1232\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_send_request\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0murl\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mheaders\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py\u001b[0m in \u001b[0;36m_send_request\u001b[0;34m(self, method, url, body, headers, encode_chunked)\u001b[0m\n\u001b[1;32m 1274\u001b[0m \u001b[0;31m# default charset of iso-8859-1.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1275\u001b[0m \u001b[0mbody\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_encode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'body'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1276\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mendheaders\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbody\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1277\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1278\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mgetresponse\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py\u001b[0m in \u001b[0;36mendheaders\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1223\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1224\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mCannotSendHeader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1225\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_send_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmessage_body\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencode_chunked\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencode_chunked\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1226\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1227\u001b[0m def request(self, method, url, body=None, headers={}, *,\n", | |
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py\u001b[0m in \u001b[0;36m_send_output\u001b[0;34m(self, message_body, encode_chunked)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[0mchunk\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34mf'{len(chunk):X}\\r\\n'\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'ascii'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0mchunk\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1042\u001b[0m \u001b[0;34m+\u001b[0m \u001b[0;34mb'\\r\\n'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1043\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mchunk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1044\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1045\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mencode_chunked\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_http_vsn\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m11\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.8/lib/python3.8/http/client.py\u001b[0m in \u001b[0;36msend\u001b[0;34m(self, data)\u001b[0m\n\u001b[1;32m 963\u001b[0m \u001b[0;32mreturn\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 964\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 965\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msock\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msendall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 966\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 967\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcollections\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mabc\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mIterable\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", | |
"\u001b[0;31mKeyboardInterrupt\u001b[0m: " | |
] | |
} | |
], | |
"source": [ | |
"gnrd_names_papers_remaining = []\n", | |
"errors_remaining = []\n", | |
"start = datetime.datetime.utcnow()\n", | |
"print(f\"Started processing PDF reports with GNRD web services at {start}\")\n", | |
"for i,filepath in enumerate(pdf_directory.iterdir()):\n", | |
" if i <= 1243:\n", | |
" continue\n", | |
" report_name = filepath.name.split(\".\")[:-1][0]\n", | |
" gnrd_result = requests.post(gnrd_api_url,\n", | |
" files={ \"file\": filepath.read_bytes() })\n", | |
"\n", | |
" if gnrd_result.status_code < 400:\n", | |
" retrieved_on = datetime.datetime.utcnow().isoformat()\n", | |
" for name in gnrd_result.json().get('names', []):\n", | |
" gnrd_names_papers_remaining.append( {\"report\": report_name, \n", | |
" \"scientificName\": name['scientificName'],\n", | |
" \"retrieved\": retrieved_on})\n", | |
" else:\n", | |
" errors_remaining.append({ \"report\": report_name, \"http_status\": gnrd_result.status_code})\n", | |
" print(f\"E{i}\", end=\"\")\n", | |
" if not i%10 and i > 0:\n", | |
" print(\".\", end=\"\")\n", | |
" if not i%50:\n", | |
" print(f\"{i}\", end=\"\")\n", | |
"end = datetime.datetime.utcnow()\n", | |
"print(f\"Finished processing at {end}, total time {(end-start).seconds / 60.} for {len(gnrd_names_papers)}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"id": "666d59f4-0efb-4fe9-8b92-ef78a7c04bc0", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"taxa_report_names = []\n", | |
"for row in pdf_directory.iterdir():\n", | |
" taxa_report_names.append(row.name)\n", | |
"end = datetime.datetime.utcnow()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "ed80001a-f37d-43b0-95ca-1c882aba6c13", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1705" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(taxa_report_names)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"id": "0670442f-0145-43ec-8fdb-62965d9ad174", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import multiprocessing" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 19, | |
"id": "10d0d26e-8a18-4731-952f-75811fdbb1c8", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def retrieve_names(report):\n", | |
" result = requests.post(gnrd_api_url,\n", | |
" files={'file': report.read_bytes()})\n", | |
" if result.status_code < 400:\n", | |
" return { 'report': filename, \n", | |
" 'names': result.json().get('names', []),\n", | |
" 'retrieved': datetime.datetime.utcnow().isoformat() }\n", | |
" return { 'report': filename, 'names': [], 'error': result.text }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"id": "e0f181a6-e21e-47ff-9a2a-222e373400c2", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def get_gnrd_names():\n", | |
" PROCESSES = 4\n", | |
" start = datetime.datetime.utcnow()\n", | |
" with multiprocessing.Pool(PROCESSES) as pool:\n", | |
" results = []\n", | |
" for i,filepath in enumerate(pdf_directory.iterdir()):\n", | |
" results.append(pool.apply_async(retrieve_names, filepath))\n", | |
" if not i%10 and i > 0:\n", | |
" print(\".\", end=\"\")\n", | |
" if not i%50:\n", | |
" print(f\"{i}\", end=\"\")\n", | |
" end = datetime.datetime.utcnow()\n", | |
" return results" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"id": "9b207fce-93be-40e9-b154-5eb3e59eae6b", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"0.....50.....100.....150.....200.....250.....300.....350.....400.....450.....500.....550.....600.....650.....700.....750.....800.....850.....900.....950.....1000.....1050.....1100.....1150.....1200.....1250.....1300.....1350.....1400.....1450.....1500.....1550.....1600.....1650.....1700" | |
] | |
} | |
], | |
"source": [ | |
"results = get_gnrd_names()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 50, | |
"id": "250741bb-6663-453f-b6db-e5134a68fa54", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for i,row in enumerate(results):\n", | |
" if row.ready():\n", | |
" print(i)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"id": "9a73c93a-e7be-4090-bbbb-390d7f97cf23", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"502" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"1705-1203" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"id": "b5f2155b-f186-459e-a62e-d2808de22f86", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"1539" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"1371 + 168" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 59, | |
"id": "6d5a7e20-6dc5-4933-be4a-e5a31b9b1c85", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Finished at 2021-06-28 20:02:29.528724 total time 0.0\n" | |
] | |
} | |
], | |
"source": [ | |
"start = datetime.datetime.utcnow()\n", | |
"result_text = requests.post(gnrd_api_url, \n", | |
" data={ \"text\": all_text, \"with_verification\": 'true'})\n", | |
"end = datetime.datetime.utcnow()\n", | |
"print(f\"Finished at {end} total time {(end-start).seconds / 60.}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 70, | |
"id": "441de6e8-0ccb-4d05-85c9-eca0c006fcce", | |
"metadata": { | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"{'token_url': 'http://gnrd.globalnames.org/name_finder.json?token=5bfmm7fkcg',\n", | |
" 'input_url': None,\n", | |
" 'file': None,\n", | |
" 'status': 200,\n", | |
" 'engine': 'gnfinder',\n", | |
" 'unique': False,\n", | |
" 'verbatim': True,\n", | |
" 'parameters': {'return_content': False,\n", | |
" 'with_verification': True,\n", | |
" 'preferred_data_sources': [],\n", | |
" 'detect_language': False,\n", | |
" 'engine': 0,\n", | |
" 'no_bayes': False},\n", | |
" 'names': [{'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 404,\n", | |
" 'offsetEnd': 413},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 547,\n", | |
" 'offsetEnd': 556},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 596,\n", | |
" 'offsetEnd': 605},\n", | |
" {'verbatim': 'Pelvetia fastiqiata',\n", | |
" 'scientificName': 'Pelvetia fastiqiata',\n", | |
" 'offsetStart': 798,\n", | |
" 'offsetEnd': 817},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 1399,\n", | |
" 'offsetEnd': 1408},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 1461,\n", | |
" 'offsetEnd': 1470},\n", | |
" {'verbatim': 'Cyanonlax hartwegii',\n", | |
" 'scientificName': 'Cyanonlax hartwegii',\n", | |
" 'offsetStart': 1829,\n", | |
" 'offsetEnd': 1848},\n", | |
" {'verbatim': 'Cyanoplax hartwegii',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 2251,\n", | |
" 'offsetEnd': 2270},\n", | |
" {'verbatim': 'Cyanoplax hartwegii',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 2806,\n", | |
" 'offsetEnd': 2825},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 3492,\n", | |
" 'offsetEnd': 3501},\n", | |
" {'verbatim': 'Cyanoplax.',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 3682,\n", | |
" 'offsetEnd': 3692},\n", | |
" {'verbatim': 'Cyanoplax hartwegii',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 3759,\n", | |
" 'offsetEnd': 3778},\n", | |
" {'verbatim': 'Cyanoplax hartwegii',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 3797,\n", | |
" 'offsetEnd': 3816},\n", | |
" {'verbatim': 'Cyanoplax hartwegii',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 5765,\n", | |
" 'offsetEnd': 5784},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 6063,\n", | |
" 'offsetEnd': 6072},\n", | |
" {'verbatim': 'Cyanoplax hartwegii.',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 6128,\n", | |
" 'offsetEnd': 6148},\n", | |
" {'verbatim': 'Polyplacophora',\n", | |
" 'scientificName': 'Polyplacophora',\n", | |
" 'offsetStart': 6330,\n", | |
" 'offsetEnd': 6344},\n", | |
" {'verbatim': 'Cyanoplax hartwegii',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 6432,\n", | |
" 'offsetEnd': 6451},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 6715,\n", | |
" 'offsetEnd': 6724},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 7103,\n", | |
" 'offsetEnd': 7112},\n", | |
" {'verbatim': 'Cyanoplax hartwegii',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 7298,\n", | |
" 'offsetEnd': 7317},\n", | |
" {'verbatim': 'Cyanoplax hartwegii',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 7429,\n", | |
" 'offsetEnd': 7448},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 7530,\n", | |
" 'offsetEnd': 7539},\n", | |
" {'verbatim': 'Cyanoplax hartwegii',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 7581,\n", | |
" 'offsetEnd': 7600},\n", | |
" {'verbatim': 'Cyanonlax hartwegii',\n", | |
" 'scientificName': 'Cyanonlax hartwegii',\n", | |
" 'offsetStart': 7626,\n", | |
" 'offsetEnd': 7645},\n", | |
" {'verbatim': 'Sypharochiton',\n", | |
" 'scientificName': 'Sypharochiton',\n", | |
" 'offsetStart': 7910,\n", | |
" 'offsetEnd': 7923},\n", | |
" {'verbatim': 'Nuttallina',\n", | |
" 'scientificName': 'Nuttallina',\n", | |
" 'offsetStart': 7961,\n", | |
" 'offsetEnd': 7971},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 7992,\n", | |
" 'offsetEnd': 8001},\n", | |
" {'verbatim': '(Mollusca:',\n", | |
" 'scientificName': 'Mollusca',\n", | |
" 'offsetStart': 8060,\n", | |
" 'offsetEnd': 8070},\n", | |
" {'verbatim': 'Polyplacophora).',\n", | |
" 'scientificName': 'Polyplacophora',\n", | |
" 'offsetStart': 8071,\n", | |
" 'offsetEnd': 8087},\n", | |
" {'verbatim': '(Mollusca:',\n", | |
" 'scientificName': 'Mollusca',\n", | |
" 'offsetStart': 8116,\n", | |
" 'offsetEnd': 8126},\n", | |
" {'verbatim': 'Polyplacophora).',\n", | |
" 'scientificName': 'Polyplacophora',\n", | |
" 'offsetStart': 8127,\n", | |
" 'offsetEnd': 8143},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 8329,\n", | |
" 'offsetEnd': 8338},\n", | |
" {'verbatim': 'Sypharochiton pelliserpentis',\n", | |
" 'scientificName': 'Sypharochiton pelliserpentis',\n", | |
" 'offsetStart': 8757,\n", | |
" 'offsetEnd': 8785},\n", | |
" {'verbatim': 'Polyplacophora).',\n", | |
" 'scientificName': 'Polyplacophora',\n", | |
" 'offsetStart': 8797,\n", | |
" 'offsetEnd': 8813},\n", | |
" {'verbatim': 'Cyanoplax hartwegii',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 9053,\n", | |
" 'offsetEnd': 9072},\n", | |
" {'verbatim': 'Acmaea',\n", | |
" 'scientificName': 'Acmaea',\n", | |
" 'offsetStart': 9326,\n", | |
" 'offsetEnd': 9332},\n", | |
" {'verbatim': 'Aplysia',\n", | |
" 'scientificName': 'Aplysia',\n", | |
" 'offsetStart': 9417,\n", | |
" 'offsetEnd': 9424},\n", | |
" {'verbatim': 'Henrietta fasciata',\n", | |
" 'scientificName': 'Henrietta fasciata',\n", | |
" 'offsetStart': 9495,\n", | |
" 'offsetEnd': 9513},\n", | |
" {'verbatim': 'Mollusca',\n", | |
" 'scientificName': 'Mollusca',\n", | |
" 'offsetStart': 9656,\n", | |
" 'offsetEnd': 9664},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 9858,\n", | |
" 'offsetEnd': 9867},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 10487,\n", | |
" 'offsetEnd': 10496},\n", | |
" {'verbatim': 'Cyanoplax hartwegii',\n", | |
" 'scientificName': 'Cyanoplax hartwegii',\n", | |
" 'offsetStart': 10525,\n", | |
" 'offsetEnd': 10544},\n", | |
" {'verbatim': 'Mollusca,',\n", | |
" 'scientificName': 'Mollusca',\n", | |
" 'offsetStart': 10843,\n", | |
" 'offsetEnd': 10852},\n", | |
" {'verbatim': 'Cyanoplax',\n", | |
" 'scientificName': 'Cyanoplax',\n", | |
" 'offsetStart': 10925,\n", | |
" 'offsetEnd': 10934},\n", | |
" {'verbatim': 'Cyanonlax hartwegii',\n", | |
" 'scientificName': 'Cyanonlax hartwegii',\n", | |
" 'offsetStart': 11346,\n", | |
" 'offsetEnd': 11365}],\n", | |
" 'language_used': 'eng',\n", | |
" 'execution_time': {'text_preparation_duration': 0.06183362007141113,\n", | |
" 'find_names_duration': 0.23911786079406738,\n", | |
" 'total_duration': 0.30721473693847656},\n", | |
" 'verified_names': [{'supplied_name_string': 'Cyanoplax',\n", | |
" 'is_known_name': True,\n", | |
" 'results': {'match_value': 'EXACT',\n", | |
" 'name_string': 'Cyanoplax',\n", | |
" 'current_name_string': 'Cyanoplax',\n", | |
" 'data_source_id': 1,\n", | |
" 'data_source_title': 'Catalogue of Life',\n", | |
" 'classification_path': 'Animalia|Mollusca|Polyplacophora|Chitonida|Mopalioidea|Lepidochitonidae|Cyanoplax',\n", | |
" 'edit_distance': 0},\n", | |
" 'preferred_results': []},\n", | |
" {'supplied_name_string': 'Pelvetia fastiqiata',\n", | |
" 'is_known_name': False,\n", | |
" 'results': {'match_value': 'FUZZY',\n", | |
" 'name_string': 'Pelvetia fastigiata (J. Agardh) De Toni, 1895',\n", | |
" 'current_name_string': 'Pelvetia fastigiata (J. Agardh) De Toni, 1895',\n", | |
" 'data_source_id': 8,\n", | |
" 'data_source_title': 'The Interim Register of Marine and Nonmarine Genera',\n", | |
" 'classification_path': 'Protista|Heterokontophyta|Phaeophyceae|Fucales|Fucaceae|Pelvetia|Pelvetia fastigiata',\n", | |
" 'edit_distance': 1},\n", | |
" 'preferred_results': []},\n", | |
" {'supplied_name_string': 'Cyanonlax hartwegii',\n", | |
" 'is_known_name': False,\n", | |
" 'results': {'match_value': 'FUZZY',\n", | |
" 'name_string': 'Cyanoplax hartwegii (Carpenter, 1855)',\n", | |
" 'current_name_string': 'Cyanoplax hartwegii (Carpenter, 1855)',\n", | |
" 'data_source_id': 1,\n", | |
" 'data_source_title': 'Catalogue of Life',\n", | |
" 'classification_path': 'Animalia|Mollusca|Polyplacophora|Chitonida|Mopalioidea|Lepidochitonidae|Cyanoplax|Cyanoplax hartwegii',\n", | |
" 'edit_distance': 1},\n", | |
" 'preferred_results': []},\n", | |
" {'supplied_name_string': 'Cyanoplax hartwegii',\n", | |
" 'is_known_name': True,\n", | |
" 'results': {'match_value': 'EXACT',\n", | |
" 'name_string': 'Cyanoplax hartwegii (Carpenter, 1855)',\n", | |
" 'current_name_string': 'Cyanoplax hartwegii (Carpenter, 1855)',\n", | |
" 'data_source_id': 1,\n", | |
" 'data_source_title': 'Catalogue of Life',\n", | |
" 'classification_path': 'Animalia|Mollusca|Polyplacophora|Chitonida|Mopalioidea|Lepidochitonidae|Cyanoplax|Cyanoplax hartwegii',\n", | |
" 'edit_distance': 0},\n", | |
" 'preferred_results': []},\n", | |
" {'supplied_name_string': 'Polyplacophora',\n", | |
" 'is_known_name': True,\n", | |
" 'results': {'match_value': 'EXACT',\n", | |
" 'name_string': 'Polyplacophora',\n", | |
" 'current_name_string': 'Polyplacophora',\n", | |
" 'data_source_id': 1,\n", | |
" 'data_source_title': 'Catalogue of Life',\n", | |
" 'classification_path': 'Animalia|Mollusca|Polyplacophora',\n", | |
" 'edit_distance': 0},\n", | |
" 'preferred_results': []},\n", | |
" {'supplied_name_string': 'Sypharochiton',\n", | |
" 'is_known_name': True,\n", | |
" 'results': {'match_value': 'EXACT',\n", | |
" 'name_string': 'Sypharochiton',\n", | |
" 'current_name_string': 'Sypharochiton',\n", | |
" 'data_source_id': 1,\n", | |
" 'data_source_title': 'Catalogue of Life',\n", | |
" 'classification_path': 'Animalia|Mollusca|Polyplacophora|Chitonida|Chitonoidea|Chitonidae|Sypharochiton',\n", | |
" 'edit_distance': 0},\n", | |
" 'preferred_results': []},\n", | |
" {'supplied_name_string': 'Nuttallina',\n", | |
" 'is_known_name': True,\n", | |
" 'results': {'match_value': 'EXACT',\n", | |
" 'name_string': 'Nuttallina',\n", | |
" 'current_name_string': 'Nuttallina',\n", | |
" 'data_source_id': 1,\n", | |
" 'data_source_title': 'Catalogue of Life',\n", | |
" 'classification_path': 'Animalia|Mollusca|Polyplacophora|Chitonida|Mopalioidea|Lepidochitonidae|Nuttallina',\n", | |
" 'edit_distance': 0},\n", | |
" 'preferred_results': []},\n", | |
" {'supplied_name_string': 'Mollusca',\n", | |
" 'is_known_name': True,\n", | |
" 'results': {'match_value': 'EXACT',\n", | |
" 'name_string': 'Mollusca',\n", | |
" 'current_name_string': 'Mollusca',\n", | |
" 'data_source_id': 1,\n", | |
" 'data_source_title': 'Catalogue of Life',\n", | |
" 'classification_path': 'Animalia|Mollusca',\n", | |
" 'edit_distance': 0},\n", | |
" 'preferred_results': []},\n", | |
" {'supplied_name_string': 'Sypharochiton pelliserpentis',\n", | |
" 'is_known_name': True,\n", | |
" 'results': {'match_value': 'EXACT',\n", | |
" 'name_string': 'Sypharochiton pelliserpentis (Quoy & Gaimard, 1835)',\n", | |
" 'current_name_string': 'Sypharochiton pelliserpentis (Quoy & Gaimard, 1835)',\n", | |
" 'data_source_id': 1,\n", | |
" 'data_source_title': 'Catalogue of Life',\n", | |
" 'classification_path': 'Animalia|Mollusca|Polyplacophora|Chitonida|Chitonoidea|Chitonidae|Sypharochiton|Sypharochiton pelliserpentis',\n", | |
" 'edit_distance': 0},\n", | |
" 'preferred_results': []},\n", | |
" {'supplied_name_string': 'Acmaea',\n", | |
" 'is_known_name': True,\n", | |
" 'results': {'match_value': 'EXACT',\n", | |
" 'name_string': 'Acmaea',\n", | |
" 'current_name_string': 'Acmaea',\n", | |
" 'data_source_id': 1,\n", | |
" 'data_source_title': 'Catalogue of Life',\n", | |
" 'classification_path': 'Animalia|Mollusca|Gastropoda|Not assigned|Lottioidea|Acmaeidae|Acmaea',\n", | |
" 'edit_distance': 0},\n", | |
" 'preferred_results': []},\n", | |
" {'supplied_name_string': 'Aplysia',\n", | |
" 'is_known_name': True,\n", | |
" 'results': {'match_value': 'EXACT',\n", | |
" 'name_string': 'Aplysia',\n", | |
" 'current_name_string': 'Aplysia',\n", | |
" 'data_source_id': 1,\n", | |
" 'data_source_title': 'Catalogue of Life',\n", | |
" 'classification_path': 'Animalia|Mollusca|Gastropoda|Aplysiida|Aplysioidea|Aplysiidae|Aplysia',\n", | |
" 'edit_distance': 0},\n", | |
" 'preferred_results': []},\n", | |
" {'supplied_name_string': 'Henrietta fasciata',\n", | |
" 'is_known_name': False,\n", | |
" 'results': {'match_value': 'PARTIAL_EXACT',\n", | |
" 'name_string': 'Henrietta Macfad.',\n", | |
" 'current_name_string': 'Henrietta Macfad.',\n", | |
" 'data_source_id': 165,\n", | |
" 'data_source_title': 'Tropicos - Missouri Botanical Garden',\n", | |
" 'classification_path': '',\n", | |
" 'edit_distance': 0},\n", | |
" 'preferred_results': []}],\n", | |
" 'total': 46}" | |
] | |
}, | |
"execution_count": 70, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"result_text.json()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 63, | |
"id": "14dfc755-5569-4769-9bc1-5f25087682c8", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Started processing PDF reports with GNRD web services at 2021-06-28 21:49:14.404673\n", | |
"0.....50.....100.....150.....200.....250.....300.....350.....400.....450.....500.....550.....600.....650.....700.....750.....800.....850.....900.....950.....1000.....1050...E1086..1100.....1150.....1200.....1250.....1300..E1329...1350.....1400.....1450.....1500..E1526...1550.....1600..E1622Finished processing at 2021-06-28 22:02:53.102518, total time 13.633333333333333 for 41367\n" | |
] | |
} | |
], | |
"source": [ | |
"gnrd_names_papers = []\n", | |
"errors = []\n", | |
"start = datetime.datetime.utcnow()\n", | |
"print(f\"Started processing PDF reports with GNRD web services at {start}\")\n", | |
"for i,filepath in enumerate(papers_tei.iterdir()):\n", | |
" report_name = filepath.name.split(\".\")[:-1][0]\n", | |
" try:\n", | |
" report_xml = etree.XML(filepath.read_bytes())\n", | |
" except:\n", | |
" errors.append({\"report\": report_name, \"error\": \"XML Parse Error\"})\n", | |
" print(f\"E{i}\", end=\"\")\n", | |
" continue\n", | |
" report_body = report_xml.find(\"tei:text/tei:body\", namespaces=TEI)\n", | |
" raw_text = ''\n", | |
" for row in report_body.itertext():\n", | |
" raw_text += f\" {row}\"\n", | |
" gnrd_result = requests.post(gnrd_api_url,\n", | |
" data={ \"text\": raw_text })\n", | |
"\n", | |
" if gnrd_result.status_code < 400:\n", | |
" retrieved_on = datetime.datetime.utcnow().isoformat()\n", | |
" for name in gnrd_result.json().get('names', []):\n", | |
" gnrd_names_papers.append( {\"report\": report_name, \n", | |
" \"scientificName\": name['scientificName'],\n", | |
" \"retrieved\": retrieved_on})\n", | |
" else:\n", | |
" errors.append({ \"report\": report_name, \"http_status\": gnrd_result.status_code})\n", | |
" print(f\"E{i}\", end=\"\")\n", | |
" if not i%10 and i > 0:\n", | |
" print(\".\", end=\"\")\n", | |
" if not i%50:\n", | |
" print(f\"{i}\", end=\"\")\n", | |
"end = datetime.datetime.utcnow()\n", | |
"print(f\"Finished processing at {end}, total time {(end-start).seconds / 60.} for {len(gnrd_names_papers)}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 64, | |
"id": "24806ad3-a9e5-4b16-b118-2f08dbf29e01", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"41367" | |
] | |
}, | |
"execution_count": 64, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(gnrd_names_papers )" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 65, | |
"id": "c2674eed-592c-495e-8270-4ecb9ed7b41e", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"4" | |
] | |
}, | |
"execution_count": 65, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(errors)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 66, | |
"id": "fcc29adf-6865-4a26-a857-bc41465889c8", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'report': 'hms_zw387wm0746', 'http_status': 400},\n", | |
" {'report': 'usc_1976_Su_INDEX', 'http_status': 400},\n", | |
" {'report': 'usc_1979_Su_Ryall', 'http_status': 400},\n", | |
" {'report': '', 'error': 'XML Parse Error'}]" | |
] | |
}, | |
"execution_count": 66, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"errors" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 69, | |
"id": "530d93ca-c5de-4cce-956d-7c59159efee7", | |
"metadata": { | |
"collapsed": true, | |
"jupyter": { | |
"outputs_hidden": true | |
}, | |
"tags": [] | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'report': 'hms_zx897dq3818',\n", | |
" 'scientificName': 'Mitrella',\n", | |
" 'retrieved': '2021-06-28T21:58:41.768659'},\n", | |
" {'report': 'hms_zx897dq3818',\n", | |
" 'scientificName': 'Homalopoma',\n", | |
" 'retrieved': '2021-06-28T21:58:41.768659'},\n", | |
" {'report': 'hms_zx897dq3818',\n", | |
" 'scientificName': 'P. hirsutiusculus',\n", | |
" 'retrieved': '2021-06-28T21:58:41.768659'},\n", | |
" {'report': 'hms_zx897dq3818',\n", | |
" 'scientificName': 'T. funebralis',\n", | |
" 'retrieved': '2021-06-28T21:58:41.768659'},\n", | |
" {'report': 'hms_zx897dq3818',\n", | |
" 'scientificName': 'Littorina',\n", | |
" 'retrieved': '2021-06-28T21:58:41.768659'},\n", | |
" {'report': 'hms_zx897dq3818',\n", | |
" 'scientificName': 'Tegula funebralis',\n", | |
" 'retrieved': '2021-06-28T21:58:41.768659'},\n", | |
" {'report': 'hms_zx897dq3818',\n", | |
" 'scientificName': 'Pagurus samuelis',\n", | |
" 'retrieved': '2021-06-28T21:58:41.768659'},\n", | |
" {'report': 'hms_zx897dq3818',\n", | |
" 'scientificName': 'P. hirsutiusculus',\n", | |
" 'retrieved': '2021-06-28T21:58:41.768659'},\n", | |
" {'report': 'hms_zx897dq3818',\n", | |
" 'scientificName': 'Pagurus',\n", | |
" 'retrieved': '2021-06-28T21:58:41.768659'},\n", | |
" {'report': 'hms_zx897dq3818',\n", | |
" 'scientificName': 'Tegula funebralis',\n", | |
" 'retrieved': '2021-06-28T21:58:41.768659'}]" | |
] | |
}, | |
"execution_count": 69, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"gnrd_names_papers[30000:30010]" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"id": "93b82f1a-6f61-434d-b572-094f94484917", | |
"metadata": {}, | |
"source": [ | |
"## Process OSU PDF Papers\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"id": "b809e111-8d42-4445-b3f2-d30aa5deb89f", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Start processing OSU Papers at 2021-07-02 16:28:04.507753\n", | |
"0 1 2 3 4 5 6 7 8 9 Finish processing OSU Papers at 2021-07-02 16:28:42.247340, total time 0.6166666666666667 minutes\n" | |
] | |
} | |
], | |
"source": [ | |
"osu_results, osu_errors = [],[] \n", | |
"start = datetime.datetime.utcnow()\n", | |
"print(f\"Start processing OSU Papers at {start}\")\n", | |
"for i,paper in enumerate(papers_pdf.glob(\"osu*\")):\n", | |
" report_name = paper.name.split(\".\")[0]\n", | |
" result = requests.post(gnrd_api_url, \n", | |
" files={ \"file\": paper.read_bytes()}) \n", | |
" if result.status_code < 400:\n", | |
" retrieved_on = datetime.datetime.utcnow().isoformat()\n", | |
" for name in result.json().get('names', []):\n", | |
" osu_results.append({\"report\": report_name, \n", | |
" \"scientificName\": name['scientificName'],\n", | |
" \"retrieved\": retrieved_on})\n", | |
" print(f\"{i}\", end=\" \")\n", | |
" else:\n", | |
" osu_errors.append({ \"report\": report_name, \"http_status\": gnrd_result.status_code})\n", | |
" print(f\"E{i}\", end=\" \")\n", | |
"end = datetime.datetime.utcnow()\n", | |
"print(f\"Finish processing OSU Papers at {end}, total time {(end-start).seconds / 60.} minutes\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"id": "0a2e0bcb-7ab9-4809-963d-2d613e656783", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with open(\"/Volumes/GoogleDrive/Shared drives/SUL AI 2020-2021/Project - Species Occurrences/data/gnrd_report.csv\") as fo:\n", | |
" gnrd_names_papers = [row for row in csv.DictReader(fo)]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"id": "c28fb725-1d29-4382-9cd2-1e79e4147228", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"with open(\"/Volumes/GoogleDrive/Shared drives/SUL AI 2020-2021/Project - Species Occurrences/data/gnrd_report.csv\",\n", | |
" 'w+', newline='') as fo:\n", | |
" field_names = ['report', 'scientificName', 'retrieved']\n", | |
" csv_writer = csv.DictWriter(fo, field_names)\n", | |
"\n", | |
" csv_writer.writeheader()\n", | |
" for row in gnrd_names_papers:\n", | |
" csv_writer.writerow(row)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"id": "7acc9f3d-3f1a-4ac9-96f4-f39df0115878", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"41367" | |
] | |
}, | |
"execution_count": 11, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(gnrd_names_papers)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"id": "2989a9d7-da57-4c1b-a988-cfdc13cd24e7", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Found 237 osu_20200612144428207\n", | |
"Found 238 osu_20200612144428207\n", | |
"Found 239 osu_20200612144428207\n", | |
"Found 240 osu_20200612144428207\n", | |
"Found 241 osu_20200612144428207\n", | |
"Found 242 osu_20200612144428207\n", | |
"Found 243 osu_20200612144428207\n", | |
"Found 244 osu_20200612144428207\n", | |
"Found 237 osu_20200612144428207\n", | |
"Found 238 osu_20200612144428207\n", | |
"Found 239 osu_20200612144428207\n", | |
"Found 240 osu_20200612144428207\n", | |
"Found 241 osu_20200612144428207\n", | |
"Found 242 osu_20200612144428207\n", | |
"Found 243 osu_20200612144428207\n", | |
"Found 244 osu_20200612144428207\n" | |
] | |
} | |
], | |
"source": [ | |
"for row in gnrd_names_papers:\n", | |
" for i,osu_report in enumerate(osu_results):\n", | |
" if row['report'] == osu_report['report']:\n", | |
" print(f\"Found {i} {osu_report['report']}\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"id": "45bff310-244c-43aa-85f1-017a463e638c", | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"for row in osu_results:\n", | |
" if row['report'].startswith('osu_20200612144428207'):\n", | |
" continue\n", | |
" gnrd_names_papers.append(row)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"id": "554fecbe-79a4-4066-a186-8db7a0aec0c9", | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'report': 'osu_20200612145031481',\n", | |
" 'scientificName': 'Geobacteraceae',\n", | |
" 'retrieved': '2021-07-02T16:28:05.956653'},\n", | |
" {'report': 'osu_20200612145031481',\n", | |
" 'scientificName': 'Geobacter sulfurreducens',\n", | |
" 'retrieved': '2021-07-02T16:28:05.956653'},\n", | |
" {'report': 'osu_20200612154034430',\n", | |
" 'scientificName': 'Panopea abrupta',\n", | |
" 'retrieved': '2021-07-02T16:28:09.410947'},\n", | |
" {'report': 'osu_20200612154034430',\n", | |
" 'scientificName': 'Panopea abrupta',\n", | |
" 'retrieved': '2021-07-02T16:28:09.410947'},\n", | |
" {'report': 'osu_20200612154034430',\n", | |
" 'scientificName': 'Sebastes',\n", | |
" 'retrieved': '2021-07-02T16:28:09.410947'},\n", | |
" {'report': 'osu_20200612154034430',\n", | |
" 'scientificName': 'Arctica islandica',\n", | |
" 'retrieved': '2021-07-02T16:28:09.410947'},\n", | |
" {'report': 'osu_20200612154034430',\n", | |
" 'scientificName': 'Panopea abrupta',\n", | |
" 'retrieved': '2021-07-02T16:28:09.410947'},\n", | |
" {'report': 'osu_20200612154034430',\n", | |
" 'scientificName': 'P. abrupta',\n", | |
" 'retrieved': '2021-07-02T16:28:09.410947'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'Lutjanus campechanus',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. pinniger',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. melanops',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. pinniger',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. maliger',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. melanops',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. pinniger',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. maliger',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. mystinus',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. paucispinis',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. auriculatus',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. caurinus',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. elongates',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'S. entomelas',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'Alena',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'Alena',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153848020',\n", | |
" 'scientificName': 'Torn tunica externus',\n", | |
" 'retrieved': '2021-07-02T16:28:12.416188'},\n", | |
" {'report': 'osu_20200612153616194',\n", | |
" 'scientificName': 'Oncorhynchus tshawytsha',\n", | |
" 'retrieved': '2021-07-02T16:28:15.793740'},\n", | |
" {'report': 'osu_20200612153616194',\n", | |
" 'scientificName': 'Micas',\n", | |
" 'retrieved': '2021-07-02T16:28:15.793740'},\n", | |
" {'report': 'osu_20200612153616194',\n", | |
" 'scientificName': 'Micas',\n", | |
" 'retrieved': '2021-07-02T16:28:15.793740'},\n", | |
" {'report': 'osu_20200612153616194',\n", | |
" 'scientificName': 'Micas',\n", | |
" 'retrieved': '2021-07-02T16:28:15.793740'},\n", | |
" {'report': 'osu_20200612153616194',\n", | |
" 'scientificName': 'Acaca',\n", | |
" 'retrieved': '2021-07-02T16:28:15.793740'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Gadus macrocepltalus',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Gadus morhua',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Theragra chalcogramma',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Gadus macrocephalus',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Artemia',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Theragra chalcogramma larvae',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Gadus morhua',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Pleuronectes platessa',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Scophthalmus maximus',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Melanogrammus aeglefinus',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Gadus morhua',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Theragra chalcogramma',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Theragra chalcogramma',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153509246',\n", | |
" 'scientificName': 'Phyllis',\n", | |
" 'retrieved': '2021-07-02T16:28:18.768755'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Oncorhynchus mykiss',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Jessica',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Oncorhynchus mykiss',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Campana',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Brevoortia',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Morone sexatilis',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153112738',\n", | |
" 'scientificName': 'Salmo',\n", | |
" 'retrieved': '2021-07-02T16:28:22.519968'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Oncorhynchus mykiss mykiss',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Oncorhynchus mykiss mykiss',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Lecithaster gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Lecithaster gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Plagioporus shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'P. shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Deropegus aspina',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Plagioporus shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Anisakis simplex',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'P. shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Rhadinorhynchus trachuri',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Lecithaster gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'P. shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Hysterothylacium aduncum',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'P. shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'H. aduncum',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'R. trachuri',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'R. trachuri',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'H. aduncum',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'R. trachuri',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'R. trachuri',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'R. trachuri',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'R. trachuri',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Centropages abdominalis',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Pseudocalanus minutus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'C. abdominalis',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'P. minutus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Anisakis',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Deropegus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Hemiurus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Plagioporus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Hysterothylacium',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Rhadinorhynchus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Apophallus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'L. gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'P. shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'P. shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'P. shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Reinhardtius hippoglossoides',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Oncorhynchus kisutch',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Anisakis',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Nematoda',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Mallotus villosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Clupea harengus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Delphinapterus leucas',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Lecithaster gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Digenea',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Hemiuroidea',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Deropegus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'D. aspina',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Oncorhynchus c�arki clarki',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Plagioporus shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Trematoda',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Opecoelidae',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Lecithaster gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Plagioporus shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Crepidostomum farionis',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Deropegus aspina',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Parahemiurus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Rhadinorhynchus trachuri',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Hysterothylacium aduncum',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Anisakis simplex unknown',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Lecithaster',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Lecithaster',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Rhadinorhynchus trachuri',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Cestoda',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Nematoda',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Anisakis simplex',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Hysterothylacium aduncum unknown',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Trematoda',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Crepidostomum farionis',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Deropegus aspina',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Lecithaster gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Parahemiurus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Plagioporus shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Oncorhynchus mykiss',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Rhadinorhynchus trachuri',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Cestoda',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Nematoda',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Anisakis simplex',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Hysterothylacium aduncum',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Trematoda',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Crepidostomum farionis',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Deropegus aspina',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Lecithaster gibbosus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Parahemiurus',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612153337143',\n", | |
" 'scientificName': 'Plagioporus shawi',\n", | |
" 'retrieved': '2021-07-02T16:28:28.960387'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'Neotrypaea',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'Callianassa',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'N. californiensis larvae',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'N. californiensis',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'Neotrypaea californiensis',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'Neot�ypaea californiensis',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'N. californiensis',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'N. californiensis',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'N. californiensis larvae',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'N. californiensis larvae',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'N. californiensis',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'N. californiensis larvae',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'N. californiensis',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'Neotrypaea californiensus larvae',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'Callianassa',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'Neotrypea californiensis',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'Callianassa cal�fomiensis',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'Crustacea',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'Decapoda',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152633039',\n", | |
" 'scientificName': 'Callianassa califomiensis',\n", | |
" 'retrieved': '2021-07-02T16:28:32.403263'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Crustacea',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia pugettensis',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione grijfenis',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia pugettensis',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione grijfenis',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia littoralis',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia pugettensis',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia sex',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Gyge branchialis',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Neotrypaea californiensis',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia pugettensis',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Probopyrus pandalicola',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Palaemonetes paludosus',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Crustacea',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Isopoda',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Crustacea',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Decapoda',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Cornalia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Crustacea',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Gyge branchialis',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia littoralis',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Orthione',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612152822066',\n", | |
" 'scientificName': 'Upogebia',\n", | |
" 'retrieved': '2021-07-02T16:28:34.997472'},\n", | |
" {'report': 'osu_20200612144428207',\n", | |
" 'scientificName': 'Sebastes diploproa',\n", | |
" 'retrieved': '2021-07-02T16:28:42.246943'},\n", | |
" {'report': 'osu_20200612144428207',\n", | |
" 'scientificName': 'Echeverria',\n", | |
" 'retrieved': '2021-07-02T16:28:42.246943'},\n", | |
" {'report': 'osu_20200612144428207',\n", | |
" 'scientificName': 'Sebastes pinniger',\n", | |
" 'retrieved': '2021-07-02T16:28:42.246943'},\n", | |
" {'report': 'osu_20200612144428207',\n", | |
" 'scientificName': 'S. diploproa',\n", | |
" 'retrieved': '2021-07-02T16:28:42.246943'},\n", | |
" {'report': 'osu_20200612144428207',\n", | |
" 'scientificName': 'Scorpaenidae',\n", | |
" 'retrieved': '2021-07-02T16:28:42.246943'},\n", | |
" {'report': 'osu_20200612144428207',\n", | |
" 'scientificName': 'Sebastes',\n", | |
" 'retrieved': '2021-07-02T16:28:42.246943'},\n", | |
" {'report': 'osu_20200612144428207',\n", | |
" 'scientificName': 'Echeverria',\n", | |
" 'retrieved': '2021-07-02T16:28:42.246943'},\n", | |
" {'report': 'osu_20200612144428207',\n", | |
" 'scientificName': 'Sebastes flavidus',\n", | |
" 'retrieved': '2021-07-02T16:28:42.246943'}]" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"with open(\"osu_results.pk\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"id": "6ee63e25-535e-4c74-84fe-7821c9651ff7", | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment