Last active
November 7, 2017 13:33
-
-
Save mikesname/3f03e09c8c2865cd3b80eb51351c5f85 to your computer and use it in GitHub Desktop.
An example for fetching data from the EHRI GraphQL API and converting it to TSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Fetch id, name, and history data for repositories
# and write as tab-separated values.
import sys, requests, csv, json | |
# sys.argv[0] is always the script name, so the API URL is only present
# when there are at least two entries. Bail out with a usage message
# instead of failing later with an IndexError.
if len(sys.argv) < 2:
    sys.stderr.write("usage: history.py <initial-api-url>\n")
    sys.exit(1)

# Fetch 50 items per request
FETCH_NUM = 50

# Endpoint the GraphQL query is POSTed to (first command-line argument)
URL = sys.argv[1]
# The actual GraphQL query. The number of items per request
# is a mandatory parameter (which we could also hard-code).
# The cursor is an optional parameter which defaults to null.
#
# The result is aliased to `page`; each item carries its id plus the
# name and history of its description, and `pageInfo.nextPage` holds the
# cursor for the following page (null on the last page).
QUERY = """
query getRepositoryHistory($num: Int!, $cursor: Cursor) {
    page: repositories(first: $num, from: $cursor) {
        items {
            id
            description {
                name
                history
            }
        }
        pageInfo {
            nextPage
        }
    }
}
"""
# Tab-separated writer on standard output; fields are quoted only when
# they contain the delimiter, the quote character, or a line break.
csvwriter = csv.writer(sys.stdout, delimiter="\t", quoting=csv.QUOTE_MINIMAL)

# Header row, emitted once before any data rows
csvwriter.writerow(["id", "name", "history"])
def get_history(page_num=1, cursor=None):
    """Fetch every page of repository histories and write them as TSV rows.

    POSTs QUERY to URL asking for FETCH_NUM items at a time, writes one
    row (id, name, history) through ``csvwriter`` for each item that has a
    description with a non-null history, and then follows
    ``pageInfo.nextPage`` until it comes back null.

    Args:
        page_num: Number shown in the progress message for the first
            request; it is incremented for each following page.
        cursor: Value sent as the ``$cursor`` variable of the first
            request. ``None`` starts from the beginning.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the response body carries no ``data.page``
            payload (for example when the server reports query errors).
    """
    # Loop instead of recursing once per page, so the number of pages is
    # not limited by the interpreter's recursion depth.
    while True:
        sys.stderr.write("Fetching page: %d\n" % page_num)
        payload = {
            "query": QUERY,
            "variables": {"num": FETCH_NUM, "cursor": cursor},
        }
        response = requests.post(
            URL,
            data=json.dumps(payload),
            headers={"Content-type": "application/json"},
        )
        # Surface HTTP-level failures before trying to decode the body.
        response.raise_for_status()
        body = response.json()

        # A failed query may come back without usable "data"; report
        # whatever the server said rather than raising a bare KeyError.
        page = (body.get("data") or {}).get("page")
        if page is None:
            raise RuntimeError(
                "GraphQL query failed: %s" % json.dumps(body.get("errors"))
            )

        for item in page["items"]:
            description = item["description"]
            if description is None:
                continue
            history = description["history"]
            # Only items that actually have a history are exported.
            if history is not None:
                csvwriter.writerow([item["id"], description["name"], history])

        cursor = page["pageInfo"]["nextPage"]
        if cursor is None:
            break
        page_num += 1
# Run the export, starting from the first page.
get_history()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment