mikesname · November 7, 2017 13:33
diff --git a/history.py b/history.py
 #!/usr/bin/env python3

 # Fetch id, name, and history data for repositories
 # and write as tab-separated values.

 import sys, requests, csv, json

 # sys.argv[0] is always the script name, so the API URL is present
 # only when there are at least two entries.
 if len(sys.argv) < 2:
    sys.stderr.write("usage: history.py <initial-api-url>\n")
    sys.exit(1)

 # Fetch 50 items per request
 FETCH_NUM = 50
 # Endpoint that get_history() POSTs the query to (first CLI argument).
 URL = sys.argv[1]

 # The actual GraphQL query. The number of items per request
 # is a mandatory parameter (which we could also hard-code).
 # The cursor is an optional parameter which defaults to null.
 QUERY = """
 query getRepositoryHistory($num: Int!, $cursor: Cursor) {
    page: repositories(first: $num, from: $cursor) {
        items {
            id
            description {
                name
                history
            }
        }
        pageInfo {
            nextPage
        }
    }
 }
 """

 # Rows are written to stdout, tab-separated, quoting only when required.
 csvwriter = csv.writer(sys.stdout, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
 csvwriter.writerow(["id", "name", "history"]) # header

 def get_history(page_num=1, cursor=None):
    """Fetch repository pages and write one TSV row per non-null history.

    POSTs QUERY to URL asking for FETCH_NUM items starting at ``cursor``,
    writes ``[id, name, history]`` through ``csvwriter`` for every item
    whose description and history are both non-null, then follows
    ``pageInfo.nextPage`` until it comes back null.

    Pages are walked in a loop instead of one recursive call per page, so
    a long listing cannot hit the interpreter's recursion limit.

    Args:
        page_num: Number logged to stderr for the first page fetched; it
            is incremented for each following page.
        cursor: Cursor of the first page to request; None is sent as a
            null cursor.

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        RuntimeError: The JSON response has no ``data.page`` object.
    """
    while True:
        sys.stderr.write("Fetching page: %d\n" % page_num)

        args = dict(
            query = QUERY,
            variables = dict(
                num = FETCH_NUM,
                cursor = cursor
            )
        )

        r = requests.post(URL,
                data = json.dumps(args),
                headers = {"Content-type": "application/json"})
        # Surface HTTP failures here rather than as a confusing JSON or
        # KeyError further down.
        r.raise_for_status()
        data = r.json()

        # Without a usable data.page there is nothing to iterate; report
        # whatever "errors" the server sent instead of a bare TypeError.
        page = (data.get("data") or {}).get("page")
        if page is None:
            raise RuntimeError("GraphQL query failed: %r" % (data.get("errors"),))

        for item in page["items"]:
            # fetch the ID and first description...
            item_id = item["id"]
            description = item["description"]
            if description is None:
                continue
            name = description["name"]
            history = description["history"]
            if history is not None:
                csvwriter.writerow([item_id, name, history])

        # A null nextPage marks the last page; otherwise continue from it.
        cursor = page["pageInfo"]["nextPage"]
        if cursor is None:
            break
        page_num += 1

 # Run the export with the defaults: page_num=1 and cursor=None.
 get_history()
	#!/usr/bin/env python3

	# Fetch id, name, and history data for repositories
	# and write as tab-separated values.

	import sys, requests, csv, json

	# sys.argv[0] is always the script name, so the API URL is present
	# only when there are at least two entries.
	if len(sys.argv) < 2:
	    sys.stderr.write("usage: history.py <initial-api-url>\n")
	    sys.exit(1)

	# Fetch 50 items per request
	FETCH_NUM = 50
	# Endpoint that get_history() POSTs the query to (first CLI argument).
	URL = sys.argv[1]

	# The actual GraphQL query. The number of items per request
	# is a mandatory parameter (which we could also hard-code).
	# The cursor is an optional parameter which defaults to null.
	QUERY = """
	query getRepositoryHistory($num: Int!, $cursor: Cursor) {
	page: repositories(first: $num, from: $cursor) {
	items {
	id
	description {
	name
	history
	}
	}
	pageInfo {
	nextPage
	}
	}
	}
	"""

	# Rows are written to stdout, tab-separated, quoting only when required.
	csvwriter = csv.writer(sys.stdout, delimiter="\t", quoting=csv.QUOTE_MINIMAL)
	csvwriter.writerow(["id", "name", "history"]) # header

	def get_history(page_num=1, cursor=None):
	    """Fetch repository pages and write one TSV row per non-null history.

	    POSTs QUERY to URL asking for FETCH_NUM items starting at ``cursor``,
	    writes ``[id, name, history]`` through ``csvwriter`` for every item
	    whose description and history are both non-null, then follows
	    ``pageInfo.nextPage`` until it comes back null.

	    Pages are walked in a loop instead of one recursive call per page, so
	    a long listing cannot hit the interpreter's recursion limit.

	    Args:
	        page_num: Number logged to stderr for the first page fetched; it
	            is incremented for each following page.
	        cursor: Cursor of the first page to request; None is sent as a
	            null cursor.

	    Raises:
	        requests.HTTPError: The server answered with a 4xx/5xx status.
	        RuntimeError: The JSON response has no ``data.page`` object.
	    """
	    while True:
	        sys.stderr.write("Fetching page: %d\n" % page_num)

	        args = dict(
	            query = QUERY,
	            variables = dict(
	                num = FETCH_NUM,
	                cursor = cursor
	            )
	        )

	        r = requests.post(URL,
	                data = json.dumps(args),
	                headers = {"Content-type": "application/json"})
	        # Surface HTTP failures here rather than as a confusing JSON or
	        # KeyError further down.
	        r.raise_for_status()
	        data = r.json()

	        # Without a usable data.page there is nothing to iterate; report
	        # whatever "errors" the server sent instead of a bare TypeError.
	        page = (data.get("data") or {}).get("page")
	        if page is None:
	            raise RuntimeError("GraphQL query failed: %r" % (data.get("errors"),))

	        for item in page["items"]:
	            # fetch the ID and first description...
	            item_id = item["id"]
	            description = item["description"]
	            if description is None:
	                continue
	            name = description["name"]
	            history = description["history"]
	            if history is not None:
	                csvwriter.writerow([item_id, name, history])

	        # A null nextPage marks the last page; otherwise continue from it.
	        cursor = page["pageInfo"]["nextPage"]
	        if cursor is None:
	            break
	        page_num += 1

	# Run the export with the defaults: page_num=1 and cursor=None.
	get_history()