Last active
December 21, 2015 14:38
-
-
Save greglinch/6320618 to your computer and use it in GitHub Desktop.
Script using python-documentcloud API wrapper to get the first annotation URL for each document in a specified project.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from documentcloud import DocumentCloud | |
import csv | |
"""
Get the URL of the first annotation for each document in the specified project.
TK: output to a CSV file.
"""
# define your variables (replace the placeholders before running)
username = "USERNAME_HERE"
password = "PASSWORD_HERE"
project_title = "PROJECT_TITLE_HERE"

## Create the DocumentCloud.org client
client = DocumentCloud(username, password)

# look the project up by its title
proj_obj = client.projects.get_by_title(project_title)

# ids of the documents in that project; iterated by the loop further down
docs = proj_obj.document_ids
## constructs the url
def create_annotation_url(doc_id, annotation_page, annotation_id):
    """Return the URL that links to one annotation on a document page.

    Args:
        doc_id: Document identifier used in the URL path.
        annotation_page: Page number the annotation is on.
        annotation_id: Identifier of the annotation.

    Returns:
        A string of the form
        ``https://documentcloud.org/documents/<doc_id>.html#document/p<page>/a<annotation_id>``.
    """
    # or use document_obj.canonical_url in the doc loop
    return "https://documentcloud.org/documents/{0}.html#document/p{1}/a{2}".format(
        doc_id, annotation_page, annotation_id
    )
## sample url | |
# https://www.documentcloud.org/documents/774544-maine-island-trail-association-2011.html#document/p20/a116380 | |
## loops thru the docs list of ids
# Collects the first-annotation URL of every document that has annotations.
# The URLs are kept in a list and printed; previously the built URL was
# discarded, so the script produced no output.
annotation_urls = []
for doc in docs:
    # make each doc id a string
    doc = str(doc)
    # fetch the document object for this id
    doc_obj = client.documents.get(doc)
    # annotations attached to the document
    annotation_obj = doc_obj.annotations
    # documents without annotations are skipped
    if not annotation_obj:
        continue
    # only the first annotation of each document is used
    annotation_page = annotation_obj[0].page
    annotation_id = annotation_obj[0].id
    # pass the id as a string to the create_annotation_url function
    annotation_url = create_annotation_url(doc, annotation_page, annotation_id)
    annotation_urls.append(annotation_url)
    print(annotation_url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment