from documentcloud import DocumentCloud
import urllib, cStringIO, csv


## Log in to DocumentCloud.org; swap the two placeholders for a real username and password
client = DocumentCloud(
    "USERNAME",
    "PASSWORD",
)

## Set additional data to store with each document by mapping csv field keys to the new keys they will be stored under on Document Cloud
## You could abstract this by providing these key-value pairs in a separate csv, then supplying the data csv and field mapping csv as args on the command line
## csv column name -> key that column's value is stored under in the uploaded data
field_mapping = {
    "city": "org_city",
    "desc": "org_description",
    "docurl": "source_url",  # upload_doc downloads the document from this value
    "ein": "org_ein",
    "name": "org_name",
    "org_type": "org_type",
    "revenue": "org_revenue",
    "state": "org_state",
    "wpid": "wpid",
    "year": "org_year",
}

def upload_doc(data_dict):
    """
    Map fields from one csv row to Document Cloud fields, download the
    document the row points at, and upload it.

    data_dict -- one csv row as a dict keyed by csv column name. Every key
                 must exist in field_mapping, and the row must supply the
                 columns that map to "source_url", "org_name" and "org_year".

    Raises KeyError when a column is not in field_mapping or when one of
    the required mapped fields is missing from the row.
    Prints the title of the uploaded document; returns nothing.
    """

    ## rename every csv column to the key it is stored under on Document Cloud
    clean_data_kwargs = dict(
        (field_mapping[key], value) for key, value in data_dict.items()
    )

    ## Download the URL with urllib
    url = clean_data_kwargs["source_url"]
    response = urllib.urlopen(url)
    try:
        file_contents = response.read()
    finally:
        ## release the connection even when the read fails, so a long csv
        ## does not leave one open handle behind per row
        response.close()

    ## Stuff it in a file object with cStringIO
    file_obj = cStringIO.StringIO(file_contents)

    ## Set kwargs for documentcloud.org
    kwargs = {
        "title":            clean_data_kwargs["org_name"] + " - " + clean_data_kwargs["org_year"], # update as needed
        "source":           "SOURCE",
        "description":      "DESC",
        "access":           "ACCESS",
        "project":          "PROJ",
        "data":             clean_data_kwargs, # optional
        "secure":           False # or True if you don't want to send docs to OpenCalais
    }

    ## Upload that to DocumentCloud
    client.documents.upload(file_obj, **kwargs)
    ## parenthesized single-argument form prints exactly what the print statement did
    print("Uploaded: %s" % (kwargs["title"]))
    print("\n")

## name of the csv that holds one row per document (its url plus metadata columns)
filename = "FILENAME.csv"

## read the csv in binary mode and hand every row, as a dict, to upload_doc
with open(filename, 'rb') as csv_file:
    for record in csv.DictReader(csv_file):
        upload_doc(record)