Python script (based on @nickjevershed's original) to convert a Google spreadsheet to a simple JSON file and save it locally and/or to S3. Assumes your data is on the left-most sheet (i.e. the default) and that you've already clicked the "Publish to the web" option in the "File" menu. Saving to S3 requires environment variables.
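Example setup, a minimal sketch (the script filename here is hypothetical): set the S3 environment variables the script reads, then run it.

export S3_ACCESS_KEY=... S3_SECRET_KEY=... S3_BUCKET=... S3_REGION=... S3_DOMAIN=...
python sheets-to-json.py --id SHEET_ID --quotes true

Note that --id and --quotes are parsed but not yet wired up (see the to-do's in the script), so for now you set sheet_id by editing the script directly.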
import os
import json
import argparse
import requests
import tinys3
'''
Modified version of nickjevershed's code

to-do's:
- abstract sheet_id option so it can be passed in
- Slack output option
'''
## arguments -- support TK
parser = argparse.ArgumentParser()
parser.add_argument("--id", help="Google Sheet ID")
parser.add_argument("--quotes", help="Use &quot; instead of escaped quotes")
args = parser.parse_args()
id_arg = args.id
quotes_arg = args.quotes
# TK TK

## spreadsheet id -- abstract TK
sheet_id = 'SHEET_ID' # change to param to be accepted via CLI; maybe other ways, too
## Google API request urls ('od6' is the default, left-most worksheet)
url1 = 'https://spreadsheets.google.com/feeds/cells/' + sheet_id + '/od6/public/values?alt=json'
url2 = 'https://spreadsheets.google.com/feeds/list/' + sheet_id + '/od6/public/values?alt=json'
## get the json in cell format from google
ss_content1 = requests.get(url1).json()

## lists to store new keys and data
new_keys = []
new_data = []

## make a list of the entries in the first row for nice keys
for item in ss_content1['feed']['entry']:
    if item['gs$cell']['row'] == '1':
        new_keys.append(item['content']['$t'])
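## for reference, each cell entry from the feed looks roughly like this
## (abridged sketch -- exact shape per the old gdata cells feed):
# {'gs$cell': {'row': '1', 'col': '1'}, 'content': {'$t': 'name'}}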
print(new_keys)
## get json in list format
ss_content2 = requests.get(url2).json()

## remap entries from having gsx$-prefixed keys to having no prefix, i.e. our first row as keys
## note: the list feed lowercases headers and strips spaces/punctuation, so this
## assumes the first-row values are already lowercase and alphanumeric
for entry in ss_content2['feed']['entry']:
    row_data = []
    for key in new_keys:
        ## default escaped quote marks in json
        # row_data.append(entry['gsx$' + key]['$t'])
        ## optionally replace quote marks with the HTML-encoded entity
        row_data.append(entry['gsx$' + key]['$t'].replace('"', "&quot;"))
    new_data.append(dict(zip(new_keys, row_data)))
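## new_data is now a list of plain dicts, one per row -- e.g., with
## hypothetical headers 'name' and 'score':
## [{'name': 'Ada', 'score': '10'}, {'name': 'Grace', 'score': '12'}]
## (the feed returns all cell values as strings)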
print(new_data)
## make it into a json object for writing to file or s3
new_json = json.dumps(new_data)
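## optional: pretty-print for easier debugging (indent is a standard
## json.dumps keyword argument)
# new_json = json.dumps(new_data, indent=2)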
print(new_json)
## save file locally with sheet_id as file name
## (create the local json/ directory first if it doesn't exist)
if not os.path.isdir('json'):
    os.makedirs('json')
json_file = 'json/%s.json' % (sheet_id)
with open(json_file, 'w') as file_out:
    file_out.write(new_json)
## save file on S3 with sheet_id as file name

## S3 access keys
S3_ACCESS_KEY = os.environ['S3_ACCESS_KEY']
S3_SECRET_KEY = os.environ['S3_SECRET_KEY']
S3_BUCKET = os.environ['S3_BUCKET']
S3_REGION = os.environ['S3_REGION']
## set up the s3 connection using tinys3
s3_connection = tinys3.Connection(
    S3_ACCESS_KEY,
    S3_SECRET_KEY,
    default_bucket=S3_BUCKET,
    tls=True
)
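## note: tinys3 defaults to the s3.amazonaws.com endpoint; for a bucket in
## another region you can pass one explicitly (untested sketch):
# s3_connection = tinys3.Connection(S3_ACCESS_KEY, S3_SECRET_KEY,
#     default_bucket=S3_BUCKET, tls=True, endpoint='s3-%s.amazonaws.com' % S3_REGION)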
## S3 key ends up as data/json/<sheet_id>.json
filename_s3 = 'data/' + json_file
domain = os.environ['S3_DOMAIN']
# domain = '%s.s3-%s.amazonaws.com' % (S3_BUCKET, S3_REGION)
# domain = os.environ['CDN_DOMAIN']
url = domain + '/' + filename_s3
with open(json_file, 'rb') as file_to_upload:
    s3_connection.upload(filename_s3, file_to_upload)

print("JSON uploaded to S3:\n\n%s" % url)