Python script (based on @nickjevershed's original) to convert a Google spreadsheet to a simple JSON file and save it locally and/or to S3. Assumes your data is on the left-most sheet (i.e. the default) and that you've already clicked the "Publish to the web" option in the "File" menu. Saving to S3 requires environment variables.
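Example setup, a minimal sketch (the script filename here is hypothetical): set the S3 environment variables the script reads, then run it.

export S3_ACCESS_KEY=... S3_SECRET_KEY=... S3_BUCKET=... S3_REGION=... S3_DOMAIN=...
python sheets-to-json.py --id SHEET_ID --quotes true

Note that --id and --quotes are parsed but not yet wired up (see the to-do's in the script), so for now you set sheet_id by editing the script directly.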
import os
import json
import argparse
import requests
import tinys3
'''
Modified version of nickjevershed's code

to-do's:
- abstract sheet_id option so it can be passed in
- Slack output option
'''
## arguments -- support TK
parser = argparse.ArgumentParser()
parser.add_argument("--id", help="Google Sheet ID")
parser.add_argument("--quotes", help="Use &quot; instead of escaped quotes")
args = parser.parse_args()
id_arg = args.id
quotes_arg = args.quotes
# TK TK

## spreadsheet id -- abstract TK
sheet_id = 'SHEET_ID' # change to param to be accepted via CLI; maybe other ways, too
## Google API request urls ('od6' is the default, left-most worksheet)
url1 = 'https://spreadsheets.google.com/feeds/cells/' + sheet_id + '/od6/public/values?alt=json'
url2 = 'https://spreadsheets.google.com/feeds/list/' + sheet_id + '/od6/public/values?alt=json'
## get the json in cell format from google
ss_content1 = requests.get(url1).json()

## lists to store new keys and data
new_keys = []
new_data = []

## make a list of the entries in the first row for nice keys
for item in ss_content1['feed']['entry']:
    if item['gs$cell']['row'] == '1':
        new_keys.append(item['content']['$t'])
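## for reference, each cell entry from the feed looks roughly like this
## (abridged sketch -- exact shape per the old gdata cells feed):
# {'gs$cell': {'row': '1', 'col': '1'}, 'content': {'$t': 'name'}}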
print(new_keys)
## get json in list format
ss_content2 = requests.get(url2).json()

## remap entries from having gsx$-prefixed keys to having no prefix, i.e. our first row as keys
## note: the list feed lowercases headers and strips spaces/punctuation, so this
## assumes the first-row values are already lowercase and alphanumeric
for entry in ss_content2['feed']['entry']:
    row_data = []
    for key in new_keys:
        ## default escaped quote marks in json
        # row_data.append(entry['gsx$' + key]['$t'])
        ## optionally replace quote marks with the HTML-encoded entity
        row_data.append(entry['gsx$' + key]['$t'].replace('"', "&quot;"))
    new_data.append(dict(zip(new_keys, row_data)))
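## new_data is now a list of plain dicts, one per row -- e.g., with
## hypothetical headers 'name' and 'score':
## [{'name': 'Ada', 'score': '10'}, {'name': 'Grace', 'score': '12'}]
## (the feed returns all cell values as strings)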
print(new_data)
## make it into a json object for writing to file or s3
new_json = json.dumps(new_data)
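## optional: pretty-print for easier debugging (indent is a standard
## json.dumps keyword argument)
# new_json = json.dumps(new_data, indent=2)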
print(new_json)
## save file locally with sheet_id as file name
## (create the local json/ directory first if it doesn't exist)
if not os.path.isdir('json'):
    os.makedirs('json')
json_file = 'json/%s.json' % (sheet_id)
with open(json_file, 'w') as file_out:
    file_out.write(new_json)
## save file on S3 with sheet_id as file name

## S3 access keys
S3_ACCESS_KEY = os.environ['S3_ACCESS_KEY']
S3_SECRET_KEY = os.environ['S3_SECRET_KEY']
S3_BUCKET = os.environ['S3_BUCKET']
S3_REGION = os.environ['S3_REGION']
## set up the s3 connection using tinys3
s3_connection = tinys3.Connection(
    S3_ACCESS_KEY,
    S3_SECRET_KEY,
    default_bucket=S3_BUCKET,
    tls=True
)
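## note: tinys3 defaults to the s3.amazonaws.com endpoint; for a bucket in
## another region you can pass one explicitly (untested sketch):
# s3_connection = tinys3.Connection(S3_ACCESS_KEY, S3_SECRET_KEY,
#     default_bucket=S3_BUCKET, tls=True, endpoint='s3-%s.amazonaws.com' % S3_REGION)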
## S3 key ends up as data/json/<sheet_id>.json
filename_s3 = 'data/' + json_file
domain = os.environ['S3_DOMAIN']
# domain = '%s.s3-%s.amazonaws.com' % (S3_BUCKET, S3_REGION)
# domain = os.environ['CDN_DOMAIN']
url = domain + '/' + filename_s3
with open(json_file, 'rb') as file_to_upload:
    s3_connection.upload(filename_s3, file_to_upload)

print("JSON uploaded to S3:\n\n%s" % url)