Last active
December 20, 2017 06:25
-
-
Save Sonophoto/8de17356a6e0f8ddfe365c5ab2d4bacf to your computer and use it in GitHub Desktop.
Example of using python to perform simple ETL on JSON data to a flat Sqlite3 table
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"name": "Chicago, IL", | |
"id": 1, | |
"scores": { | |
"walkability": 1.7, | |
"job_growth": 2.32, | |
"green_space": 0.9, | |
"taxes": 0.6 | |
} | |
}, | |
{ | |
"name": "Seattle, WA", | |
"id": 2, | |
"scores": { | |
"walkability": 1.3, | |
"job_growth": 3.1, | |
"green_space": 1.2, | |
"taxes": 0.8 | |
} | |
}, | |
{ | |
"name": "San Fransisco, CA", | |
"id": 3, | |
"scores": { | |
"walkability": 1.4, | |
"job_growth": 1.1, | |
"green_space": 1.5, | |
"taxes": 0.3 | |
} | |
}, | |
{ | |
"name": "Kansas City, KS", | |
"id": 4, | |
"scores": { | |
"walkability": 0.3, | |
"job_growth": 1.1, | |
"green_space": 1.0, | |
"taxes": 1.3 | |
} | |
}, | |
{ | |
"name": "Miami, FL", | |
"id": 5, | |
"scores": { | |
"walkability": 0.3, | |
"job_growth": 1.43, | |
"green_space": 0.9, | |
"taxes": 1.2 | |
} | |
}, | |
{ | |
"name": "New York, NY", | |
"id": 6, | |
"scores": { | |
"walkability": 1.5, | |
"job_growth": 1.8, | |
"green_space": 1.4, | |
"taxes": 0.7 | |
} | |
}, | |
{ | |
"name": "Detroit, MI", | |
"id": 7, | |
"scores": { | |
"walkability": 0.3, | |
"job_growth": 0.5, | |
"green_space": 0.3, | |
"taxes": 2.0 | |
} | |
}, | |
{ | |
"name": "Los Angeles, CA", | |
"id": 8, | |
"scores": { | |
"walkability": 0.2, | |
"job_growth": 1.0, | |
"green_space": 0.7, | |
"taxes": 0.8 | |
} | |
} | |
] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" Extracts json formatted city data from the file 'file_name' | |
Transforms json data into a single table schema with sqlite3 type names | |
Loads the flattened data records into the sqlite3 table 'cities' | |
AUTHOR: Brig Young <[email protected]> | |
COPYRIGHT: Copyright 2017 Brig Young, Sonophotostudios.com | |
LICENSE: BSD2c (Citation Required) | |
""" | |
import sqlite3 as sqlite | |
import json as json | |
file_name = "city.json" | |
db_name = file_name.replace("json", "sqlite") | |
def load_json_data (file_name): | |
""" Load json data from file_name into data_json and return it | |
""" | |
try: | |
data_json = json.load(open(file_name)) | |
except Exception as err: | |
print(err) | |
exit() | |
return data_json | |
def ETL2_sqlite (data_json): | |
""" Extracts, Transforms and Loads city data into sqlite db | |
To modify this function: | |
1. Add or remove data member from flat_schema | |
2. Add or remove its type from flat_types | |
3. Add or remove its entry in the loader loop below | |
4. Modify the test in test_etl.py (NOT INCLUDED IN THIS GIST) | |
""" | |
# Connect to our db | |
conn = sqlite.connect(db_name) | |
cursor = conn.cursor() | |
# Create our table - if it exists, overwrite it. | |
flat_schema = ['id', 'name', 'taxes', 'green_space', 'job_growth', 'walkability'] | |
flat_types = ['text', 'text', 'real', 'real', 'real', 'real'] | |
create_query = "create table cities (" | |
for x in range(len(flat_schema)): | |
create_query = (create_query + str(flat_schema[x]) + " " + str(flat_types[x])) | |
if x < (len(flat_schema)-1): | |
create_query += ", " | |
else: | |
create_query += ")" | |
try: | |
cursor.execute(create_query) | |
except sqlite.OperationalError: #ASSUMING exception is the table exists... | |
cursor.execute("""drop table cities""") | |
cursor.execute(create_query) | |
# Load our data into the db | |
for x in range(len(data_json)): | |
cursor.execute('insert into cities values (?,?,?,?,?,?)',\ | |
(data_json[x]['id'],\ | |
data_json[x]['name'],\ | |
data_json[x]['scores']['taxes'],\ | |
data_json[x]['scores']['green_space'],\ | |
data_json[x]['scores']['job_growth'],\ | |
data_json[x]['scores']['walkability'])) | |
# Commit our changes and shutdown sqlite | |
conn.commit() | |
conn.close() | |
if __name__ == "__main__": | |
data_json = load_json_data(file_name) | |
ETL2_sqlite(data_json) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment