Created
February 14, 2019 07:18
-
-
Save handcircus/184e8d47feec2037cb01429ce760dafd to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import os.path | |
import csv | |
import cjson | |
import sys | |
import gzip | |
import ntpath | |
def write_event(prefix, event_type, headers, event):
    """Append one event as a row of the CSV file for its event type.

    The row goes to ``<prefix>_<event_type>.csv``. The header row is written
    only while that file is still empty, so repeated calls accumulate a
    single table per event type.

    prefix     -- leading part of the output file name
    event_type -- event category, used as the file name suffix
    headers    -- column names, also handed to prepare_data() to pick values
    event      -- decoded event the row values are taken from
    """
    row = prepare_data(headers, event)
    csv_path = "%s_%s.csv" % (prefix, event_type)
    with open(csv_path, 'ab') as out:
        writer = csv.writer(out, lineterminator="\n",
                            quoting=csv.QUOTE_NONNUMERIC)
        # Opening in append mode has already created the file, so a size of
        # zero means nothing was written yet and the header row is due.
        file_is_empty = os.path.getsize(csv_path) == 0
        if file_is_empty:
            writer.writerow(headers)
        writer.writerow(row)
def prepare_data(headers, event):
    """Build the CSV row for ``event`` in the column order of ``headers``.

    A header without a dot names a top-level key of ``event``. A dotted
    header ``"parent.child"`` names ``event["parent"]["child"]``; only the
    first two dotted components are used. Any field that is absent yields
    an empty string, so the result always has exactly one value per header.

    The ``revenue`` child is special-cased: its currency -> amount mapping is
    rendered into a single cell (see _format_revenue) so the row never grows
    wider than the header.

    Returns a tuple with ``len(headers)`` items.
    """
    row = []
    for header in headers:
        if "." not in header:
            # 1st level field
            row.append(event[header] if header in event else "")
            continue

        # 2nd level field
        keys = header.split(".")
        parent = event[keys[0]] if keys[0] in event else None
        # A missing parent, a parent that is not a mapping, or a missing
        # child all produce an empty cell instead of raising.
        if not isinstance(parent, dict) or keys[1] not in parent:
            row.append("")
            continue

        value = parent[keys[1]]
        if keys[1] == "revenue":
            value = _format_revenue(value)
        row.append(value)
    return tuple(row)


def _format_revenue(revenue):
    """Render a currency -> amount mapping as the text of one CSV cell."""
    if not revenue:
        return ""
    if not isinstance(revenue, dict):
        # Not the expected mapping; pass the raw value through unchanged.
        return revenue
    # Join every currency into one cell; emitting one cell per currency
    # would shift all following columns out of line with the header.
    # Sorting keeps the output independent of dict ordering.
    return ", ".join("%s %d" % (currency, revenue[currency])
                     for currency in sorted(revenue))
#TODO: check with collectors validation logic for missing fields
def get_csv_header(event_type):
    """Return the ordered list of CSV column names for ``event_type``.

    Every known event type shares the same skeleton:

        common data columns + type-specific data columns
        + common top-level columns + type-specific trailing columns
        + common user_meta columns

    Dotted names address nested fields of the event. An event type that is
    not listed below gets an empty list. A new list is built on every call.

    NOTE(review): some layouts repeat a column that the common data columns
    already contain ("data.event_id" for quality and business, "data.device"
    for quality, "data.platform" for user). The repeats are kept as they are
    so the column layout stays unchanged - confirm whether they are intended.
    """
    shared_data_columns = [
        "data.event_id", "data.session_id", "data.user_id", "data.build",
        "data.device", "data.platform", "data.os_version",
        "data.client_ts",
    ]
    shared_columns = ["country_code", "arrival_ts", "game_id", "ip"]
    shared_user_meta_columns = ["user_meta.install_ts", "user_meta.revenue"]

    # (event type, extra data columns, extra columns after the shared ones)
    layouts = (
        ("quality",
         ["data.value", "data.event_id"],
         ["user_meta.platform", "data.device",
          "user_meta.os_major", "user_meta.os_minor",
          "user_meta.sdk_version"]),
        ("design", ["data.value"], []),
        ("progression", ["data.attempt_num"], []),
        ("session_end", ["data.attempt_num"], []),
        # "user_meta.gender" is deliberately not exported for errors.
        ("error",
         ["data.severity", "data.x", "data.y", "data.z", "data.message"],
         []),
        # The user_meta platform/os/sdk columns are deliberately not
        # exported for user events.
        ("user",
         ["data.os_major", "data.os_minor",
          "data.platform", "data.sdk_version"],
         []),
        ("business", ["data.event_id"], ["currency", "amount"]),
    )

    for name, extra_data_columns, trailing_columns in layouts:
        if event_type == name:
            return (shared_data_columns + extra_data_columns
                    + shared_columns + trailing_columns
                    + shared_user_meta_columns)
    return []
def main():
    """Convert a gzip-compressed file of JSON events into CSV files.

    The source file path is taken from the first command line argument. The
    file is read line by line; every non-blank line is decoded as one JSON
    event and appended to ``<prefix>_<category>.csv``, where ``<prefix>`` is
    the source file name without its last extension and without ".json",
    and ``<category>`` is the event's ``data.category`` value.

    Prints a usage message when no argument is given and an error message
    when the argument is not an existing file.
    """
    if len(sys.argv) < 2:
        print("Usage: ./events2csv.py <source_file>")
        return

    source_name = sys.argv[1]
    if not os.path.isfile(source_name):
        print("Supplied source file does not exists!")
        return

    print("Uncompressing file...")
    with gzip.open(source_name) as f:
        # ntpath.basename splits on both "/" and "\\" separators.
        base_name = ntpath.basename(source_name)
        file_prefix = os.path.splitext(base_name)[0].replace(".json", "")
        print("Reading events from file...")
        rows_written = 0
        for raw_event in f:
            # Blank lines carry no event and would make the decoder fail.
            if not raw_event.strip():
                continue
            # Drop non-ASCII bytes. This must be decode-then-encode: calling
            # .encode("ascii", "ignore") directly on a byte string makes
            # Python 2 decode it with the strict default codec first, which
            # raises UnicodeDecodeError on the very bytes meant to be ignored.
            ascii_event = raw_event.decode("ascii", "ignore").encode("ascii")
            event = cjson.decode(ascii_event)
            category = event["data"]["category"]
            headers = get_csv_header(category)
            write_event(file_prefix, category, headers, event)
            rows_written += 1
            # report progress after the row is on disk so the count is exact
            sys.stdout.write("\r%d rows written..." % rows_written)
            sys.stdout.flush()
        print("\nDone")
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment