-
-
Save judge2020/79d0a813c29d5996e254ef2d00f86e6b to your computer and use it in GitHub Desktop.
Simple script to plot your Discord machine-learning predicted age and gender from the data dump.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import glob | |
import json | |
from datetime import datetime | |
from matplotlib import pyplot | |
# Days and per-bracket probabilities collected from "predicted_age" events.
# age_lists[key][i] belongs to the day stored in age_ts[i].
age_ts = []
age_keys = ["prob_13_17", "prob_18_24", "prob_25_34", "prob_35_over"]
age_lists = {k: [] for k in age_keys}

# Days and per-category probabilities collected from "predicted_gender" events.
# gen_lists[key][i] belongs to the day stored in gen_ts[i].
gen_ts = []
gen_keys = ["prob_male", "prob_female", "prob_non_binary_gender_expansive"]
gen_lists = {k: [] for k in gen_keys}

# Event files are searched for in the current working directory only.
activity_files = glob.glob("./events-*-*-of-*.json")

for activity_file in activity_files:
    print("Processing", activity_file)
    # Decode as UTF-8 explicitly: without it, open() falls back to the
    # platform's locale encoding, which can fail on non-ASCII bytes.
    with open(activity_file, "r", encoding="utf-8") as f:
        # The file is read line by line, each matching line being one JSON object.
        for line in f:
            # Cheap substring pre-filter so that only lines mentioning a
            # "predicted_*" key pay the cost of JSON parsing.
            if ',"predicted_' not in line:
                continue
            event = json.loads(line)
            if "predicted_age" in event:
                # The " UTC" suffix is stripped before ISO parsing.
                age_ts.append(datetime.fromisoformat(event.get("day_pt").replace(" UTC", "")))
                for key in age_keys:
                    age_lists[key].append(event.get(key))
            if "predicted_gender" in event:
                gen_ts.append(datetime.fromisoformat(event.get("day_pt").replace(" UTC", "")))
                for key in gen_keys:
                    gen_lists[key].append(event.get(key))
def sort_data(ts, data):
    """Return ``ts`` and every series in ``data`` reordered by ascending ``ts``.

    ``data`` maps a series name to a list running parallel to ``ts``. The
    result is ``(sorted_ts, sorted_data)``, where ``sorted_data`` has the same
    keys in the same order. Rows with equal timestamps keep their input order.
    """
    # Bundle each timestamp with its values so they move together when sorted.
    rows = sorted(zip(ts, *data.values()), key=lambda row: row[0])
    ordered_ts = [row[0] for row in rows]
    ordered_data = {}
    # Column 0 is the timestamp; the series follow in dict order from column 1.
    for column, name in enumerate(data, start=1):
        ordered_data[name] = [row[column] for row in rows]
    return ordered_ts, ordered_data
def _show_chart(title, timestamps, series, keys, labels):
    """Plot one marked line per key in ``keys`` against ``timestamps`` and show it."""
    pyplot.title(title)
    for name in keys:
        pyplot.plot(timestamps, series[name], marker="o")
    # Labels are positional: they must be listed in the same order as ``keys``.
    pyplot.legend(labels)
    pyplot.show()


# Age chart, drawn only when at least one age prediction was collected.
if age_ts:
    age_ts, age_lists = sort_data(age_ts, age_lists)
    _show_chart(
        "Discord predicted age",
        age_ts,
        age_lists,
        age_keys,
        ["13-17", "18-24", "25-34", "35+"],
    )

# Gender chart, drawn only when at least one gender prediction was collected.
if gen_ts:
    gen_ts, gen_lists = sort_data(gen_ts, gen_lists)
    _show_chart(
        "Discord predicted gender",
        gen_ts,
        gen_lists,
        gen_keys,
        ["male", "female", "non-binary"],
    )

# input() is used to display the message and wait for the user to press Enter.
if not activity_files:
    input("Couldn't find your activity file! Make sure you have a file named events-YEAR-XXXXX-of-XXXXX.json in the current directory. It should be in activity/analytics/ in your Discord data dump.")
elif not (age_ts or gen_ts):
    input("No predicted data found for you :(")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment