Last active
July 7, 2025 18:27
-
-
Save Nikolaj-K/1ab0890b3a9772eaa1ffaf7992e7004f to your computer and use it in GitHub Desktop.
Create a log-log plot of Somnia user rank data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
To use this script, set the module-level variable whose name starts with
'ps_docstring_' to the 4000-line string holding the data of 1000 users
(four lines per user: rank, name, address, points).
To get it, go to https://quest.somnia.network/ and copy the 1000 rows.
""" | |
import matplotlib.pyplot as plt | |
from matplotlib.ticker import ScalarFormatter | |
import itertools | |
class Config:
    """Settings read by the plotting code in this script."""

    # User names that are drawn in red with enlarged labels; any user name
    # from the data may be listed here.
    HIGHLIGHT = ["0x_empire"]
    # When True, both axes are switched to a logarithmic scale.
    USE_LOG_LOG = True
# Example data string: replace its contents with the rows copied from the site
# (four lines per user, in this order: rank, name, address, points).
ps_docstring_250707_REF = """1 | |
GratefulApe | |
0xa7...0834 | |
3241267 | |
2 | |
dmitrys | |
0xe3...0d0b | |
772010 | |
3 | |
drew589 | |
0x92...7253 | |
555949 | |
999 | |
shukrisayuti | |
0xb1...04ed | |
19066 | |
1000 | |
greenberg_2365 | |
0xba...e9cb | |
19062""" | |
def parse_points(docstring):
    """Parse pasted leaderboard text into a list of per-user records.

    The text must hold four lines per user, in this order: rank, name,
    address, points. Leading/trailing whitespace of the whole text and of
    every individual line is ignored, so Windows line endings and stray
    spaces picked up while copying do not end up inside names.

    Args:
        docstring: The multiline string with the leaderboard rows.

    Returns:
        A list of dicts with the keys 'rank' (int), 'name' (str),
        'address' (str) and 'points' (int), in input order. Blank input
        yields an empty list.

    Raises:
        ValueError: If the number of lines is not a multiple of 4, or if a
            rank or points line is not an integer.
    """
    text = docstring.strip()
    if not text:
        return []

    # strip() per line also removes the '\r' left behind by "\r\n" endings.
    lines = [line.strip() for line in text.split('\n')]

    # Raise explicitly instead of using ``assert``: assertions are removed
    # when Python runs with -O, which would let malformed input through.
    if len(lines) % 4 != 0:
        raise ValueError("Input lines count must be a multiple of 4")

    return [
        {
            'rank': int(lines[start]),
            'name': lines[start + 1],
            'address': lines[start + 2],
            'points': int(lines[start + 3]),
        }
        for start in range(0, len(lines), 4)
    ]
def prepare_plot_data(points):
    """Order the records by rank and split them into parallel plotting lists.

    Returns a 4-tuple ``(sorted_points, ranks, point_values, names)``:
    the records sorted by ascending 'rank', followed by their ranks, their
    points and their names (each name prefixed with a single space), all in
    that same order. The input list itself is left unmodified.
    """
    ordered = list(points)
    ordered.sort(key=lambda record: record['rank'])

    def column(key):
        # One pass over the ordered records for a single field.
        return [record[key] for record in ordered]

    ranks = column('rank')
    point_values = column('points')
    names = [' ' + record['name'] for record in ordered]
    return ordered, ranks, point_values, names
def _select_annotated_users(sorted_points, highlight_names):
    """Return the users of one rank-sorted dataset that get a margin label.

    Picks the first six entries, the 500th entry (when there are at least
    500), the last entry, and every user whose name is in highlight_names.
    The result may contain duplicates; the caller de-duplicates.
    """
    selected = list(sorted_points[:6])
    if len(sorted_points) >= 500:
        # Index 499 is the 500th entry counted from the top of the ranking.
        selected.append(sorted_points[499])
    if sorted_points:
        selected.append(sorted_points[-1])
    selected.extend(u for u in sorted_points if u['name'] in highlight_names)
    return selected


def plot_points_vs_rank(multi_data):
    """Draw a scatter plot of points versus rank for one or more datasets.

    Every user is drawn as a dot with a rotated name label. Users listed in
    Config.HIGHLIGHT are drawn in red with a larger dot and label. Selected
    users (see _select_annotated_users) additionally get a green
    "<points> pts (<name>)" label next to the y-axis. When
    Config.USE_LOG_LOG is set, both axes are logarithmic. The figure is
    shown with plt.show().

    Args:
        multi_data: list of tuples like
            (sorted_points, ranks, point_values, names, label, color)
            where sorted_points is a list of dicts with at least the keys
            'rank', 'name' and 'points', and ranks / point_values / names
            are lists parallel to it.
    """
    plt.figure(figsize=(12, 7))
    ax = plt.gca()

    # Only the first non-empty highlight scatter gets a legend label, so the
    # legend shows 'Highlighted Users' at most once and never for no points.
    highlight_labelled = False

    # Plot each dataset with its color and label
    for sorted_points, ranks, point_values, names, label, color in multi_data:
        highlight_mask = [p['name'] in Config.HIGHLIGHT for p in sorted_points]

        normal_ranks = [r for r, h in zip(ranks, highlight_mask) if not h]
        normal_points = [p for p, h in zip(point_values, highlight_mask) if not h]
        highlight_ranks = [r for r, h in zip(ranks, highlight_mask) if h]
        highlight_points = [p for p, h in zip(point_values, highlight_mask) if h]

        plt.scatter(normal_ranks, normal_points, alpha=1.0, s=10, color=color, label=label)

        # Highlighted points: red, double marker size
        if highlight_ranks:
            plt.scatter(
                highlight_ranks,
                highlight_points,
                alpha=1.0,
                s=20,
                color='red',
                label=None if highlight_labelled else 'Highlighted Users',
            )
            highlight_labelled = True

        # Name label on every point; highlighted names are red and 3x bigger
        for x, y, label_text, is_highlight in zip(ranks, point_values, names, highlight_mask):
            plt.text(
                x,
                y,
                label_text,
                fontsize=21 if is_highlight else 7,
                rotation=30,
                alpha=0.9,
                color='red' if is_highlight else color,
            )

    if Config.USE_LOG_LOG:
        plt.xscale('log')
        plt.yscale('log')

    # Format Y-axis with full numbers (no scientific notation)
    formatter = ScalarFormatter()
    formatter.set_scientific(False)
    formatter.set_useOffset(False)
    ax.yaxis.set_major_formatter(formatter)

    plt.xlabel('Rank (log)' if Config.USE_LOG_LOG else 'Rank')
    plt.ylabel('Points (log)' if Config.USE_LOG_LOG else 'Points')
    plt.title(('Log-Log ' if Config.USE_LOG_LOG else '') + 'Scatter Plot: Points vs. Rank')
    plt.grid(True, which='both', linestyle='--', linewidth=0.5)

    # Collect the users to annotate from all datasets. The key includes the
    # dataset index so equal ranks in different datasets do not overwrite
    # each other, while repeats within one dataset collapse to one label.
    annotated_users = {}
    for dataset_idx, (sorted_points, *_unused) in enumerate(multi_data):
        for user in _select_annotated_users(sorted_points, Config.HIGHLIGHT):
            annotated_users[(dataset_idx, user['rank'])] = user

    # Labels are anchored at 0.9 * the lower x-limit, right-aligned.
    # NOTE(review): this lands left of the axis only when xmin > 0 (as on a
    # log axis) — confirm the placement for the linear-scale case.
    xmin, _ = ax.get_xlim()
    for user in annotated_users.values():
        y = user['points']
        label = f"{y} pts ({user['name'].strip()})"
        ax.text(xmin * 0.9, y, label, fontsize=7, ha='right', va='center', color='green')

    plt.legend()
    plt.tight_layout()
    plt.show()
def main():
    """Parse every ``ps_docstring_*`` module variable and plot them together.

    Each module-level string whose name starts with 'ps_docstring_' is
    treated as one dataset. Datasets are processed in variable-name order,
    labelled "Dataset 1", "Dataset 2", ... and assigned colors from a
    repeating palette.

    Raises:
        RuntimeError: If the module defines no 'ps_docstring_*' string.
    """
    # Discover the datasets by name prefix instead of hard-coding a single
    # variable name, so renaming or adding a data string needs no edit here.
    module_vars = globals()
    ps_docstrings = [
        module_vars[name]
        for name in sorted(module_vars)
        if name.startswith('ps_docstring_') and isinstance(module_vars[name], str)
    ]
    if not ps_docstrings:
        raise RuntimeError(
            "No data found: define a module-level string variable whose name "
            "starts with 'ps_docstring_'"
        )

    colors = itertools.cycle(['blue', 'orange', 'green', 'purple', 'brown'])  # Extend as needed
    multi_data = []
    for idx, doc in enumerate(ps_docstrings):
        points = parse_points(doc)
        sorted_points, ranks, point_values, names = prepare_plot_data(points)
        color = next(colors)
        label = f"Dataset {idx+1}"
        multi_data.append((sorted_points, ranks, point_values, names, label, color))
    plot_points_vs_rank(multi_data)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment