Nikolaj-K · July 7, 2025 18:27
diff --git a/plot_ranks_log_log.py b/plot_ranks_log_log.py
 """
 To use this script, assign the leaderboard data to a variable whose name starts
 with 'ps_docstring_': a 4000-line string holding the 1000 rows of points data
 (four lines per row: rank, name, address, points).
 To get it, just go to https://quest.somnia.network/ and copy the 1000 rows.
 """

 import matplotlib.pyplot as plt
 from matplotlib.ticker import ScalarFormatter
 import itertools

 class Config:
    """Plot settings read by the plotting code."""

    # User names to emphasise in the plot; any user name may be listed here.
    HIGHLIGHT = ['0x_empire']
    # When true, both axes are switched to a logarithmic scale.
    USE_LOG_LOG = True

 # Example data string (replace with your real data strings).
 # Each record is four consecutive lines: rank, name, address, points.
 ps_docstring_250707_REF = """1
 GratefulApe
 0xa7...0834
 3241267
 2
 dmitrys
 0xe3...0d0b
 772010
 3
 drew589
 0x92...7253
 555949
 999
 shukrisayuti
 0xb1...04ed
 19066
 1000
 greenberg_2365
 0xba...e9cb
 19062"""


 def parse_points(docstring):
    """Parse leaderboard text into a list of dicts with keys rank, name, address, points.

    The text is read as consecutive groups of four lines: rank, name,
    address, points. Whitespace around every line is ignored, and empty
    input yields an empty list.

    Raises:
        ValueError: if the number of lines is not a multiple of 4, or if a
            rank or points line is not an integer.
    """
    # splitlines() copes with \r\n line endings from clipboard pastes; stripping
    # each line keeps stray indentation out of the stored names and addresses.
    lines = [line.strip() for line in docstring.strip().splitlines()]
    if len(lines) % 4 != 0:
        # An explicit raise (unlike assert) still runs under `python -O`.
        raise ValueError("Input lines count must be a multiple of 4")
    return [
        {
            'rank': int(rank_line),
            'name': name_line,
            'address': address_line,
            'points': int(points_line),
        }
        for rank_line, name_line, address_line, points_line
        in zip(*(lines[i::4] for i in range(4)))
    ]

 def prepare_plot_data(points):
    """Order the entries by rank and split them into parallel lists for plotting.

    Returns a tuple (sorted_points, ranks, point_values, names); each name is
    prefixed with two spaces so its text label sits clear of the marker.
    """
    by_rank = list(points)
    by_rank.sort(key=lambda entry: entry['rank'])

    ranks, point_values, names = [], [], []
    for entry in by_rank:
        ranks.append(entry['rank'])
        point_values.append(entry['points'])
        names.append('  ' + entry['name'])
    return by_rank, ranks, point_values, names

 def _select_annotated_users(multi_data):
    """Pick the entries whose point totals get a margin annotation.

    For every dataset this selects the top six entries, the 500th entry (when
    the dataset has at least 500 rows), the last entry, and every user named
    in Config.HIGHLIGHT. Entries are keyed by (dataset index, rank), so an
    entry is selected only once per dataset and datasets do not overwrite
    each other.
    """
    selected = {}
    for dataset_idx, (sorted_points, *_unused) in enumerate(multi_data):
        if not sorted_points:
            continue  # nothing to annotate; also avoids indexing an empty list
        picks = list(sorted_points[:6])
        if len(sorted_points) >= 500:
            picks.append(sorted_points[499])  # index 499 is the 500th entry
        picks.append(sorted_points[-1])
        picks.extend(u for u in sorted_points if u['name'] in Config.HIGHLIGHT)
        for user in picks:
            selected[(dataset_idx, user['rank'])] = user
    return list(selected.values())


 def plot_points_vs_rank(multi_data):
    """
    Draw a scatter plot of points against rank for one or more datasets.

    multi_data: list of tuples like
      (sorted_points, ranks, point_values, names, label, color)

    Users named in Config.HIGHLIGHT are drawn in red with larger markers and
    text. Both axes are logarithmic when Config.USE_LOG_LOG is set. The
    figure is displayed with plt.show().
    """
    plt.figure(figsize=(12, 7))
    ax = plt.gca()

    highlight_in_legend = False

    # Plot each dataset with its color and label
    for sorted_points, ranks, point_values, names, label, color in multi_data:
        # Highlight mask
        highlight_mask = [p['name'] in Config.HIGHLIGHT for p in sorted_points]

        # Normal points
        normal_ranks = [r for r, h in zip(ranks, highlight_mask) if not h]
        normal_points = [p for p, h in zip(point_values, highlight_mask) if not h]

        # Highlight points
        highlight_ranks = [r for r, h in zip(ranks, highlight_mask) if h]
        highlight_points = [p for p, h in zip(point_values, highlight_mask) if h]

        # Plot normal points
        plt.scatter(normal_ranks, normal_points, alpha=1.0, s=10, color=color, label=label)

        # Plot highlighted points (red, double size). Skip empty sets and label
        # only the first one so the legend gets at most one highlight entry.
        if highlight_ranks:
            highlight_label = '_nolegend_' if highlight_in_legend else 'Highlighted Users'
            plt.scatter(highlight_ranks, highlight_points, alpha=1.0, s=20, color='red',
                        label=highlight_label)
            highlight_in_legend = True

        # Add all text labels, highlighted users' texts are red and 3x bigger
        for x, y, label_text, is_highlight in zip(ranks, point_values, names, highlight_mask):
            if is_highlight:
                plt.text(x, y, label_text, fontsize=21, rotation=30, alpha=0.9, color='red')
            else:
                plt.text(x, y, label_text, fontsize=7, rotation=30, alpha=0.9, color=color)

    if Config.USE_LOG_LOG:
        plt.xscale('log')
        plt.yscale('log')

    # Format Y-axis with full numbers (no scientific notation)
    formatter = ScalarFormatter()
    formatter.set_scientific(False)
    formatter.set_useOffset(False)
    ax.yaxis.set_major_formatter(formatter)

    plt.xlabel('Rank (log)' if Config.USE_LOG_LOG else 'Rank')
    plt.ylabel('Points (log)' if Config.USE_LOG_LOG else 'Points')
    plt.title(('Log-Log ' if Config.USE_LOG_LOG else '') + 'Scatter Plot: Points vs. Rank')
    plt.grid(True, which='both', linestyle='--', linewidth=0.5)

    # Annotate the selected users' point totals just left of the x-axis minimum
    xmin, _ = ax.get_xlim()

    for user in _select_annotated_users(multi_data):
        y = user['points']
        annotation = f"{y} pts ({user['name'].strip()})"
        ax.text(xmin * 0.9, y, annotation, fontsize=7, ha='right', va='center', color='green')

    plt.legend()
    plt.tight_layout()
    plt.show()

 def main():
    """Parse every module-level ``ps_docstring_*`` string and plot the datasets together.

    Exits with an error message when no such variable is defined.
    """
    # Look the data variables up by prefix instead of hard-coding one name, so
    # the script runs with whichever 'ps_docstring_*' variables are defined.
    dataset_names = sorted(
        name for name, value in globals().items()
        if name.startswith('ps_docstring_') and isinstance(value, str)
    )
    if not dataset_names:
        raise SystemExit("No variable starting with 'ps_docstring_' is defined")
    ps_docstrings = [globals()[name] for name in dataset_names]

    colors = itertools.cycle(['blue', 'orange', 'green', 'purple', 'brown'])  # Extend as needed

    multi_data = []
    # zip() stops when the datasets run out; the color cycle is endless.
    for idx, (doc, color) in enumerate(zip(ps_docstrings, colors)):
        points = parse_points(doc)
        sorted_points, ranks, point_values, names = prepare_plot_data(points)
        label = f"Dataset {idx+1}"
        multi_data.append((sorted_points, ranks, point_values, names, label, color))

    plot_points_vs_rank(multi_data)

 if __name__ == "__main__":
    main()
	"""
	To use this script, assign the leaderboard data to a variable whose name starts
	with 'ps_docstring_': a 4000-line string holding the 1000 rows of points data
	(four lines per row: rank, name, address, points).
	To get it, just go to https://quest.somnia.network/ and copy the 1000 rows.
	"""

	import matplotlib.pyplot as plt
	from matplotlib.ticker import ScalarFormatter
	import itertools

	class Config:
	    """Plot settings read by the plotting code."""

	    # When true, both axes are switched to a logarithmic scale.
	    USE_LOG_LOG = True
	    # User names to emphasise in the plot; any user name may be listed here.
	    HIGHLIGHT = ['0x_empire']

	# Example data string (replace with your real data strings).
	# Each record is four consecutive lines: rank, name, address, points.
	ps_docstring_250707_REF = """1
	GratefulApe
	0xa7...0834
	3241267
	2
	dmitrys
	0xe3...0d0b
	772010
	3
	drew589
	0x92...7253
	555949
	999
	shukrisayuti
	0xb1...04ed
	19066
	1000
	greenberg_2365
	0xba...e9cb
	19062"""


	def parse_points(docstring):
	    """Parse leaderboard text into a list of dicts with keys rank, name, address, points.

	    The text is read as consecutive groups of four lines: rank, name,
	    address, points. Whitespace around every line is ignored, and empty
	    input yields an empty list.

	    Raises:
	        ValueError: if the number of lines is not a multiple of 4, or if a
	            rank or points line is not an integer.
	    """
	    # splitlines() copes with \r\n line endings from clipboard pastes; stripping
	    # each line keeps stray tabs/indentation out of the stored names and addresses.
	    lines = [line.strip() for line in docstring.strip().splitlines()]
	    if len(lines) % 4 != 0:
	        # An explicit raise (unlike assert) still runs under `python -O`.
	        raise ValueError("Input lines count must be a multiple of 4")
	    return [
	        {
	            'rank': int(rank_line),
	            'name': name_line,
	            'address': address_line,
	            'points': int(points_line),
	        }
	        for rank_line, name_line, address_line, points_line
	        in zip(*(lines[i::4] for i in range(4)))
	    ]

	def prepare_plot_data(points):
	    """Sort the entries by rank and split them into parallel lists for plotting.

	    Returns a tuple (sorted_points, ranks, point_values, names); each name is
	    prefixed with a space so its text label sits clear of the marker.
	    """
	    sorted_points = sorted(points, key=lambda x: x['rank'])
	    ranks = [p['rank'] for p in sorted_points]
	    point_values = [p['points'] for p in sorted_points]
	    names = [' ' + p['name'] for p in sorted_points]
	    return sorted_points, ranks, point_values, names

	def _select_annotated_users(multi_data):
	    """Pick the entries whose point totals get a margin annotation.

	    For every dataset this selects the top six entries, the 500th entry (when
	    the dataset has at least 500 rows), the last entry, and every user named
	    in Config.HIGHLIGHT. Entries are keyed by (dataset index, rank), so an
	    entry is selected only once per dataset and datasets do not overwrite
	    each other.
	    """
	    selected = {}
	    for dataset_idx, (sorted_points, *_unused) in enumerate(multi_data):
	        if not sorted_points:
	            continue  # nothing to annotate; also avoids indexing an empty list
	        picks = list(sorted_points[:6])
	        if len(sorted_points) >= 500:
	            picks.append(sorted_points[499])  # index 499 is the 500th entry
	        picks.append(sorted_points[-1])
	        picks.extend(u for u in sorted_points if u['name'] in Config.HIGHLIGHT)
	        for user in picks:
	            selected[(dataset_idx, user['rank'])] = user
	    return list(selected.values())


	def plot_points_vs_rank(multi_data):
	    """
	    Draw a scatter plot of points against rank for one or more datasets.

	    multi_data: list of tuples like
	      (sorted_points, ranks, point_values, names, label, color)

	    Users named in Config.HIGHLIGHT are drawn in red with larger markers and
	    text. Both axes are logarithmic when Config.USE_LOG_LOG is set. The
	    figure is displayed with plt.show().
	    """
	    plt.figure(figsize=(12, 7))
	    ax = plt.gca()

	    highlight_in_legend = False

	    # Plot each dataset with its color and label
	    for sorted_points, ranks, point_values, names, label, color in multi_data:
	        # Highlight mask
	        highlight_mask = [p['name'] in Config.HIGHLIGHT for p in sorted_points]

	        # Normal points
	        normal_ranks = [r for r, h in zip(ranks, highlight_mask) if not h]
	        normal_points = [p for p, h in zip(point_values, highlight_mask) if not h]

	        # Highlight points
	        highlight_ranks = [r for r, h in zip(ranks, highlight_mask) if h]
	        highlight_points = [p for p, h in zip(point_values, highlight_mask) if h]

	        # Plot normal points
	        plt.scatter(normal_ranks, normal_points, alpha=1.0, s=10, color=color, label=label)

	        # Plot highlighted points (red, double size). Skip empty sets and label
	        # only the first one so the legend gets at most one highlight entry.
	        if highlight_ranks:
	            highlight_label = '_nolegend_' if highlight_in_legend else 'Highlighted Users'
	            plt.scatter(highlight_ranks, highlight_points, alpha=1.0, s=20, color='red',
	                        label=highlight_label)
	            highlight_in_legend = True

	        # Add all text labels, highlighted users' texts are red and 3x bigger
	        for x, y, label_text, is_highlight in zip(ranks, point_values, names, highlight_mask):
	            if is_highlight:
	                plt.text(x, y, label_text, fontsize=21, rotation=30, alpha=0.9, color='red')
	            else:
	                plt.text(x, y, label_text, fontsize=7, rotation=30, alpha=0.9, color=color)

	    if Config.USE_LOG_LOG:
	        plt.xscale('log')
	        plt.yscale('log')

	    # Format Y-axis with full numbers (no scientific notation)
	    formatter = ScalarFormatter()
	    formatter.set_scientific(False)
	    formatter.set_useOffset(False)
	    ax.yaxis.set_major_formatter(formatter)

	    plt.xlabel('Rank (log)' if Config.USE_LOG_LOG else 'Rank')
	    plt.ylabel('Points (log)' if Config.USE_LOG_LOG else 'Points')
	    plt.title(('Log-Log ' if Config.USE_LOG_LOG else '') + 'Scatter Plot: Points vs. Rank')
	    plt.grid(True, which='both', linestyle='--', linewidth=0.5)

	    # Annotate the selected users' point totals just left of the x-axis minimum
	    xmin, _ = ax.get_xlim()

	    for user in _select_annotated_users(multi_data):
	        y = user['points']
	        annotation = f"{y} pts ({user['name'].strip()})"
	        ax.text(xmin * 0.9, y, annotation, fontsize=7, ha='right', va='center', color='green')

	    plt.legend()
	    plt.tight_layout()
	    plt.show()

	def main():
	    """Parse every module-level ``ps_docstring_*`` string and plot the datasets together.

	    Exits with an error message when no such variable is defined.
	    """
	    # Look the data variables up by prefix instead of hard-coding one name, so
	    # the script runs with whichever 'ps_docstring_*' variables are defined.
	    dataset_names = sorted(
	        name for name, value in globals().items()
	        if name.startswith('ps_docstring_') and isinstance(value, str)
	    )
	    if not dataset_names:
	        raise SystemExit("No variable starting with 'ps_docstring_' is defined")
	    ps_docstrings = [globals()[name] for name in dataset_names]

	    colors = itertools.cycle(['blue', 'orange', 'green', 'purple', 'brown'])  # Extend as needed

	    multi_data = []
	    # zip() stops when the datasets run out; the color cycle is endless.
	    for idx, (doc, color) in enumerate(zip(ps_docstrings, colors)):
	        points = parse_points(doc)
	        sorted_points, ranks, point_values, names = prepare_plot_data(points)
	        label = f"Dataset {idx+1}"
	        multi_data.append((sorted_points, ranks, point_values, names, label, color))

	    plot_points_vs_rank(multi_data)

	if __name__ == "__main__":
	    main()