Last active
July 28, 2025 18:45
-
-
Save Nikolaj-K/32991a5566c001981c5fe8b4f9f70e41 to your computer and use it in GitHub Desktop.
Compute the order statistic (by default: for the uniform distribution on [0,1])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Code for the video at | |
https://youtu.be/CquQe7Y05VA | |
This script will be easy to generalize, also. | |
""" | |
from datetime import datetime | |
import imageio.v2 as imageio | |
from io import BytesIO | |
import matplotlib.pyplot as plt | |
from matplotlib import colormaps | |
import numpy as np | |
import os | |
from scipy.stats import beta | |
class Config: | |
LEN_RANDOM_VEC = 7 | |
NUM_VEC_SAMPLES = 100 | |
NUM_BINS = 20 | |
IMAGES_DIRPATH = r"/path/to/your/target/image/dirpath" | |
GIF_RUNTIME_S = 50 | |
COLOR_MAP = colormaps['Set1'] | |
NUM_IMAGES = Config.NUM_VEC_SAMPLES | |
FPS = 3 * int(NUM_IMAGES / Config.GIF_RUNTIME_S) | |
# To look nice and likely see all values falling in, set ylim to the exp+2*std | |
NUM_SAMPLES = Config.NUM_VEC_SAMPLES * Config.LEN_RANDOM_VEC | |
EXP_PER_BIN = NUM_SAMPLES / Config.NUM_BINS | |
VAR_PER_BIN = EXP_PER_BIN * (1 - EXP_PER_BIN / NUM_SAMPLES) # binomial model | |
YLIM = EXP_PER_BIN + 3 * np.sqrt(VAR_PER_BIN) | |
def show_beta(): | |
plt.figure(figsize=(8, 5)) | |
xs = np.linspace(0, 1, 500) | |
n = Config.LEN_RANDOM_VEC | |
for idx in range(n): | |
k = idx + 1 | |
a, b = k, n + 1 - k | |
terms = [] | |
if a >= 2: | |
terms.append(f"x^{{{a - 1}}}" if a > 2 else "x") | |
if b >= 2: | |
terms.append(f"(1 - x)^{{{b - 1}}}" if b > 2 else "(1 - x)") | |
func = " \\cdot ".join(terms) if terms else "1" | |
label = f"$\\mathrm{{Beta}}({a}, {b}) \\propto {func}$" | |
ys = beta.pdf(xs, a, b) | |
color = Config.COLOR_MAP(1 - k / n) | |
plt.plot(xs, ys, label=label, color=color, linewidth=2) | |
plt.fill_between(xs, ys, color=color, alpha=0.4) | |
plt.title(f"PDFs of Beta Distributions for Order Statistics\nof {n} Samples from $\\mathcal{{U}}(0, 1)$") | |
plt.xlabel("x") | |
plt.ylabel("Density") | |
plt.legend(loc="upper right", fontsize='small') | |
plt.grid(True, alpha=0.3) | |
plt.tight_layout() | |
image_filepath = f"{Config.IMAGES_DIRPATH}/euler_betas_{Config.LEN_RANDOM_VEC}.png" | |
plt.savefig(image_filepath) | |
#plt.show() | |
def sample_random_vectors(callable_distributions): | |
return [[p() for p in callable_distributions] for _ in range(Config.NUM_VEC_SAMPLES)] | |
def generate_frame(random_vector_samples): | |
fig, axs = plt.subplots(1, 2, figsize=(9, 5)) | |
fig, axs = plt.subplots(2, 1, figsize=(5, 10)) # Vertical stacking of plots | |
t_num_samples = len(random_vector_samples) * Config.LEN_RANDOM_VEC | |
# Combined histogram | |
flattened_samples = np.array(random_vector_samples).flatten() | |
axs[0].hist(flattened_samples, bins=Config.NUM_BINS, range=(0, 1), color='skyblue', edgecolor='black') | |
axs[0].set_title(f"Count histogram evolution, frame #{t_num_samples} / {NUM_SAMPLES},\nSampling {Config.LEN_RANDOM_VEC} values at a time\nEach via uniform distribution $\\mathcal{{U}}(0, 1)$") | |
axs[0].set_xlim(0, 1) | |
axs[0].set_ylim(0, YLIM) | |
# Overlay individual histograms | |
bin_edges = np.linspace(0, 1, Config.NUM_BINS + 1) | |
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2 | |
for idx in range(Config.LEN_RANDOM_VEC): | |
samples = [elem[idx] for elem in random_vector_samples] | |
counts, _ = np.histogram(samples, bins=bin_edges) | |
ramp = (1+idx) / Config.LEN_RANDOM_VEC | |
axs[1].bar(bin_centers, counts, width=1/Config.NUM_BINS, alpha=0.9*ramp, color=Config.COLOR_MAP(1 - ramp)) | |
axs[1].set_title(f"Count histogram evolutions,\nSame samples\nSeperating all {Config.LEN_RANDOM_VEC} order statistics") | |
axs[1].set_xlim(0, 1) | |
axs[1].set_ylim(0, YLIM) | |
# axvline | |
for ax in axs: | |
for idx, val in enumerate(random_vector_samples[-1]): | |
k = idx + 1 | |
ramp = k / Config.LEN_RANDOM_VEC | |
ax.axvline(x=val, color=Config.COLOR_MAP(1 - ramp), linestyle='-', linewidth=(1+idx)/2) | |
plt.tight_layout() | |
# Render to in-memory image | |
buf = BytesIO() | |
plt.savefig(buf, format='png', dpi=120) | |
plt.close(fig) | |
buf.seek(0) | |
img = imageio.imread(buf) | |
buf.close() | |
return img | |
def make_gif(all_samples_sorted): | |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") | |
filename = f"euler_beta_sampling_{Config.LEN_RANDOM_VEC}_{timestamp}.gif" | |
gif_filepath = os.path.join(Config.IMAGES_DIRPATH, "gif", filename) | |
imageio.mimsave(gif_filepath, frames, fps=FPS) | |
print(f"GIF saved to: {gif_filepath}") | |
if __name__ == '__main__': | |
show_beta() | |
CALLABLE_DISTRIBUTIONS = [ | |
(lambda: np.random.uniform(0, 1)) | |
for _ in range(Config.LEN_RANDOM_VEC) | |
] | |
all_samples = sample_random_vectors(CALLABLE_DISTRIBUTIONS) | |
all_samples_sorted = list(map(sorted, all_samples)) # We want to see the order statistics | |
print(f"Rendering images and GIF with {NUM_IMAGES} frames...") | |
frames = [ | |
generate_frame(all_samples_sorted[:idx + 1]) | |
for idx in range(NUM_IMAGES) | |
] | |
os.makedirs(os.path.join(Config.IMAGES_DIRPATH, "gif"), exist_ok=True) | |
make_gif(all_samples_sorted) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment