@al6x
Created May 16, 2025 08:19
Insanely high mean annual returns for volatile stocks in historical data
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter, LogLocator
# Data:
# lr_rf_1y_t : float — risk-free log return at time t (e.g., log(1.03) for 3%)
# lr_t2 : float — stock actual log return at time t2 = t + period_d
# ema_var_d_t : float — daily variance as log(return)^2, current estimate at time t as EMA(span=365/3)
# h_var_d : float — daily variance as log(return)^2, historical estimate over whole stock history
# period_d : int — always 365, can be ignored
# symbol : str — stock symbol (e.g., 'AAPL')
# t : str — date at time t in 'YYYY-MM-DD' format
df = pd.read_csv('/storage/data/alien/experiments/historical_predictor/returns_period365_step365.tsv', sep='\t')
df['nlr_t2'] = df['lr_t2'] - df['lr_rf_1y_t'] # Risk-adjusted log return
df['r_t2'] = np.exp(df['lr_t2']) # Multiplicative stock return
df['nr_t2'] = np.exp(df['nlr_t2']) # Multiplicative risk-adjusted stock return
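# Sanity check (added note, not in the original gist): the risk-adjusted multiplicative
# return is just the raw return discounted by the risk-free gross return,
# i.e. nr_t2 = exp(lr_t2 - lr_rf_1y_t) = r_t2 / exp(lr_rf_1y_t).
assert np.allclose(df['nr_t2'], df['r_t2'] / np.exp(df['lr_rf_1y_t']))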
def plot_mean_vs_vol(window: int = 500):
  # Sort by the current volatility estimate, then take a centered moving-window
  # mean of the risk-adjusted multiplicative return around each point.
  df_sorted = df.sort_values('ema_var_d_t').reset_index(drop=True)
  result_x = []
  result_y = []
  for i in range(len(df_sorted)):
    left = i - window
    right = i + window
    if left < 0 or right >= len(df_sorted):
      continue # Not enough samples around i, skip
    avg_exp_nlr_t2 = np.mean(df_sorted['nr_t2'].iloc[left:right+1])
    result_x.append(df_sorted['ema_var_d_t'].iloc[i])
    result_y.append(avg_exp_nlr_t2)

  # Plot
  plt.figure(figsize=(12, 6))
  plt.plot(result_x, result_y, linewidth=2)
  plt.title('Average of 1y Stock Returns vs Current Volatility')
  plt.xlabel('Volatility at t0 as EWMA[(log return)^2, span=365/3]')
  plt.xscale('log')
  plt.ylabel('mean[S_365/S_0/RF_0] over moving window')
  plt.grid(True)
  plt.tight_layout()
  plt.gca().xaxis.set_major_locator(LogLocator(base=10.0, subs=[1.0, 2.0, 5.0], numticks=10))
  plt.gca().xaxis.set_major_formatter(FuncFormatter(lambda x, _: f'{x:.4f}'))
  plt.show()
plot_mean_vs_vol(window=500)
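A side note (not part of the original gist): the same centered moving-window mean can be computed without the Python loop, using pandas' rolling with a window of 2*window+1; it should reproduce the curve above exactly.

def plot_mean_vs_vol_rolling(window: int = 500):
  # Sketch of a vectorized equivalent, assuming the same df columns as above.
  df_sorted = df.sort_values('ema_var_d_t').reset_index(drop=True)
  win = 2 * window + 1 # same centered window width as the loop version
  rolled = df_sorted['nr_t2'].rolling(win, center=True).mean() # NaN where the window is incomplete
  mask = rolled.notna()
  plt.figure(figsize=(12, 6))
  plt.plot(df_sorted['ema_var_d_t'][mask], rolled[mask], linewidth=2)
  plt.xscale('log')
  plt.xlabel('Volatility at t0 as EWMA[(log return)^2, span=365/3]')
  plt.ylabel('mean[S_365/S_0/RF_0] over moving window')
  plt.grid(True)
  plt.show()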
al6x commented May 16, 2025

This simulation may explain it: generating random Brownian walks and dropping the worst 0.3% of outcomes as bankruptcies gives the same result, a very high mean for high-volatility stocks.

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

np.random.seed(0)

def simulate_gbm(volatility, T=1.0, n_sim=10000):
  # GBM with mu = 0, sigma = volatility
  Z = np.random.normal(0, 1, n_sim)
  S_T_S_0 = np.exp(-0.5 * volatility**2 * T + volatility * np.sqrt(T) * Z)
  return S_T_S_0

vols = [0.1, 0.2, 0.3, 0.5, 0.7, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0]

results = []
for vol in vols:
  returns = simulate_gbm(vol)
  # Drop outcomes below the 0.3rd percentile (crude proxy for bankruptcies)
  threshold = np.percentile(returns, 0.3)
  returns_filtered = returns[returns >= threshold]
  results.append({
    'volatility': vol,
    'mean_S_T_S_0': np.mean(returns_filtered)
  })

df_results = pd.DataFrame(results)
print(df_results.round(4))

# Plot
plt.figure(figsize=(10, 6))
plt.plot(df_results['volatility'], df_results['mean_S_T_S_0'], label='Mean S_T / S_0', marker='o')
plt.axhline(1.0, color='gray', linestyle='--', label='Expected Value')
plt.xlabel('Volatility (σ)')
plt.ylabel('S_T / S_0')
plt.title('Effect of Volatility on mean(S_T/S_0) after dropping the worst 0.3%')
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()
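A note on why the reference line sits at 1.0 (my addition, standard lognormal algebra rather than anything from the original comment): under driftless GBM, E[S_T/S_0] = exp(-0.5*sigma^2*T) * E[exp(sigma*sqrt(T)*Z)] = 1 for every sigma, while the median is exp(-0.5*sigma^2*T), so as sigma grows the mean is carried by a handful of extreme outcomes far above the typical result. A quick analytic comparison over the same vols:

# Analytic mean vs median of S_T/S_0 under driftless GBM (T = 1),
# showing how far the mean sits above the typical (median) outcome as sigma grows.
T = 1.0
for vol in vols:
  mean_analytic = 1.0 # E[exp(-0.5*vol**2*T + vol*sqrt(T)*Z)] = 1
  median_analytic = np.exp(-0.5 * vol**2 * T) # median of the lognormal
  print(f'sigma={vol:.1f} mean={mean_analytic:.4f} median={median_analytic:.4f}')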
