Created
May 16, 2025 08:19
-
-
Save al6x/a320d1233f47255e4c296ab56a121bb4 to your computer and use it in GitHub Desktop.
Insanely high mean annual returns for volatile stocks in historical data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from matplotlib.ticker import FuncFormatter, LogLocator | |
# Columns expected in the input TSV:
#   lr_rf_1y_t  : float - risk-free log return at time t (e.g., log(1.03) for 3%)
#   lr_t2       : float - actual log return of the stock at time t2 = t + period_d
#   ema_var_d_t : float - daily variance as log(return)^2, current estimate at
#                         time t, computed as EMA(span=365/3)
#   h_var_d     : float - daily variance as log(return)^2, historical estimate
#                         over the whole history of the stock
#   period_d    : int   - always 365, can be ignored
#   symbol      : str   - stock symbol (e.g., 'AAPL')
#   t           : str   - date at time t in 'YYYY-MM-DD' format
df = pd.read_csv(
    '/storage/data/alien/experiments/historical_predictor/returns_period365_step365.tsv',
    sep='\t',
).assign(
    # Log return in excess of the risk-free log return.
    nlr_t2=lambda d: d['lr_t2'] - d['lr_rf_1y_t'],
    # Multiplicative (gross) stock return.
    r_t2=lambda d: np.exp(d['lr_t2']),
    # Multiplicative stock return relative to the risk-free return;
    # relies on nlr_t2 assigned just above (assign evaluates in order).
    nr_t2=lambda d: np.exp(d['nlr_t2']),
)
def _centered_window_means(frame: pd.DataFrame, window: int):
    """Average ``nr_t2`` over a centred window of rows ordered by ``ema_var_d_t``.

    Args:
        frame: Table with at least the ``ema_var_d_t`` and ``nr_t2`` columns.
        window: Number of neighbouring rows taken on EACH side of a row, so
            every average covers ``2 * window + 1`` rows.

    Returns:
        A ``(volatility, mean_return)`` pair of equally long NumPy arrays with
        one entry per row that has a complete window. Both are empty when the
        table has fewer than ``2 * window + 1`` rows.

    Raises:
        ValueError: If ``window`` is negative.
    """
    if window < 0:
        raise ValueError(f'window must be non-negative, got {window}')

    ordered = frame.sort_values('ema_var_d_t').reset_index(drop=True)
    span = 2 * window + 1
    if len(ordered) < span:
        # No row has `window` neighbours on both sides.
        return np.array([], dtype=float), np.array([], dtype=float)

    # One rolling pass instead of re-slicing and re-averaging per row.
    # min_periods=1 makes the mean ignore missing returns instead of turning
    # the whole window into NaN.
    means = ordered['nr_t2'].rolling(span, center=True, min_periods=1).mean()

    # Drop the first and last `window` rows: their windows are incomplete.
    complete = slice(window, len(ordered) - window)
    return (
        ordered['ema_var_d_t'].to_numpy()[complete],
        means.to_numpy()[complete],
    )


def plot_mean_vs_vol(window: int = 500) -> None:
    """Plot the moving average of 1y risk-free-relative returns against volatility.

    Rows of the module-level ``df`` are ordered by ``ema_var_d_t``; for each row
    with ``window`` neighbours on both sides, the mean of ``nr_t2`` over those
    ``2 * window + 1`` rows is drawn against the row's ``ema_var_d_t`` on a
    logarithmic x axis. If ``df`` has too few rows the curve is empty.

    Args:
        window: Half-width of the moving window, in rows.

    Raises:
        ValueError: If ``window`` is negative.
    """
    volatility, mean_return = _centered_window_means(df, window)

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(volatility, mean_return, linewidth=2)
    ax.set_title('Average of 1y Stock Returns vs its current Volatility')
    ax.set_xlabel('Volatility at t0 as EWMA[(log return)^2, span=365/3]')
    ax.set_ylabel('mean[S_365/S_0/RF_0] as Moving Window')
    ax.grid(True)

    # The scale must be set before the locator/formatter: switching the scale
    # installs its own defaults and would discard custom ones set earlier.
    ax.set_xscale('log')
    ax.xaxis.set_major_locator(
        LogLocator(base=10.0, subs=[1.0, 2.0, 5.0], numticks=10)
    )
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, _: f'{x:.4f}'))

    # Lay out last so the padding accounts for the final tick labels.
    fig.tight_layout()
    plt.show()
# Draw the chart using 500 neighbouring samples on each side of every point.
plot_mean_vs_vol(500)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This simulation may explain it: generating random Brownian walks and dropping 0.3% of them as bankruptcies gives the same result — a very high mean return for high-volatility stocks.