al6x · June 25, 2025 07:28
diff --git a/synthetic_prices.py b/synthetic_prices.py
 import pandas as pd
 import numpy as np
 from scipy.integrate import quad
 from scipy.stats import norm
 import matplotlib.pyplot as plt
 import math

 def data_stats():
  # Distrs as (weights, scales_rel) where scales_rel - relative to the main scale

  periods = [30, 60, 91, 182, 365]
  mmeans = [
    1.0253,
    1.0417,
    1.0477,
    1.0878,
    1.1151,
  ]
  scales = [
    [0.0443, 0.0511, 0.0556, 0.0618, 0.0640, 0.0725, 0.0791, 0.0850, 0.0976, 0.1360],
    [0.0705, 0.0788, 0.0904, 0.0967, 0.1035, 0.1113, 0.1213, 0.1364, 0.1473, 0.2158],
    [0.0904, 0.1015, 0.1192, 0.1177, 0.1350, 0.1449, 0.1509, 0.1761, 0.2010, 0.3580],
    [0.1164, 0.1432, 0.1491, 0.1606, 0.1697, 0.1795, 0.1937, 0.2217, 0.3317, 0.4263],
    [0.1814, 0.2099, 0.2338, 0.2487, 0.2746, 0.2786, 0.2953, 0.3348, 0.4542, 0.5563],
  ]
  distrs = [
    ([0.5275, 0.3493, 0.1232], [0.6265, 1.2806, 1.3374]),
    ([0.5372, 0.3437, 0.1191], [0.6707, 1.2586, 1.3404]),
    ([0.5611, 0.3199, 0.1190], [0.6360, 1.2282, 1.5624]),
    ([0.5617, 0.3193, 0.1190], [0.6285, 1.2543, 1.5224]),
    ([0.5668, 0.3214, 0.1118], [0.6388, 1.1271, 1.7953]),
  ]

  return np.asarray(periods), np.asarray(mmeans), np.asarray(scales), np.asarray(distrs)

 def put_premium_true(ds, ks):
  def put_premium_true_scalar(d, k):
    weights, locs, scales = d
    def lognorm_pdf(x):
      return np.sum(weights * norm.pdf(np.log(x), loc=locs, scale=scales) / x)

    result, _ = quad(lambda s: (k - s) * lognorm_pdf(s), 0, k)
    return result

  return np.array([put_premium_true_scalar(d, k) for d, k in zip(ds, ks)])

 def put_premium_bs(loc, scale, k):
  lk = np.log(k)
  z  = (lk - loc) / scale
  z2 = z - scale
  return k * norm.cdf(z) - np.exp(loc + 0.5 * scale**2) * norm.cdf(z2)

 def put_premium(ds, ks):
  out = []
  for i in range(len(ks)):
    weights, locs, scales = ds[i]
    locs = np.asarray(locs)
    scales = np.asarray(scales)
    bs_vals = put_premium_bs(locs, scales, ks[i])
    out.append(np.dot(weights, bs_vals))
  return np.array(out)

 def gen_data(n, stubs = {}):
  # Generating data as Brownian Motion with stochastic time
  periods, mmeans, vol_levels, distrs = data_stats()
  dfs = []
  for pi, period in enumerate(periods):
    mmean, (weights, scales_rel) = mmeans[pi], distrs[pi]

    # Sampling volatility, vol used as stochastic time
    vol_idx = np.random.randint(0, len(vol_levels[pi]), size=n)
    vols = np.array(vol_levels[pi])[vol_idx]

    # Sampling normal mixture
    comp_idx= np.random.choice(len(weights), size=n, p=weights)
    scales = vols * scales_rel[comp_idx]
    locs   = np.log(mmean) - 0.5 * scales**2

    # Generating random returns
    returns = np.random.normal(locs, scales)
    dfp = pd.DataFrame({
      'vol_dc':   vol_idx + 1,
      'lr_t2':    returns,
      'vol':      vols,
      'period_d': period
    })
    dfs.append(dfp)

  df_all = pd.concat(dfs, ignore_index=True)

  # Add stubs
  for k, v in stubs.items():
    df_all[k] = v

  return periods, df_all

 def plot_premium(model=None, use_zoom=False):
  ks_grid = np.linspace(0.5, 0.9, 10)
  periods, mmeans, vol_levels, distrs = data_stats()

  # Preparing data
  plot_data = {}
  for pi, period in enumerate(periods):
    mmean, (weights, scales_rel) = mmeans[pi], distrs[pi]
    zoom = 365/period if use_zoom else 1.0

    xy = []
    for vol in vol_levels[pi]:
      premiums_model = zoom*model(period, vol, ks_grid) if model else None

      scales = vol * scales_rel
      d = (weights, np.log(mmean) - 0.5*scales**2, scales)
      premiums_true  = zoom*put_premium([d] * len(ks_grid), ks_grid)

      xy.append((vol, premiums_model, premiums_true))
    plot_data[period] = xy

  # Plotting
  y_range = (0.001, 0.1)
  n = len(periods); ncols = 3; nrows = math.ceil(n / ncols)
  fig, axes = plt.subplots(nrows, ncols, figsize=(5*ncols, 4*nrows), squeeze=False)

  cmap = plt.get_cmap('coolwarm')
  for idx, period in enumerate(periods):
    ax = axes[idx//ncols][idx%ncols]
    for i, (vol, prem_model, prem_true) in enumerate(plot_data[period]):
      ax.plot(ks_grid, prem_true, label=f"vol={vol:.2f}", color=cmap(i / 9))
      if prem_model is not None:
        ax.plot(ks_grid, prem_model, '--', color=cmap(i / 9))
    ax.set_title(f"Period {period}")
    ax.set_yscale('log')
    ax.set_ylim(y_range[0], y_range[1])
    ax.grid(True, which='both', ls=':')
    if idx == 0:
      ax.legend()

  # remove any unused subplots
  for j in range(n, nrows * ncols):
    fig.delaxes(axes[j//ncols][j%ncols])

  fig.suptitle(f"Put Premiums vs Strikes for Different Periods and Volatilities (zoom: {use_zoom})")
  plt.tight_layout()
  plt.show()
  return fig

 def run():
  # df = gen_data(10_000)
  plot_premium()

 if __name__ == "__main__":
  run()
	import pandas as pd
	import numpy as np
	from scipy.integrate import quad
	from scipy.stats import norm
	import matplotlib.pyplot as plt
	import math

	def data_stats():
	# Distrs as (weights, scales_rel) where scales_rel - relative to the main scale

	periods = [30, 60, 91, 182, 365]
	mmeans = [
	1.0253,
	1.0417,
	1.0477,
	1.0878,
	1.1151,
	]
	scales = [
	[0.0443, 0.0511, 0.0556, 0.0618, 0.0640, 0.0725, 0.0791, 0.0850, 0.0976, 0.1360],
	[0.0705, 0.0788, 0.0904, 0.0967, 0.1035, 0.1113, 0.1213, 0.1364, 0.1473, 0.2158],
	[0.0904, 0.1015, 0.1192, 0.1177, 0.1350, 0.1449, 0.1509, 0.1761, 0.2010, 0.3580],
	[0.1164, 0.1432, 0.1491, 0.1606, 0.1697, 0.1795, 0.1937, 0.2217, 0.3317, 0.4263],
	[0.1814, 0.2099, 0.2338, 0.2487, 0.2746, 0.2786, 0.2953, 0.3348, 0.4542, 0.5563],
	]
	distrs = [
	([0.5275, 0.3493, 0.1232], [0.6265, 1.2806, 1.3374]),
	([0.5372, 0.3437, 0.1191], [0.6707, 1.2586, 1.3404]),
	([0.5611, 0.3199, 0.1190], [0.6360, 1.2282, 1.5624]),
	([0.5617, 0.3193, 0.1190], [0.6285, 1.2543, 1.5224]),
	([0.5668, 0.3214, 0.1118], [0.6388, 1.1271, 1.7953]),
	]

	return np.asarray(periods), np.asarray(mmeans), np.asarray(scales), np.asarray(distrs)

	def put_premium_true(ds, ks):
	def put_premium_true_scalar(d, k):
	weights, locs, scales = d
	def lognorm_pdf(x):
	return np.sum(weights * norm.pdf(np.log(x), loc=locs, scale=scales) / x)

	result, _ = quad(lambda s: (k - s) * lognorm_pdf(s), 0, k)
	return result

	return np.array([put_premium_true_scalar(d, k) for d, k in zip(ds, ks)])

	def put_premium_bs(loc, scale, k):
	lk = np.log(k)
	z = (lk - loc) / scale
	z2 = z - scale
	return k * norm.cdf(z) - np.exp(loc + 0.5 * scale*2) norm.cdf(z2)

	def put_premium(ds, ks):
	out = []
	for i in range(len(ks)):
	weights, locs, scales = ds[i]
	locs = np.asarray(locs)
	scales = np.asarray(scales)
	bs_vals = put_premium_bs(locs, scales, ks[i])
	out.append(np.dot(weights, bs_vals))
	return np.array(out)

	def gen_data(n, stubs = {}):
	# Generating data as Brownian Motion with stochastic time
	periods, mmeans, vol_levels, distrs = data_stats()
	dfs = []
	for pi, period in enumerate(periods):
	mmean, (weights, scales_rel) = mmeans[pi], distrs[pi]

	# Sampling volatility, vol used as stochastic time
	vol_idx = np.random.randint(0, len(vol_levels[pi]), size=n)
	vols = np.array(vol_levels[pi])[vol_idx]

	# Sampling normal mixture
	comp_idx= np.random.choice(len(weights), size=n, p=weights)
	scales = vols * scales_rel[comp_idx]
	locs = np.log(mmean) - 0.5 * scales**2

	# Generating random returns
	returns = np.random.normal(locs, scales)
	dfp = pd.DataFrame({
	'vol_dc': vol_idx + 1,
	'lr_t2': returns,
	'vol': vols,
	'period_d': period
	})
	dfs.append(dfp)

	df_all = pd.concat(dfs, ignore_index=True)

	# Add stubs
	for k, v in stubs.items():
	df_all[k] = v

	return periods, df_all

	def plot_premium(model=None, use_zoom=False):
	ks_grid = np.linspace(0.5, 0.9, 10)
	periods, mmeans, vol_levels, distrs = data_stats()

	# Preparing data
	plot_data = {}
	for pi, period in enumerate(periods):
	mmean, (weights, scales_rel) = mmeans[pi], distrs[pi]
	zoom = 365/period if use_zoom else 1.0

	xy = []
	for vol in vol_levels[pi]:
	premiums_model = zoom*model(period, vol, ks_grid) if model else None

	scales = vol * scales_rel
	d = (weights, np.log(mmean) - 0.5scales*2, scales)
	premiums_true = zoomput_premium([d] len(ks_grid), ks_grid)

	xy.append((vol, premiums_model, premiums_true))
	plot_data[period] = xy

	# Plotting
	y_range = (0.001, 0.1)
	n = len(periods); ncols = 3; nrows = math.ceil(n / ncols)
	fig, axes = plt.subplots(nrows, ncols, figsize=(5ncols, 4nrows), squeeze=False)

	cmap = plt.get_cmap('coolwarm')
	for idx, period in enumerate(periods):
	ax = axes[idx//ncols][idx%ncols]
	for i, (vol, prem_model, prem_true) in enumerate(plot_data[period]):
	ax.plot(ks_grid, prem_true, label=f"vol={vol:.2f}", color=cmap(i / 9))
	if prem_model is not None:
	ax.plot(ks_grid, prem_model, '--', color=cmap(i / 9))
	ax.set_title(f"Period {period}")
	ax.set_yscale('log')
	ax.set_ylim(y_range[0], y_range[1])
	ax.grid(True, which='both', ls=':')
	if idx == 0:
	ax.legend()

	# remove any unused subplots
	for j in range(n, nrows * ncols):
	fig.delaxes(axes[j//ncols][j%ncols])

	fig.suptitle(f"Put Premiums vs Strikes for Different Periods and Volatilities (zoom: {use_zoom})")
	plt.tight_layout()
	plt.show()
	return fig

	def run():
	# df = gen_data(10_000)
	plot_premium()

	if __name__ == "__main__":
	run()