Last active
January 21, 2022 00:34
-
-
Save robcarver17/fe1fab08ba1e78b9550a9671ac4ae8b4 to your computer and use it in GitHub Desktop.
Bin plotting
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib | |
matplotlib.use("TkAgg") | |
import matplotlib.pyplot as plt | |
import scipy.stats as stats | |
import numpy as np | |
def plot_results_for_bin_size(size, pd_result, centre_on_mean=False):
    """Plot the mean of ``y`` per ``x``-bin and return the bin t-tests.

    Bin edges come from ``get_bins_for_size``; the ``y`` values falling in
    each bin come from ``calculate_results_for_bins``.  The per-bin mean of
    ``y`` is plotted against the midpoint of each pair of adjacent edges.

    Args:
        size: Number of quantile steps, passed through to
            ``get_bins_for_size``.
        pd_result: Table exposing ``x`` and ``y`` columns as attributes
            (presumably a pandas DataFrame -- confirm with the caller).
        centre_on_mean: Passed through to ``get_bins_for_size``.

    Returns:
        Whatever ``print_t_stats`` returns for the per-bin ``y`` values.
    """
    bin_edges = get_bins_for_size(size, pd_result, centre_on_mean=centre_on_mean)
    y_by_bin = calculate_results_for_bins(bin_edges, pd_result)

    # One mean per bin, one midpoint per pair of neighbouring edges.
    bin_means = [bin_values.mean() for bin_values in y_by_bin]
    midpoints = [
        np.mean([upper_edge, lower_edge])
        for lower_edge, upper_edge in zip(bin_edges[:-1], bin_edges[1:])
    ]

    plt.plot(midpoints, bin_means)

    return print_t_stats(y_by_bin)
def print_t_stats(results):
    """Print and collect two-sample t-tests between bins.

    Each bin is tested against the bin immediately before it, and finally
    the last bin is tested against the first.  All tests use
    ``scipy.stats.ttest_ind`` with ``equal_var=True``.

    Args:
        results: Sequence of per-bin samples, in bin order.

    Returns:
        List of test results: one per adjacent pair of bins, followed by
        the last-versus-first comparison.
    """
    collected = []

    print("For each bin:")
    # Pair every bin with its predecessor.
    for earlier_bin, later_bin in zip(results[:-1], results[1:]):
        outcome = stats.ttest_ind(later_bin, earlier_bin, axis=0, equal_var=True)
        collected.append(outcome)
        print(outcome)

    print("Comparing final and first bins:")
    end_to_end = stats.ttest_ind(results[-1], results[0], axis=0, equal_var=True)
    collected.append(end_to_end)
    print(end_to_end)

    return collected
def get_bins_for_size(size, pd_result, centre_on_mean=False):
    """Build ascending bin edges for ``x``, split at a central value.

    The values of ``x`` below the centre and the values above it are each
    cut at the quantile points produced by ``quantile_in_range``.  The two
    sets of edges are then joined with the centre itself as the shared
    middle edge.

    Args:
        size: Number of quantile steps used on each side of the centre.
        pd_result: Table exposing an ``x`` column as an attribute
            (presumably a pandas DataFrame -- confirm with the caller).
        centre_on_mean: If true the centre is the mean of ``x``;
            otherwise the centre is 0.

    Returns:
        List of bin edges: lower-side quantiles, the centre, then
        upper-side quantiles.
    """
    if centre_on_mean:
        centre = pd_result.x.mean()
    else:
        centre = 0

    # Lower side: values *below* the centre, so the centre acts as the upper
    # bound (``max``).  The 0.001 margin keeps values sitting at the centre.
    lower_quantiles = quantile_in_range(size, pd_result, max=centre + 0.001)
    # Upper side: values *above* the centre, so the centre acts as the lower
    # bound (``min``).
    upper_quantiles = quantile_in_range(size, pd_result, min=centre - 0.001)

    # The last lower-side point and the first upper-side point both sit next
    # to the centre; replace the pair with the exact centre value.
    return lower_quantiles[:-1] + [centre] + upper_quantiles[1:]
def quantile_in_range(size, pd_result, min=-9999., max=9999.):
    """Return quantile points of the ``x`` values lying strictly in a range.

    Args:
        size: Number of quantile steps, passed to ``get_quantile_ranges``.
        pd_result: Table exposing an ``x`` column as an attribute
            (presumably a pandas DataFrame -- confirm with the caller).
        min: Exclusive lower bound on ``x``.
        max: Exclusive upper bound on ``x``.

    Returns:
        List with one quantile of the filtered ``x`` values per level
        returned by ``get_quantile_ranges(size)``.
    """
    x_values = pd_result.x

    # Keep only observations strictly inside (min, max).
    above_floor = x_values > min
    below_ceiling = x_values < max
    windowed = x_values[above_floor & below_ceiling]

    return [windowed.quantile(level) for level in get_quantile_ranges(size)]
def get_quantile_ranges(size):
    """Return evenly spaced quantile levels from 0 to 1 in steps of 1/size.

    Args:
        size: Number of steps between 0 and 1.

    Returns:
        numpy array of levels starting at 0 and advancing by ``1.0 / size``.
    """
    step = 1.0 / size
    # The stop value sits just above 1 so the final level is not cut off by
    # arange's exclusive upper limit.
    return np.arange(0, 1.0000001, step)
def calculate_results_for_bins(bins, pd_result):
    """Collect the ``y`` values whose ``x`` falls inside each bin.

    Consecutive entries of ``bins`` form the edges of one bin.  Every bin
    includes its lower edge and excludes its upper edge, except the final
    bin, which includes both edges.  This way an observation sitting exactly
    on an edge (including the smallest and largest ``x`` when the outer
    edges are the data extremes) is assigned to a bin rather than dropped.

    Args:
        bins: Sequence of bin edges.
        pd_result: Table exposing ``x`` and ``y`` columns as attributes and
            supporting boolean-mask row selection (presumably a pandas
            DataFrame -- confirm with the caller).

    Returns:
        List with one entry per bin, each the ``y`` column of the rows
        selected for that bin.  Empty when ``bins`` has fewer than two edges.
    """
    x_values = pd_result.x
    final_idx = len(bins) - 1

    results = []
    for idx in range(1, len(bins)):
        lower_edge = bins[idx - 1]
        upper_edge = bins[idx]

        if idx == final_idx:
            # Close the last bin on the right so the top edge is not lost.
            below_upper = x_values <= upper_edge
        else:
            below_upper = x_values < upper_edge

        in_bin = (x_values >= lower_edge) & below_upper
        results.append(pd_result[in_bin].y)

    return results
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment