Skip to content

Instantly share code, notes, and snippets.

@Idan707
Last active January 14, 2018 09:05
Show Gist options
  • Save Idan707/0ff774614c780c3e6abf7ed61947a811 to your computer and use it in GitHub Desktop.
Save Idan707/0ff774614c780c3e6abf7ed61947a811 to your computer and use it in GitHub Desktop.
get_most_correlated_variables and plot_correlation_matrix
def get_most_correlated_variables(corr, num_pairs=10):
    """Return the variable pairs with the largest absolute correlation.

    Args:
        corr: Square correlation matrix as a DataFrame (for example the
            result of ``DataFrame.corr()``) whose index and columns hold the
            variable names.
        num_pairs: Maximum number of pairs to return.

    Returns:
        A DataFrame with columns ``var_1``, ``var_2`` and ``value`` (the
        signed correlation). Each unordered pair appears once and
        self-pairs are excluded. Rows are ordered by ascending absolute
        correlation (rounded to 3 decimals), so the strongest pair is last.
        Pairs whose correlation is NaN rank lowest and are only returned
        when there are fewer than ``num_pairs`` valid pairs.
    """
    # Name the index explicitly: reset_index() only produces a column called
    # "index" when the index is unnamed, so relying on that name breaks for
    # matrices whose index already carries a name.
    pairs = pd.melt(
        corr.rename_axis("var_1").reset_index(),
        id_vars=["var_1"],
        var_name="var_2",
    )
    pairs = pairs[pairs["var_1"] != pairs["var_2"]]

    # Keep only the first occurrence of every unordered pair. A frozenset
    # key avoids sorting the two names, which would fail for labels of
    # mixed, non-comparable types.
    seen = set()
    first_occurrence = []
    for left, right in zip(pairs["var_1"], pairs["var_2"]):
        key = frozenset((left, right))
        first_occurrence.append(key not in seen)
        seen.add(key)
    pairs = pairs[pd.Series(first_occurrence, index=pairs.index, dtype=bool)]

    # NaN correlations (e.g. from constant columns) must not be reported as
    # the strongest pairs, so they are sorted to the front, away from tail().
    ranked = pairs.assign(abs_value=pairs["value"].abs().round(3)).sort_values(
        by="abs_value", na_position="first"
    )
    return ranked.tail(num_pairs).drop(columns="abs_value").reset_index(drop=True)
def plot_correlation_matrix(X, features2, num_pairs=10):
    """Draw a lower-triangle correlation heatmap for the selected features.

    Args:
        X: DataFrame containing the data.
        features2: Column labels of ``X`` to include in the correlation
            matrix.
        num_pairs: Number of most-correlated pairs to compute and return.

    Returns:
        The DataFrame produced by ``get_most_correlated_variables`` for the
        correlation matrix of ``X[features2]``.
    """
    corr = X[features2].corr()

    # The strongest pairs also set the colour scale: 25% headroom above the
    # largest absolute off-diagonal correlation.
    most_correlated_variables = get_most_correlated_variables(corr, num_pairs=num_pairs)
    max_correlation = 1.25 * most_correlated_variables['value'].abs().max()
    if not np.isfinite(max_correlation):
        # No usable pair (e.g. a single feature): let seaborn choose vmax.
        max_correlation = None

    # Mask the upper triangle, diagonal included. The builtin ``bool`` is
    # used because the ``np.bool`` alias was removed from NumPy.
    mask = np.zeros_like(corr, dtype=bool)
    mask[np.triu_indices_from(mask)] = True

    # Set up the matplotlib figure
    f, ax = plt.subplots(figsize=(11, 9))

    # Generate a custom diverging colormap
    cmap = sns.diverging_palette(220, 10, as_cmap=True)

    # Draw the heatmap with the mask and correct aspect ratio
    sns.heatmap(corr, mask=mask, cmap=cmap, vmax=max_correlation, center=0,
                square=True, linewidths=.5, cbar_kws={"shrink": .8}, ax=ax)

    # Style the tick labels after drawing: heatmap() installs its own ticks
    # and labels, so styling applied beforehand is discarded.
    plt.setp(ax.get_yticklabels(), fontsize=18)
    plt.setp(ax.get_xticklabels(), rotation='vertical', fontsize=18)

    return most_correlated_variables
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment