Skip to content

Instantly share code, notes, and snippets.

@Idan707
Last active January 14, 2018 09:04
Show Gist options
  • Save Idan707/8d6d29011694beb62b627915f439f9bf to your computer and use it in GitHub Desktop.
Save Idan707/8d6d29011694beb62b627915f439f9bf to your computer and use it in GitHub Desktop.
def describe_categorical_values(df, non_interesting_columns=None, num_categories=5):
    """Summarise the most frequent values of every column as percentage strings.

    For each column of ``df`` that is not listed in ``non_interesting_columns``,
    the ``num_categories`` most frequent values (missing values included) are
    rendered as ``"<value>: <percent>%"``, where the percentage is the share of
    rows holding that value, rounded to a whole number.

    Args:
        df: The pandas DataFrame to describe.
        non_interesting_columns: Container of column labels to skip. ``None``
            (the default) skips nothing.
        num_categories: How many of the most frequent values to report per
            column.

    Returns:
        A DataFrame with one row per described column (indexed by column
        label) and ``num_categories`` positional columns. Columns with fewer
        distinct values than ``num_categories`` are padded with ``None``.
    """
    # A None default avoids sharing one mutable list object across calls.
    excluded = () if non_interesting_columns is None else non_interesting_columns
    total_rows = len(df)

    summary = {}
    for column in df.columns:
        if column in excluded:
            continue

        # Count once per column; dropna=False keeps missing values as a category.
        counts = df[column].value_counts(dropna=False).head(num_categories)
        top_values = [
            "{}: {}%".format(value, int(round(100 * count / total_rows)))
            for value, count in zip(counts.index, counts.values)
        ]

        # Pad short lists so every column contributes the same number of cells.
        top_values.extend([None] * (num_categories - len(top_values)))
        summary[column] = top_values

    # Build the frame in one go (no column-by-column growth), then flip it so
    # each described column becomes a row.
    return pd.DataFrame(summary).transpose()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment