Last active
January 14, 2018 09:04
-
-
Save Idan707/8d6d29011694beb62b627915f439f9bf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def describe_categorical_values(df, non_interesting_columns=None, num_categories=5):
    """Summarise the most frequent values of each column as percentage strings.

    For every column of ``df`` that is not listed in
    ``non_interesting_columns``, the ``num_categories`` most frequent values
    are rendered as ``"<value>: <percent>%"``, where the percentage is the
    value's share of all rows in ``df`` rounded to a whole number. Missing
    values are counted as a category of their own.

    Args:
        df: The pandas DataFrame to describe.
        non_interesting_columns: Column labels to leave out of the summary.
            ``None`` (the default) excludes nothing.
        num_categories: How many of the most frequent values to report per
            column.

    Returns:
        A DataFrame with one row per described column (indexed by column
        label) and ``num_categories`` columns. Columns with fewer distinct
        values than ``num_categories`` are padded with ``None``.
    """
    excluded = () if non_interesting_columns is None else non_interesting_columns
    total_rows = len(df)

    summary = {}
    for column in df.columns:
        if column in excluded:
            continue
        # Count once per column; dropna=False keeps NaN as its own category.
        counts = df[column].value_counts(dropna=False).head(num_categories)
        top_values = [
            "{}: {}%".format(value, int(round(100 * count / total_rows)))
            for value, count in zip(counts.index, counts.values)
        ]
        # Pad so that every column contributes exactly num_categories cells.
        top_values.extend([None] * (num_categories - len(top_values)))
        summary[column] = top_values

    # Build the frame in one go rather than inserting column by column,
    # which fragments the frame when df is wide.
    return pd.DataFrame(summary).transpose()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment