Last active
February 12, 2022 20:42
-
-
Save chris1610/bb2dd075e5ae530c6a22ed7b29146ff0 to your computer and use it in GitHub Desktop.
Custom pretty-print accessor for a pandas DataFrame
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@pd.api.extensions.register_dataframe_accessor("pt")
class Pretty:
    """Pretty-printing accessor for a pandas DataFrame, registered as ``pt``.

    Usage: ``df.pt.pretty()``

    Each numeric column is divided by a power of 1000 chosen from the
    largest absolute value it contains and is displayed with the matching
    suffix (k, M, B, ...). Columns whose values all lie strictly between
    -1 and 1 can instead be displayed as percentages.

    The enclosing module must import ``math`` and ``pandas`` (as ``pd``).
    """

    # Suffix for each successive power of 1000; index 0 means "no scaling".
    _MILLNAMES = ('', 'k', 'M', 'B', 'T', 'P', 'E', 'Z', 'Y')

    def __init__(self, pandas_obj):
        """Validate and remember the DataFrame this accessor is attached to."""
        self._validate(pandas_obj)
        self._obj = pandas_obj

    @staticmethod
    def _validate(obj):
        """Raise ``AttributeError`` unless *obj* is a pandas DataFrame."""
        # verify this is a DataFrame
        if not isinstance(obj, pd.DataFrame):
            raise AttributeError("Must be a pandas DataFrame")

    # Code from https://github.com/azaitsev/millify
    def _pretty_col(self, col, precision, percent):
        """Scale one numeric column and build its display format string.

        Args:
            col: Numeric pandas Series to format.
            precision: Number of decimal places for the scaled values.
            percent: When true, a column whose largest absolute value is
                below 1 is left unscaled and formatted as ``"{:.2%}"``
                (always two decimals, independent of *precision*).

        Returns:
            A ``(values, format_string)`` tuple: the (possibly rescaled)
            Series and a ``str.format`` template such as ``"{:.2f}M"``.
        """
        magnitudes = col.astype('float').abs()
        # The scale comes from the largest *finite* absolute value so that
        # negative numbers are scaled like positive ones and a stray
        # infinity cannot break the logarithm below. ``max`` skips NaN and
        # yields NaN when nothing usable is left (empty / all-NaN column).
        max_val = magnitudes[magnitudes != float('inf')].max()
        has_scale = not math.isnan(max_val)

        if percent and has_scale and max_val < 1:
            return col, "{:.2%}"

        if has_scale and max_val > 0:
            magnitude = int(math.floor(math.log10(max_val) / 3))
        else:
            # Zero, or no finite value at all: display the numbers as-is.
            magnitude = 0
        # Clamp to the available suffixes; values below 1 get no suffix.
        millindex = max(0, min(len(self._MILLNAMES) - 1, magnitude))

        # Divide by a float so the divisor never overflows a 64-bit integer.
        values = col.div(float(10 ** (3 * millindex)))
        format_string = f'{{:.{precision}f}}{self._MILLNAMES[millindex]}'
        return values, format_string

    def pretty(self, precision=2, percent=True):
        """Return a Styler showing numeric columns in compact form.

        Args:
            precision: Decimal places used for scaled numeric columns.
            percent: Forwarded to ``_pretty_col``; enables percentage
                formatting for columns of small fractions.

        Returns:
            The ``.style.format(...)`` result for a copy of the frame whose
            numeric columns have been rescaled; non-numeric columns are
            passed through unchanged and the column order is preserved.
        """
        frame = self._obj
        numeric_cols = frame.select_dtypes(include='number').columns
        format_dict = {}
        # Seeding the list with the non-numeric part keeps ``pd.concat``
        # from failing when the frame has no numeric columns at all.
        pieces = [frame.select_dtypes(exclude='number')]
        for col in numeric_cols:
            new_col, format_data = self._pretty_col(frame[col], precision, percent)
            format_dict[col] = format_data
            pieces.append(pd.Series(new_col, name=col))
        full_df = pd.concat(pieces, axis=1)
        # Make sure the column order is preserved
        return full_df[frame.columns].style.format(format_dict)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment