krassowski · October 19, 2022 14:09 · Jan 22, 2021 · Jan 22, 2021
diff --git a/pandas_explorer.py b/pandas_explorer.py
@@ -1,3 +1,5 @@
+# Copyright (c) 2021 Michał Krassowski.
+# Distributed under the terms of the Modified BSD License.
 from sidecar import Sidecar
 from ipywidgets import widgets
 from IPython.display import display, update_display, HTML
@@ -58,7 +60,14 @@ def show_frame(
 
         if row_filter:
             df = df[
-                df.apply(lambda row: row.astype(str).apply(contains, substring=row_filter, fuzzy=row_filter_fuzzy).any(), axis=1)
+                df.apply(
+                    lambda row: (
+                        row
+                        .astype(str)
+                        .apply(contains, substring=row_filter, fuzzy=row_filter_fuzzy)
+                        .any()
+                    ),
+                    axis=1)
             ]
             if len(df) < max_rows and not filtered_from:
                 filtered_from = max_rows

diff --git a/example.py b/example.py
@@ -0,0 +1,6 @@
+from pandas_explorer import pandas_explorer
+from pandas import read_csv
+
+iris_frame = read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')  # iris CSV fetched from the seaborn-data repository
+
+pandas_explorer(data=iris_frame, title='Iris')  # opens the explorer panel titled 'Iris'
diff --git a/pandas_explorer.py b/pandas_explorer.py
@@ -0,0 +1,145 @@
+from sidecar import Sidecar
+from ipywidgets import widgets
+from IPython.display import display, update_display, HTML
+from types import SimpleNamespace
+from pandas import DataFrame, option_context
+import string
+import re
+
+
+def pandas_explorer(data: DataFrame, title='Explorer', default_rows=30, drop_index=False):  # show an interactive filter/sort/truncate view of `data` in a Sidecar panel
+
+    data = data.reset_index(drop=drop_index)  # local rebinding; with drop_index=False the former index is kept as column(s)
+
+    split_on = '|'.join(re.escape(x) for x in string.punctuation + ' ')  # regex alternation of every punctuation character plus the space
+
+    table_widgets = SimpleNamespace(  # attribute names must equal show_frame's parameter names: vars() of this is passed to interactive_output
+        row_filter=widgets.Text(description='Rows filter'),
+        row_filter_fuzzy=widgets.Checkbox(description='Fuzzy', value=True),
+        show_index=widgets.Checkbox(description='Index', value=False),  # NOTE(review): never added to the `ui` layout below, so it cannot be toggled from the panel - confirm intent
+        max_rows=widgets.IntSlider(value=default_rows, description='Max rows', min=0, max=len(data)),
+        max_columns=widgets.IntSlider(value=10, description='Max columns', min=0, max=len(data.columns)),
+        sort_column=widgets.Dropdown(options=[None, *data.columns], description='Sort'),
+        sort_ascending=widgets.Checkbox(description='Ascending')
+    )
+    filtered_from = None  # 'Max rows' value remembered when a row filter first leaves fewer rows than the slider asks for
+
+    def highlight(value, substring, marker='b', options=''):  # wrap every exact (case-sensitive) occurrence of substring in an HTML <marker> element
+        value = str(value)
+        value = value.split(substring)
+        return f'<{marker} {options}>{substring}</{marker}>'.join(value)  # NOTE(review): value and substring are emitted into HTML unescaped
+
+    def split(text: str):  # tokenise on split_on, discarding empty pieces
+        return [
+            v
+            for v in re.split(split_on, text)
+            if v
+        ]
+
+    def contains(value, substring: str, fuzzy: bool):  # case-insensitive containment; in fuzzy mode every token of a multi-token substring must occur
+        if fuzzy:
+            parts = split(substring)
+
+            if len(parts) > 1:  # zero or one token: fall through to the plain test on the raw substring
+                return all(
+                    contains(value, part, fuzzy=fuzzy)
+                    for part in parts
+                )
+        # todo case sensitivity option?
+        return substring.lower() in value.lower()
+
+    def show_frame(  # render callback wired to the controls through interactive_output
+        row_filter: str, row_filter_fuzzy: bool,
+        show_index: bool, max_rows: int, max_columns: int,
+        sort_column: str, sort_ascending: bool
+    ):
+        nonlocal filtered_from
+        df = data.copy()  # work on a copy of the captured frame
+
+        if row_filter:
+            df = df[
+                df.apply(lambda row: row.astype(str).apply(contains, substring=row_filter, fuzzy=row_filter_fuzzy).any(), axis=1)
+            ]
+            if len(df) < max_rows and not filtered_from:
+                filtered_from = max_rows
+
+        def highlight_matches(value):  # Styler cell formatter: mark up the text that matched the row filter
+            if row_filter:
+                if row_filter_fuzzy:
+                    parts = split(row_filter)
+                    for part in parts:  # each token is highlighted in turn on the already marked-up value
+                        value = highlight(value, part)
+                else:
+                    value = highlight(value, row_filter)
+            return value
+
+        columns_to_hide = list(df.columns)[max_columns:]  # every column after the first max_columns, by position
+
+        notes = []  # caption fragments describing what was truncated
+
+        if max_rows < len(df):
+            notes.append(f'{len(df) - max_rows} rows hidden')
+        if columns_to_hide:
+            notes.append(f'{len(columns_to_hide)} columns hidden')
+
+        if sort_column is not None:  # None is the first entry of the 'Sort' dropdown
+            df = df.sort_values(sort_column, ascending=sort_ascending)
+
+        try:
+            styled = (
+                df.head(max_rows).style
+                .hide_columns(columns_to_hide)  # NOTE(review): hide_columns/hide_index are deprecated in newer pandas in favour of Styler.hide - confirm the supported pandas range
+                .format(highlight_matches)
+                .set_caption(' '.join(notes))
+            )
+
+            if not show_index:
+                styled = styled.hide_index()
+            displayed = display(styled)
+        except ValueError as e:
+            if 'style is not supported for non-unique indices' not in e.args[0]:  # only the Styler's non-unique-labels refusal is handled here
+                raise
+            with option_context('display.max_rows', max_rows):  # plain (unstyled) fallback: no highlighting, caption or index hiding
+                styled = df.head(max_rows).iloc[:, :max_columns]  # fix: keep the first max_columns columns by position (the old isin() mask selected the hidden columns instead)
+                displayed = display(styled)
+
+        if filtered_from is not None and len(df) != 0:  # a filter shrank the result earlier and there is something to show
+            current_value = min(filtered_from, len(df))  # move back toward the remembered value, capped at the rows available
+            if len(df) > filtered_from:  # result is large enough again: forget the remembered value
+                filtered_from = None
+            table_widgets.max_rows.max = current_value  # presumably set before value so the new value is within the slider bounds
+            table_widgets.max_rows.value = current_value
+
+        table_widgets.max_rows.max = len(df)  # the slider's upper bound always ends up as the current result size
+
+        return displayed
+
+    sc = Sidecar(title=title)
+
+    # https://github.com/jupyter-widgets/jupyterlab-sidecar/issues/25
+    sc_out = widgets.Output(layout={'overflow': 'scroll', 'max-width': '100%'})
+    with sc:
+        display(sc_out)
+
+    with sc_out:
+        out = widgets.interactive_output(  # calls show_frame with the widgets' values, keyed by the namespace attribute names
+            show_frame,
+            vars(table_widgets)
+        )
+        ui = widgets.VBox([  # control rows: filter, size limits, sorting
+            widgets.HBox([table_widgets.row_filter, table_widgets.row_filter_fuzzy]),
+            widgets.HBox([table_widgets.max_rows, table_widgets.max_columns]),
+            widgets.HBox([table_widgets.sort_column, table_widgets.sort_ascending]),
+        ])
+        out_box = widgets.Box(
+            [out],
+            # sadly hard-coded to allow for scroll as sidecar has layout issues
+            layout=widgets.Layout(max_width='600px', max_height='1300px')
+        )
+
+        display(
+            widgets.VBox([
+                ui,
+                out_box
+            ])
+        )
No results found