Last active
February 8, 2021 03:59
-
-
Save stuarteberg/13bc5fba55b2c4ea88da30e67490fad7 to your computer and use it in GitHub Desktop.
Python code (with IPython timing) for comparing rle implementations.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Python code (with IPython timing) for comparing rle implementations. | |
# See: https://twitter.com/double_thunk/status/1358509387044827138?s=20 | |
# | |
import random | |
import string | |
import platform | |
from itertools import groupby | |
import numpy as np | |
import pandas as pd | |
def rle_numpy(text): | |
a = np.fromiter(text, '<U1') | |
starts = 1 + (a[1:] != a[:-1]).nonzero()[0] | |
letters = [a[0], *a[starts].tolist()] | |
counts = np.diff(starts, 1, 0, 0, len(a)) | |
return [*zip(letters, counts.tolist())] | |
def rle_itertools(text): | |
return [(c, sum(1 for _ in g)) for c, g in groupby(text)] | |
def rle_beazley(items): | |
if len(items) == 0: | |
return [] | |
result = [] | |
prev_item = items[0] | |
count = 0 | |
for item in items: | |
if item == prev_item: | |
count += 1 | |
else: | |
result.append((prev_item, count)) | |
prev_item = item | |
count = 1 | |
if count > 0: | |
result.append((prev_item, count)) | |
return result | |
def rle_pandas(items): | |
s = pd.Series(list(items)) | |
s = s[s.ne(s.shift(-1))] | |
c = s.index.to_series() + 1 | |
c -= c.shift(fill_value=0) | |
return list(zip(s.tolist(), c.tolist())) | |
counts = np.random.randint(10, size=1_000_000) | |
letters = [random.choice(string.ascii_lowercase) for _ in counts] | |
text = ''.join(l*c for l,c in zip(letters, counts))[:1_000_000] | |
print(f"Input head: {text[:50]}") | |
assert rle_itertools(text) == rle_numpy(text) == rle_beazley(text) | |
print("-------------------") | |
print(f"Testing on {platform.python_implementation()}") | |
print("-------------------") | |
print("\nrle_itertools:") | |
%timeit r = rle_itertools(text) | |
print("\nrle_numpy:") | |
%timeit r = rle_numpy(text) | |
print("\nrle_beazley:") | |
%timeit r = rle_beazley(text) | |
print("\nrle_pandas:") | |
%timeit r = rle_pandas(text) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment