Created
June 2, 2026 15:50
-
-
Save matchaxnb/bded0dd73d3f1196b959f156d4980b36 to your computer and use it in GitHub Desktop.
friendly-units.py, a formatter for numbers to help you make sense of file sizes
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!python3.11 | |
| """friendly-units: format numbers in readable sizes""" | |
class splitterstring(str):
    """Marker ``str`` subclass tagging a token that is a run of separators."""


def width_preserving_split(s: str, splitchar: str = ' ') -> list[str]:
    """Split *s* into alternating runs of separator and non-separator text.

    Unlike ``str.split``, nothing is discarded: every character of *s* ends
    up in exactly one token, so ``''.join(result) == s`` always holds and the
    column layout of the line can be rebuilt after editing individual tokens.

    Args:
        s: The text to tokenise.
        splitchar: The separator, compared against each character of *s*.

    Returns:
        The tokens in order of appearance. Runs of *splitchar* are
        ``splitterstring`` instances; all other tokens are plain ``str``.
        No empty tokens are produced, and an empty *s* yields ``[]``.
    """
    # Local import so the block does not depend on a file-level import.
    from itertools import groupby

    tokens: list[str] = []
    # groupby flushes the final run on its own, so a trailing one-character
    # token (e.g. the 'b' in 'a b') can no longer be lost.
    for is_separator, run in groupby(s, key=lambda ch: ch == splitchar):
        text = ''.join(run)
        tokens.append(splitterstring(text) if is_separator else text)
    return tokens
def detab(s: str, tab_width=8) -> str:
    """Return *s* with every tab expanded to spaces up to the next tab stop.

    Tab stops sit at every multiple of *tab_width* columns, counted on the
    expanded output, so text following a tab stays aligned the way a
    terminal would show it.

    Args:
        s: Text that may contain tab characters.
        tab_width: Distance between tab stops, in columns.

    Returns:
        The expanded text; characters other than tabs are left untouched.
    """
    # str.expandtabs tracks the output column and resets it at line breaks,
    # which a count based on the input index cannot do.
    return s.expandtabs(tab_width)
def best_unit(s: str, si=False) -> str:
    """Pick the largest unit prefix that keeps the value at or above one unit.

    Args:
        s: The quantity, as text accepted by ``int()``.
        si: When true use powers of 1000 and the prefixes ``k``/``M``/``G``/
            ``T``; otherwise use powers of 1024 and ``ki``/``Mi``/``Gi``/``Ti``.

    Returns:
        The chosen unit. When the value is smaller than one base unit, *s*
        itself is returned unchanged instead of a unit name. Values beyond
        the ``T`` range still report ``T``/``Ti``.

    Raises:
        ValueError: If *s* is not a valid integer literal.
    """
    num = int(s)
    base = 1000 if si else 1024
    suffix = '' if si else 'i'
    if num < base:
        return s
    # Compare against growing integer thresholds rather than repeatedly
    # dividing: exact for any magnitude and no float overflow on huge input.
    threshold = base * base
    for unit in ('k', 'M', 'G'):
        if num < threshold:
            return unit + suffix
        threshold *= base
    return 'T' + suffix
def to_unit(num: str, unit: str) -> int:
    """Express *num* in *unit*, truncating the result toward zero.

    Args:
        num: The quantity to convert, as text.
        unit: A prefix starting with ``k``, ``M``, ``G`` or ``T``. A unit
            ending in ``i`` uses powers of 1024, any other uses powers of
            1000. An empty or unrecognised unit means "no conversion".

    Returns:
        *num* divided by the unit size, as an integer; or ``int(num)`` when
        no conversion applies.

    Raises:
        ValueError: If *num* cannot be parsed as a number.
    """
    base = 1024 if unit.endswith('i') else 1000
    dividers = {'k': base, 'M': base**2, 'G': base**3, 'T': base**4}
    # unit[:1] is '' for an empty unit, so both the "empty" and the
    # "unknown prefix" cases fall out of a single lookup.
    divider = dividers.get(unit[:1])
    if divider is None:
        return int(num)
    try:
        value = int(num)
    except ValueError:
        # Non-integer text such as '1.5': keep the float-based conversion.
        return int(float(num) / divider)
    # Exact integer arithmetic: floats lose precision above 2**53 and
    # overflow on very long digit strings. abs() keeps truncation toward
    # zero for negative values, matching int() on a float quotient.
    magnitude = abs(value) // divider
    return -magnitude if value < 0 else magnitude
def unit_to_unit(s: str, target_unit):
    """Render the number *s* in *target_unit*, keeping the field width.

    For a concrete unit (``k``, ``M``, ``G``, ``T`` or their ``i`` variants)
    the converted value is right-justified so that value plus unit suffix
    fills the width of *s*; the result is wider only when it does not fit.
    ``'auto'`` and ``'auto-si'`` first pick a unit with ``best_unit`` (binary
    and SI respectively) and then convert. ``'None'`` and any unrecognised
    unit return *s* unchanged.
    """
    field_width = len(s)
    suffix_width = len(target_unit)

    if target_unit in ('k', 'M', 'G', 'T', 'ki', 'Mi', 'Gi', 'Ti'):
        converted = str(to_unit(s, target_unit))
        return converted.rjust(field_width - suffix_width) + target_unit
    if target_unit == 'auto':
        return unit_to_unit(s, best_unit(s, False))
    if target_unit == 'auto-si':
        return unit_to_unit(s, best_unit(s, True))
    # 'None' and every other value: pass the text through untouched.
    return s
def convert_friendly(line_stream, out_stream, unit='Mi'):
    """Copy *line_stream* to *out_stream*, rewriting numbers in *unit*.

    Each line is tab-expanded and split on spaces without discarding any
    character; every token made only of decimal digits is passed through
    ``unit_to_unit`` and all other text is copied verbatim.

    Args:
        line_stream: Iterable of text lines (e.g. ``sys.stdin``).
        out_stream: Writable text stream receiving the converted lines.
        unit: Target unit forwarded to ``unit_to_unit``.
    """
    # Iterate the stream directly so output is produced line by line
    # instead of only after the whole input has been read.
    for line in line_stream:
        pieces = []
        for token in width_preserving_split(detab(line), ' '):
            # The last token of a line carries the line ending; set it aside
            # so a number in the final column is still recognised.
            core = token.rstrip('\r\n')
            # isdecimal() accepts only characters int() can parse;
            # isnumeric() also accepts e.g. superscripts and fractions.
            if core.isdecimal():
                pieces.append(unit_to_unit(core, unit) + token[len(core):])
            else:
                pieces.append(token)
        out_stream.write(''.join(pieces))
if __name__ == '__main__':
    # Usage: friendly-units.py [UNIT] < input
    # UNIT defaults to 'Mi' when no argument is given.
    import sys

    allowed_units = (
        'k', 'ki', 'M', 'Mi', 'G', 'Gi', 'T', 'Ti', 'auto', 'auto-si',
    )
    unit = sys.argv[1] if len(sys.argv) > 1 else 'Mi'
    # Validate explicitly: an assert would be stripped under ``python -O``
    # and gives the user no hint about the accepted values.
    if unit not in allowed_units:
        sys.exit(
            f"unknown unit {unit!r}; expected one of: "
            f"{', '.join(allowed_units)}"
        )
    convert_friendly(sys.stdin, sys.stdout, unit)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment