Skip to content

Instantly share code, notes, and snippets.

@matchaxnb
Created June 2, 2026 15:50
Show Gist options
  • Select an option

  • Save matchaxnb/bded0dd73d3f1196b959f156d4980b36 to your computer and use it in GitHub Desktop.

Select an option

Save matchaxnb/bded0dd73d3f1196b959f156d4980b36 to your computer and use it in GitHub Desktop.
friendly-units.py, a formatter for numbers to help you make sense of file sizes
#!python3.11
"""friendly-units: rewrite the numbers in a text stream as human-readable sizes."""
class splitterstring(str):
    """Marker ``str`` subclass used to tag runs of separator characters.

    It adds no behaviour of its own; ``width_preserving_split`` wraps every
    separator run in it so callers can tell separators from content with
    ``isinstance``.
    """


def width_preserving_split(s: str, splitchar: str = ' ') -> list[str]:
    """Split *s* into alternating content / separator runs without losing text.

    Unlike ``str.split``, consecutive separators are kept together as one
    token, so ``''.join(result) == s`` always holds and column widths can be
    reconstructed.

    Args:
        s: The text to split.
        splitchar: The single character treated as a separator. Each
            character of *s* is compared against it with ``==``.

    Returns:
        A list that alternates content, separator, content, ... and always
        starts with a content token (an empty string ``''`` when *s* begins
        with a separator). Separator runs are ``splitterstring`` instances;
        content tokens are plain ``str``. An empty *s* gives ``[]``.
    """
    tokens: list[str] = []
    run_start = 0
    # The first run is a content run by convention, which is what produces
    # the leading '' token when the text opens with a separator.
    in_separator = False
    for index, char in enumerate(s):
        is_separator = char == splitchar
        if is_separator != in_separator:
            # Run boundary: flush everything collected since the last one.
            piece = s[run_start:index]
            tokens.append(splitterstring(piece) if in_separator else piece)
            run_start = index
            in_separator = is_separator
    if s:
        # Flush the final run unconditionally so a trailing one-character
        # token is never lost, and tag it like any other run.
        tail = s[run_start:]
        tokens.append(splitterstring(tail) if in_separator else tail)
    return tokens
def detab(s: str, tab_width=8) -> str:
    """Replace every tab in *s* with spaces up to the next tab stop.

    Tab stops sit at every multiple of *tab_width* columns. The column is
    tracked in the expanded output and restarts after a newline or carriage
    return, which is the behaviour of ``str.expandtabs``.

    Args:
        s: Text that may contain tab characters.
        tab_width: Distance between tab stops. A value of 0 or less simply
            removes the tabs.

    Returns:
        *s* with all tabs expanded; text without tabs is returned unchanged.
    """
    # A tab must advance to the next stop based on the *output* column, not
    # on the character's index in the input, so defer to the stdlib.
    return s.expandtabs(tab_width)
def best_unit(s: str, si=False):
    """Choose the unit prefix that brings ``int(s)`` below one scaling step.

    Args:
        s: An integer written as a string.
        si: When true, scale by 1000 and return bare prefixes (``k``, ``M``,
            ``G``, ``T``); otherwise scale by 1024 and return the ``i``
            forms (``ki``, ``Mi``, ``Gi``, ``Ti``).

    Returns:
        The chosen unit string. When the value is already smaller than one
        step, *s* itself is returned unchanged instead of a unit. Values too
        large for every prefix get the largest one.

    Raises:
        ValueError: If *s* cannot be parsed by ``int``.
    """
    magnitude = int(s)
    if si:
        step, tail = 1000, ''
    else:
        step, tail = 1024, 'i'

    if magnitude < step:
        # Nothing to scale: hand the input back as-is.
        return s

    prefixes = ('k', 'M', 'G', 'T')
    for prefix in prefixes:
        magnitude = magnitude / step
        if magnitude < step:
            return prefix + tail
    # Bigger than the largest prefix can express: clamp to it.
    return prefixes[-1] + tail
def to_unit(num: str, unit: str) -> int:
    """Scale *num* down to *unit* and truncate the result toward zero.

    Args:
        num: The value to convert, normally an integer written as a string.
            Non-integer literals such as ``"1536.5"`` are also accepted when
            a scaling unit is given.
        unit: A unit whose first character is one of ``k``, ``M``, ``G`` or
            ``T``. A unit ending in ``i`` scales by powers of 1024, any other
            by powers of 1000. An empty or unrecognised unit means "no
            scaling".

    Returns:
        The scaled value as an ``int``; ``int(num)`` when no scaling applies.

    Raises:
        ValueError: If *num* is not a valid number.
    """
    base = 1024 if unit.endswith('i') else 1000
    dividers = {'k': base, 'M': base**2, 'G': base**3, 'T': base**4}
    # unit[:1] is '' for an empty unit, which is simply absent from the table.
    divider = dividers.get(unit[:1])
    if divider is None:
        return int(num)
    try:
        whole = int(num)
    except ValueError:
        # Not an integer literal: fall back to floating-point division.
        return int(float(num) / divider)
    # Stay in exact integer arithmetic; going through float rounds values
    # above 2**53 and can push the result over a unit boundary.
    quotient = abs(whole) // divider
    return -quotient if whole < 0 else quotient
def unit_to_unit(s: str, target_unit):
    """Render the number *s* in *target_unit*, keeping the original width.

    Args:
        s: The number to convert, as a string.
        target_unit: One of ``k``, ``M``, ``G``, ``T``, ``ki``, ``Mi``,
            ``Gi``, ``Ti`` for a fixed unit; ``auto`` / ``auto-si`` to let
            ``best_unit`` choose a binary / SI unit. Anything else
            (including ``None``) means "leave *s* alone".

    Returns:
        For a scaling unit, the converted value right-justified and followed
        by the unit so that the result is as wide as *s* whenever it fits
        (it is never truncated). Otherwise *s* unchanged.
    """
    if target_unit == 'auto':
        return unit_to_unit(s, best_unit(s, False))
    if target_unit == 'auto-si':
        return unit_to_unit(s, best_unit(s, True))
    if target_unit in ('k', 'M', 'G', 'T', 'ki', 'Mi', 'Gi', 'Ti'):
        # Measure the unit only here: unknown targets may not be strings at
        # all and must still fall through to the pass-through below.
        number_width = len(s) - len(target_unit)
        return str(to_unit(s, target_unit)).rjust(number_width) + target_unit
    # Unknown unit, or best_unit handing back a value too small to scale.
    return s
def convert_friendly(line_stream, out_stream, unit='Mi'):
    """Copy *line_stream* to *out_stream*, rewriting numbers in *unit*.

    Each line has its tabs replaced by spaces via ``detab`` and is split on
    spaces with ``width_preserving_split``. Tokens made only of decimal
    digits are passed through ``unit_to_unit``; everything else is copied
    verbatim.

    Args:
        line_stream: An iterable of text lines, e.g. an open text file or
            ``sys.stdin``. It is consumed lazily, one line at a time.
        out_stream: A writable text stream, used as ``print(..., file=...)``.
        unit: The target unit forwarded to ``unit_to_unit``.
    """
    for raw_line in line_stream:
        pieces = []
        for token in width_preserving_split(detab(raw_line), ' '):
            # The last token of a line still carries the line terminator;
            # set it aside so a number in the final column is recognised.
            core = token.rstrip('\r\n')
            # isdecimal() accepts exactly the digits int() can parse;
            # isnumeric() would also let through characters such as '²' or
            # '½', which make the conversion raise ValueError.
            if core.isdecimal():
                pieces.append(unit_to_unit(core, unit) + token[len(core):])
            else:
                pieces.append(token)
        print(''.join(pieces), file=out_stream, end='')
if __name__ == '__main__':
    import sys

    # Units selectable from the command line. T/Ti are listed because
    # unit_to_unit handles them just like the smaller prefixes.
    allowed_units = ('k', 'ki', 'M', 'Mi', 'G', 'Gi', 'T', 'Ti', 'auto', 'auto-si')
    unit = sys.argv[1] if len(sys.argv) > 1 else 'Mi'
    if unit not in allowed_units:
        # Validate explicitly: an assert would vanish under ``python -O`` and
        # shows a traceback instead of a usable message.
        sys.exit(f"unknown unit {unit!r}; expected one of: {', '.join(allowed_units)}")
    convert_friendly(sys.stdin, sys.stdout, unit)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment