Skip to content

Instantly share code, notes, and snippets.

@cloneofsimo
Created June 23, 2025 16:39
Show Gist options
  • Save cloneofsimo/8abd0284d4738f28f04200628f9a83f5 to your computer and use it in GitHub Desktop.
Save cloneofsimo/8abd0284d4738f28f04200628f9a83f5 to your computer and use it in GitHub Desktop.
humanize
# credit: https://github.com/Nordth/humanize-ai-lib/blob/main/src/humanize-string.ts
import re
# Invisible / zero-width / bidi-control code points that are deleted outright:
# soft hyphen, Mongolian vowel separator, zero-width spaces and joiners,
# LRM/RLM, bidi embeddings/overrides/isolates, word joiner, and the BOM.
_HIDDEN_CHARS = re.compile(
    r"[\u00AD\u180E\u200B-\u200F\u202A-\u202E\u2060\u2066-\u2069\uFEFF]"
)
# Horizontal whitespace at the end of any line (MULTILINE makes `$` match
# before every "\n" as well as at the end of the string).
_TRAILING_WS = re.compile(r"[ \t\x0B\f]+$", re.MULTILINE)
_NBSP = re.compile(r"\u00A0")  # no-break space -> plain space
_DASHES = re.compile(r"[—–]+")  # run of em-/en-dashes -> one ASCII hyphen
_DQUOTES = re.compile(r"[“”«»„]")  # curly quotes / guillemets -> "
_SQUOTES = re.compile(r"[‘’ʼ]")  # curly apostrophes -> '
_ELLIPSIS = re.compile(r"…")  # single-character ellipsis -> "..."

# Ordered (pattern, replacement) steps applied by humanize_str().
# Order matters: hidden characters and NBSPs are normalised *before* trailing
# whitespace is trimmed, so a line ending in NBSP or "space + zero-width char"
# does not leave a stray trailing space behind.
_HUMANIZE_STEPS = (
    (_HIDDEN_CHARS, ""),
    (_NBSP, " "),
    (_TRAILING_WS, ""),
    (_DASHES, "-"),
    (_DQUOTES, '"'),
    (_SQUOTES, "'"),
    (_ELLIPSIS, "..."),
)


def humanize_str(text: str) -> str:
    """Replace typographic Unicode characters in *text* with ASCII forms.

    The following rewrites are applied, in this order:

    1. Hidden characters (zero-width, bidi controls, soft hyphen, BOM)
       are removed.
    2. No-break spaces become ordinary spaces.
    3. Trailing spaces, tabs, vertical tabs and form feeds are stripped
       from every line.
    4. Each run of em-/en-dashes becomes a single ``-``.
    5. Curly double quotes and guillemets become ``"``.
    6. Curly single quotes / modifier apostrophes become ``'``.
    7. The single-character ellipsis becomes ``...``.

    Args:
        text: The string to normalise.

    Returns:
        The normalised string. Line breaks are left untouched.
    """
    for pattern, replacement in _HUMANIZE_STEPS:
        text = pattern.sub(replacement, text)
    return text
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment