Skip to content

Instantly share code, notes, and snippets.

@coderberry
Created May 28, 2025 22:07
Show Gist options
  • Save coderberry/5ce1ba53bf7277a8605584b14689fa0e to your computer and use it in GitHub Desktop.
Save coderberry/5ce1ba53bf7277a8605584b14689fa0e to your computer and use it in GitHub Desktop.
Remove special characters from AI responses in Python
# Maps each "fancy" character to its plain-keyboard replacement; an empty
# string means the character is deleted. Every key is written as a \u escape
# so the table survives copy/paste and re-encoding (literal curly quotes are
# easily flattened into ASCII quotes, which silently disables the rule).
_HUMANIZE_TABLE = str.maketrans({
    # Hidden / zero-width characters
    '\u200B': '',     # Zero-width space
    '\u200C': '',     # Zero-width non-joiner
    '\u200D': '',     # Zero-width joiner
    '\u200E': '',     # Left-to-right mark
    '\u200F': '',     # Right-to-left mark
    '\uFEFF': '',     # Zero-width no-break space (BOM)
    # Non-breaking space -> regular space
    '\u00A0': ' ',
    # Fancy dashes -> hyphen-minus
    '\u2014': '-',    # Em dash
    '\u2013': '-',    # En dash
    # Fancy double quotes -> straight double quote
    '\u201C': '"',    # Left double quotation mark
    '\u201D': '"',    # Right double quotation mark
    '\u00AB': '"',    # Left-pointing double angle quotation mark
    '\u00BB': '"',    # Right-pointing double angle quotation mark
    '\u201E': '"',    # Double low-9 quotation mark
    # Fancy apostrophes -> straight apostrophe
    '\u2018': "'",    # Left single quotation mark
    '\u2019': "'",    # Right single quotation mark
    '\u02BC': "'",    # Modifier letter apostrophe
    # Other symbols
    '\u2026': '...',  # Horizontal ellipsis
})


def humanize_string(text):
    """
    Humanize AI-generated text by converting fancy Unicode characters
    to standard keyboard equivalents.

    Zero-width/directional marks are removed, non-breaking spaces become
    regular spaces, typographic dashes, quotes and apostrophes become their
    ASCII counterparts, and the ellipsis character becomes three dots.
    Trailing whitespace is then stripped from every line.

    Args:
        text (str): The input text to humanize

    Returns:
        str: Humanized text. Lines are re-joined with '\\n', so other line
        terminators are normalized and a final trailing newline is dropped.
    """
    # One pass over the text instead of one .replace() scan per rule. This is
    # equivalent to applying the rules in sequence because no replacement
    # output is itself a key of the table.
    cleaned = text.translate(_HUMANIZE_TABLE)
    # Remove trailing whitespace from each line
    return '\n'.join(line.rstrip() for line in cleaned.splitlines())
def humanize_dict(data):
    """
    Walk a (possibly nested) data structure and humanize every string value.

    Dictionaries, lists and tuples are rebuilt with each contained value
    processed recursively; strings are passed through humanize_string.
    Dictionary keys are kept as they are, and any value of another type is
    returned unchanged.

    Args:
        data: A dictionary or any nested data structure containing strings

    Returns:
        A structure of the same shape with all string values humanized
    """
    if isinstance(data, dict):
        cleaned = {}
        for name, entry in data.items():
            # Only values are processed; keys are copied over untouched.
            cleaned[name] = humanize_dict(entry)
        return cleaned

    if isinstance(data, list):
        return list(map(humanize_dict, data))

    if isinstance(data, tuple):
        return tuple(map(humanize_dict, data))

    if isinstance(data, str):
        return humanize_string(data)

    # Numbers, None, and any other type pass through as-is.
    return data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment