Created
May 28, 2025 22:07
-
-
Save coderberry/5ce1ba53bf7277a8605584b14689fa0e to your computer and use it in GitHub Desktop.
Remove special characters from AI responses in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Translation table mapping "fancy" code points to plain keyboard equivalents.
# Every key is written as a \uXXXX escape on purpose: the characters involved
# are invisible or easily normalised by editors, copy/paste and web scrapers,
# which would silently turn an entry into a no-op or a syntax error.
_HUMANIZE_TABLE = str.maketrans({
    # Hidden Unicode characters are removed outright.
    '\u200B': '',     # Zero-width space
    '\u200C': '',     # Zero-width non-joiner
    '\u200D': '',     # Zero-width joiner
    '\u200E': '',     # Left-to-right mark
    '\u200F': '',     # Right-to-left mark
    '\uFEFF': '',     # Zero-width no-break space (BOM)
    # Non-breaking space becomes a regular space.
    '\u00A0': ' ',
    # Fancy dashes become a regular hyphen-minus.
    '\u2014': '-',    # Em dash
    '\u2013': '-',    # En dash
    # Fancy double quotes become a straight double quote.
    '\u201C': '"',    # Left double quotation mark
    '\u201D': '"',    # Right double quotation mark
    '\u00AB': '"',    # Left-pointing double angle quotation mark
    '\u00BB': '"',    # Right-pointing double angle quotation mark
    '\u201E': '"',    # Double low-9 quotation mark
    # Fancy apostrophes become a straight apostrophe.
    '\u2018': "'",    # Left single quotation mark
    '\u2019': "'",    # Right single quotation mark
    '\u02BC': "'",    # Modifier letter apostrophe
    # Other symbols.
    '\u2026': '...',  # Horizontal ellipsis
})


def humanize_string(text):
    """
    Simplistic function to humanize AI-generated text by converting fancy Unicode
    characters to standard keyboard equivalents.

    Hidden zero-width/directional marks are removed, typographic quotes, dashes,
    non-breaking spaces and ellipses are replaced by their ASCII counterparts, and
    trailing whitespace is stripped from every line.

    Note: the text is split with ``str.splitlines()`` and re-joined with ``'\\n'``,
    so all line terminators are normalised to ``'\\n'`` and a trailing newline at
    the end of the input is not kept.

    Args:
        text (str): The input text to humanize

    Returns:
        str: Humanized text
    """
    # All source characters are single code points and no replacement contains
    # a source character, so one translate() pass is equivalent to applying the
    # replacements one after another.
    cleaned = text.translate(_HUMANIZE_TABLE)

    # Strip after translating so a trailing non-breaking space (now a plain
    # space) is removed as well.
    return '\n'.join(line.rstrip() for line in cleaned.splitlines())
def humanize_dict(data):
    """
    Recursively processes a data structure and applies humanize_string to all
    string values, including those in nested dictionaries, lists, and tuples.

    Dictionary keys are left untouched; only values are processed. Values of any
    other type (numbers, None, sets, custom objects, ...) are returned unchanged.
    New containers are built; the input is not modified in place.

    Args:
        data: A dictionary or any nested data structure containing strings

    Returns:
        The same data structure with all string values humanized. Named tuples
        keep their type; plain tuples stay plain tuples.
    """
    if isinstance(data, str):
        return humanize_string(data)

    if isinstance(data, dict):
        return {key: humanize_dict(value) for key, value in data.items()}

    if isinstance(data, list):
        return [humanize_dict(item) for item in data]

    if isinstance(data, tuple):
        items = [humanize_dict(item) for item in data]
        # Named tuples take their fields as positional arguments rather than a
        # single iterable; rebuild them with their own type so callers do not
        # lose attribute access on the result.
        if hasattr(data, '_fields'):
            return type(data)(*items)
        return tuple(items)

    return data
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment