Last active
April 16, 2026 11:22
-
-
Save twobob/1b9c3b4c94d8f9d2361aca11952de8db to your computer and use it in GitHub Desktop.
convert_us_to_uk_orthography_oneliner python thingy
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import re | |
| from functools import reduce | |
def _match_case(original: str, replacement: str) -> str:
    """Recase *replacement* so that it follows the casing of *original*.

    An all-caps match yields an all-caps replacement, a match with a leading
    capital keeps that capital, and anything else is lower-cased.
    """
    if original.isupper():
        return replacement.upper()
    if original[:1].isupper():
        return replacement[:1].upper() + replacement[1:]
    return replacement.lower()


# Ordered (compiled pattern, replacement template) pairs. Every pattern is
# case-insensitive; casing is restored afterwards by _match_case(). The
# productive-looking suffix rules (-or, -l-, -og, ae) are deliberately driven
# by explicit stem lists: as blanket patterns they rewrite unrelated words
# ("floor" -> "floour", "filed" -> "filled", "blog" -> "blogue",
# "hedge" -> "haedge").
_US_TO_UK_RULES = tuple(
    (re.compile(pattern, re.IGNORECASE), template)
    for pattern, template in (
        # 1. -yze -> -yse
        (r"\b(\w+)yz(e|es|ed|ing)\b", r"\1ys\2"),
        # 2. -ize -> -ise. The look-ahead protects words that keep "z" in
        #    British English, including their inflected forms ("sizes",
        #    "seized") and common compounds ("resize", "oversized").
        (
            r"\b(?!(?:(?:re|down|up|over|out|under)?siz|capsiz|priz|seiz|maiz|assiz)"
            r"(?:e|es|ed|ing)\b)(\w+)iz(e|es|ed|ing)\b",
            r"\1is\2",
        ),
        # 3. -or -> -our, only for stems that actually take -our.
        (
            r"\b((?:dis|mis|multi|mal|un)?"
            r"(?:arb|ard|arm|behavi|cand|clam|col|demean|enam|endeav|fav|ferv"
            r"|flav|harb|hon|hum|lab|neighb|od|parl|ranc|rig|rum|savi|sav"
            r"|splend|succ|tum|val|vap|vig))or(s|ed|ing|)\b",
            r"\1our\2",
        ),
        # 4. Consonant doubling (l -> ll), only for stems that double in
        #    British English.
        (
            r"\b((?:re|un|dis|mis)?"
            r"(?:barrel|bevel|cancel|channel|chisel|counsel|dial|duel|equal"
            r"|fuel|funnel|gravel|grovel|initial|jewel|label|level|libel"
            r"|marshal|marvel|model|panel|parcel|pencil|quarrel|revel|rival"
            r"|shovel|shrivel|signal|snivel|spiral|swivel|total|towel|travel"
            r"|tunnel|unravel|yodel))(ed|ing|er|ers)\b",
            r"\1l\2",
        ),
        # 5. -er -> -re (stems are written WITHOUT the trailing "er":
        #    "somb" + "er", not "sombr" + "er").
        (
            r"\b(cent|met|theat|lit|fib|somb|meag|calib|lust|spect|sepulch)er(s|)\b",
            r"\1re\2",
        ),
        # 6. -og -> -ogue, only for the classical -logue/-gogue words.
        (
            r"\b(catalog|dialog|analog|monolog|epilog|travelog|ideolog"
            r"|demagog|pedagog|synagog)(s|)\b",
            r"\1ue\2",
        ),
        # 7. -ense -> -ence
        (r"\b(def|off|pret)ense(s|)\b", r"\1ence\2"),
        # 8. ae/oe digraphs. Each pattern pins enough of the stem that
        #    ordinary words such as "hedge", "pedal" or "anemone" are safe.
        (r"\b(an|leuk)e(mi[ac])", r"\1ae\2"),
        (r"\b(an)e(sthe[st])", r"\1ae\2"),
        (r"\b(p)e(diatr)", r"\1ae\2"),
        (r"\b(orthop|encyclop)e(di[ac])", r"\1ae\2"),
        (r"\b(gyn)e(colog)", r"\1ae\2"),
        (r"\b(arch)e(olog)", r"\1ae\2"),
        (r"\b(h)e(matolog|matoma|moglobin|mophili|morrh)", r"\1ae\2"),
        # The leading "e" is matched outside the group so that the template
        # starts with a literal "oe"; with "o\1" a capitalised match came
        # out as "OEstrogen".
        (r"\be(strogen|sophagus|dema)\b", r"oe\1"),
        # 9. Systematic orthographic miscellaneous.
        #    "maneuvered"/"maneuvering" drop the final "e" of "manoeuvre".
        (r"\bmaneuver(ed|ing)\b", r"manoeuvr\1"),
        (r"\bmaneuver(s|)\b", r"manoeuvre\1"),
        (r"\bjewelry\b", "jewellery"),
        (r"\bprogram(s|)\b", r"programme\1"),
        (r"\bmold(s|)\b", r"mould\1"),
        (r"\bmustache(s|)\b", r"moustache\1"),
        (r"\bpajamas\b", "pyjamas"),
        (r"\bgray\b", "grey"),
    )
)


def convert_us_to_uk_orthography_optimised(text: str) -> str:
    """Convert American spellings in *text* to British spellings.

    Only spelling patterns (orthography) are handled, not vocabulary
    differences. The rules in ``_US_TO_UK_RULES`` are applied in order, each
    over the result of the previous one, and every replacement is recased to
    follow the casing of the text it replaces.

    Args:
        text: Arbitrary text; may be empty.

    Returns:
        The text with every recognised American spelling replaced. Words not
        covered by a rule are returned untouched.

    Note:
        The stem lists are not exhaustive. "meter" and "program" are always
        converted, although British English keeps those spellings in some
        senses (measuring devices, computer programs).
    """
    for pattern, template in _US_TO_UK_RULES:
        # Bind the template as a default so the callback never sees a later
        # iteration's value.
        text = pattern.sub(
            lambda m, template=template: _match_case(m.group(0), m.expand(template)),
            text,
        )
    return text
if __name__ == "__main__":
    # Smoke check: push one sample sentence through the converter and print
    # the result. Expected output:
    #   "The traveller organised a theatre programme to analyse paediatric archaeology."
    sample_sentence = "The traveler organized a theater program to analyze pediatric archeology."
    converted = convert_us_to_uk_orthography_optimised(sample_sentence)
    print(converted)
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Use this if https://github.com/HoldOffHunger/convert-british-to-american-spellings is too heavy.