Last active
March 30, 2026 12:01
-
-
Save nk9/e26755fa866ace249883f4c30a5181fe to your computer and use it in GitHub Desktop.
A tool which constructs a regex of likely scannos from a query and then runs Okapi with that new query
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| import sys | |
| import subprocess | |
| import shlex | |
| import re | |
# Command word used to launch Okapi. main() places it unquoted at the start
# of the command line it hands to zsh.
OKAPI_PATH = "okapi" # Assumes it's in your $PATH
def generate_typo_regex(text):
    r"""Build a regex that matches ``text`` and its likely scanno variants.

    Every character (or two-character sequence) of ``text`` that has an entry
    in the confusion table below is replaced by a character class or
    non-capturing alternation matching the original and its listed
    misreadings. Characters without an entry are kept literally; regex
    metacharacters among them are backslash-escaped so that punctuation in
    the query can neither widen nor break the resulting pattern.

    Args:
        text: The literal query string.

    Returns:
        The generated pattern as a string, wrapped in ``\b`` anchors.
    """
    # Mapping table.
    # For multi-char replacements, use (?:alt1|alt2) to match either the
    # original or the typo.
    mapping = {
        # Multi-char mappings (tried before single chars, see sorting below)
        "nn": r"(?:nn|rm|rn|mm)",
        "rn": r"(?:rn|rm|nn|m)",
        "mm": r"(?:mm|nn|rnn)",
        "cl": r"(?:cl|d)",
        "fi": r"(?:fi|fl)",
        "fl": r"(?:fi|fl)",
        # Single-char mappings
        "a": "[ase]",
        "b": "[blh]",
        "B": "[BPRF]",
        "c": "[ceo]",
        "C": "[COQ]",
        "d": r"(?:cl|[cdal])",
        "D": "[DI]",
        "E": "[EFPB]",
        "e": "[esaoc]",
        "F": "[FPR]",
        "f": "[ft]",
        # Includes "H" itself, like every other entry, so that a correctly
        # scanned H still matches when other letters of the word are wrong.
        "H": r"(?:H|I|Li)",
        "i": "[ilf]",
        "j": "[ij]",
        "h": r"(?:li|[hbl])",
        "l": "[ilt]",
        "L": "[LI]",
        "m": r"(?:ni|in|rn|[mn])",
        "M": "[MNV]",
        "N": "[MNV]",
        "n": "[onum]",
        "O": "[COQ]",
        "o": "[ocnu]",
        "P": "[PRF]",
        "Q": "[COQ]",
        "R": "[RPF]",
        "r": "[rt]",
        "s": "[esa]",
        "t": "[ft]",
        "u": "[nuo]",
        "U": "[VU]",
        "v": "[wvr]",
        "w": "[wv]",
        "x": "[xz]",
    }

    # Longest keys first so that "nn" is tried before "n" at each position.
    sorted_keys = sorted(mapping, key=len, reverse=True)
    key_alternation = "|".join(re.escape(k) for k in sorted_keys)

    # The final alternative picks up regex metacharacters that have no
    # mapping entry; they must be escaped to be matched literally.
    master_pattern = re.compile(key_alternation + r"|[.^$*+?{}()\[\]\\|]")

    def replace_match(match):
        token = match.group(0)
        # Mapped token -> its confusion set; otherwise it is a metacharacter.
        return mapping.get(token, "\\" + token)

    result = master_pattern.sub(replace_match, text)
    return r"\b" + result + r"\b"
def main():
    """Command-line entry point: expand the query and run Okapi with it.

    Takes the query from ``sys.argv[1]`` and any further arguments from
    ``sys.argv[2:]``, builds the scanno regex for the query, and runs
    ``OKAPI_PATH <regex> -e <query> [extra args...]`` through an interactive
    zsh. The command line is echoed before it is executed.

    Exits with status 1 when no query is given, 127 when ``zsh`` cannot be
    started, 0 when interrupted with Ctrl-C, and otherwise with the
    command's own non-zero status if it fails.
    """
    if len(sys.argv) < 2:
        # Usage errors belong on stderr so they do not pollute piped output.
        print("Usage: okt <string> [extra_args...]", file=sys.stderr)
        sys.exit(1)

    original_val = sys.argv[1]
    extra_args_list = sys.argv[2:]
    regex_val = generate_typo_regex(original_val)
    extra_args_str = shlex.join(extra_args_list)

    # Quote every user-supplied piece; OKAPI_PATH is left unquoted so the
    # shell can still expand it as an alias.
    cmd_str = (
        f"{OKAPI_PATH} {shlex.quote(regex_val)} -e {shlex.quote(original_val)} {extra_args_str}"
    )
    print(f"Executing: {cmd_str}")

    try:
        # -i forces interactive mode to load aliases from .zshrc
        completed = subprocess.run(["zsh", "-i", "-c", cmd_str])
    except FileNotFoundError:
        print("Error: zsh executable not found", file=sys.stderr)
        sys.exit(127)
    except KeyboardInterrupt:
        sys.exit(0)

    # Propagate failure so callers and scripts can detect it.
    if completed.returncode:
        sys.exit(completed.returncode)
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
NB: You must install
okapi first!