Skip to content

Instantly share code, notes, and snippets.

@nk9
Last active March 30, 2026 12:01
Show Gist options
  • Select an option

  • Save nk9/e26755fa866ace249883f4c30a5181fe to your computer and use it in GitHub Desktop.

Select an option

Save nk9/e26755fa866ace249883f4c30a5181fe to your computer and use it in GitHub Desktop.
A tool which constructs a regex of likely scannos from a query and then runs Okapi with that new query
#!/usr/bin/env python3
import sys
import subprocess
import shlex
import re
# Command used to launch Okapi. main() interpolates it unquoted into a
# command line that is run through an interactive zsh, so a bare command name
# (or a shell alias from .zshrc) is sufficient here.
OKAPI_PATH = "okapi" # Assumes it's in your $PATH
def generate_typo_regex(text: str) -> str:
    r"""Build a regex matching *text* and its likely OCR misreadings ("scannos").

    Each character (or character pair) that has an entry in the mapping table
    is replaced by a regex fragment that matches the original spelling or one
    of its look-alikes. Characters without an entry are copied through
    unchanged -- they are NOT regex-escaped -- and the whole result is wrapped
    in ``\b`` word-boundary anchors.

    Args:
        text: The query string to expand.

    Returns:
        The regex source as a string, e.g. ``"fi"`` -> ``r"\b(?:fi|fl)\b"``.
    """
    # Mapping table: key -> regex fragment.
    # Every fragment must also match the key itself, otherwise a correctly
    # scanned character would prevent the whole word from matching.
    # For multi-char replacements, use (?:alt1|alt2) to match either the
    # original or the typo.
    mapping = {
        # Multi-char mappings (tried before single chars, see sort below)
        "nn": r"(?:nn|rm|rn|mm)",
        "rn": r"(?:rn|rm|nn|m)",
        "mm": r"(?:mm|nn|rnn)",
        "cl": r"(?:cl|d)",
        "fi": r"(?:fi|fl)",
        "fl": r"(?:fi|fl)",
        # Single-char mappings
        "a": "[ase]",
        "b": "[blh]",
        "B": "[BPRF]",
        "c": "[ceo]",
        "C": "[COQ]",
        "d": r"(?:cl|[cdal])",
        "D": "[DI]",
        "E": "[EFPB]",
        "e": "[esaoc]",
        "F": "[FPR]",
        "f": "[ft]",
        # Previously "(?:I|Li)", which could never match a genuine "H".
        "H": r"(?:Li|[HI])",
        "i": "[ilf]",
        "j": "[ij]",
        "h": r"(?:li|[hbl])",
        "l": "[ilt]",
        "L": "[LI]",
        "m": r"(?:ni|in|rn|[mn])",
        "M": "[MNV]",
        "N": "[MNV]",
        "n": "[onum]",
        "O": "[COQ]",
        "o": "[ocnu]",
        "P": "[PRF]",
        "Q": "[COQ]",
        "R": "[RPF]",
        "r": "[rt]",
        "s": "[esa]",
        "t": "[ft]",
        "u": "[nuo]",
        "U": "[VU]",
        "v": "[wvr]",
        "w": "[wv]",
        "x": "[xz]",
    }

    # Longest keys first: regex alternation takes the first alternative that
    # matches, so "nn" has to be listed ahead of "n" to win.
    sorted_keys = sorted(mapping, key=len, reverse=True)

    # One pattern that finds any key; re.escape guards against keys that
    # contain regex metacharacters.
    master_pattern = "|".join(re.escape(key) for key in sorted_keys)

    def replace_match(match):
        """Return the regex fragment for the key that was matched."""
        return mapping[match.group(0)]

    result = re.sub(master_pattern, replace_match, text)
    return r"\b" + result + r"\b"
def main():
    """Command-line entry point: expand the query and run Okapi with it.

    ``sys.argv[1]`` is the query string; any further arguments are appended
    to the Okapi command line unchanged. The generated regex is passed as
    Okapi's first argument and the original query is passed via ``-e``.

    Exit status: 1 (after printing usage) when no query is given, 0 when
    interrupted with Ctrl-C, otherwise the exit status of the shell that ran
    Okapi.
    """
    if len(sys.argv) < 2:
        print("Usage: okt <string> [extra_args...]")
        sys.exit(1)

    original_val = sys.argv[1]
    extra_args_list = sys.argv[2:]

    regex_val = generate_typo_regex(original_val)
    extra_args_str = shlex.join(extra_args_list)

    # Every user-supplied piece is shell-quoted; OKAPI_PATH is deliberately
    # left bare so that zsh can resolve it as an alias.
    cmd_str = (
        f"{OKAPI_PATH} {shlex.quote(regex_val)} -e {shlex.quote(original_val)} {extra_args_str}"
    )
    print(f"Executing: {cmd_str}")

    try:
        # -i forces interactive mode to load aliases from .zshrc
        completed = subprocess.run(["zsh", "-i", "-c", cmd_str])
    except KeyboardInterrupt:
        sys.exit(0)

    # Propagate failure instead of always reporting success. A negative
    # returncode means the child was killed by signal -N; report that using
    # the shell convention 128 + N.
    status = completed.returncode
    if status:
        sys.exit(128 - status if status < 0 else status)


if __name__ == "__main__":
    main()
@nk9

nk9 commented Mar 30, 2026

Copy link
Copy Markdown
Author

NB: You must install okapi first!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment