Created
September 14, 2025 20:06
-
-
Save jarrodhroberson/63e08923be45dda5c8e1da46e2a34bc4 to your computer and use it in GitHub Desktop.
Rust Metaphone
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use std::env; | |
| use std::fs::File; | |
| use std::io::{self, BufRead, BufReader}; | |
/// Computes a Metaphone-style phonetic key for an English word.
///
/// The input is uppercased and every character that is not an ASCII letter is
/// discarded. The remaining letters are scanned left to right and translated
/// into code characters:
///
/// * a leading `KN`, `GN`, `PN` or `WR` loses its first letter, a leading `AE`
///   is coded as `E`, a leading `WH` as `W`, and a leading `X` as `S`;
/// * vowels are kept only when they are the very first letter;
/// * `D` is coded as `T` (`J` in `DGE`/`DGI`/`DGY`), `TH` as `'0'`, `PH` and
///   `V` as `F`, `Q` as `K`, `Z` as `S`, and a non-initial `X` as `KS`;
/// * `H`, `W` and `Y` are kept only when a vowel follows them;
/// * after each input letter, a trailing output character that equals the one
///   before it is removed.
///
/// Returns an empty string when the input contains no ASCII letters.
///
/// Examples: `"hello"` -> `"HL"`, `"Thomas"` -> `"0MS"`, `"knight"` -> `"NT"`.
pub fn metaphone(input: &str) -> String {
    fn is_vowel(c: char) -> bool {
        matches!(c, 'A' | 'E' | 'I' | 'O' | 'U')
    }

    // Prepare: uppercase and keep only ASCII letters.
    let chars: Vec<char> = input
        .to_uppercase()
        .chars()
        .filter(|c| c.is_ascii_alphabetic())
        .collect();
    if chars.is_empty() {
        return String::new();
    }

    let len = chars.len();
    // Checked lookups so look-ahead past the end simply yields `None`.
    let at = |i: usize| chars.get(i).copied();
    let vowel_at = |i: usize| at(i).map_or(false, is_vowel);

    let mut out = String::with_capacity(len);
    let mut idx = 0usize;

    // Step 1: special-case a handful of word-initial letter pairs.
    if len > 1 {
        match (chars[0], chars[1]) {
            // Silent first letter: start scanning at the second one.
            ('K', 'N') | ('G', 'N') | ('P', 'N') | ('W', 'R') => idx = 1,
            // "AE" sounds like "E"; emit it here because the vowel arm below
            // only keeps a vowel sitting at index 0.
            ('A', 'E') => {
                out.push('E');
                idx = 2;
            }
            ('W', 'H') => {
                out.push('W');
                idx = 2;
            }
            _ => {}
        }
    }
    if idx == 0 && chars[0] == 'X' {
        out.push('S');
        idx = 1;
    }

    // Step 2: translate the remaining letters.
    while idx < len {
        let c = chars[idx];
        let prev = if idx > 0 { Some(chars[idx - 1]) } else { None };
        let nxt = at(idx + 1);
        let nxt2 = at(idx + 2);
        let next_is_vowel = vowel_at(idx + 1);
        let soft_follow = matches!(nxt, Some('I' | 'E' | 'Y'));
        let io_or_ia = nxt == Some('I') && matches!(nxt2, Some('O' | 'A'));

        // Every arm pushes its code(s) and yields the number of input letters
        // it consumed.
        let consumed = match c {
            'A' | 'E' | 'I' | 'O' | 'U' => {
                if idx == 0 {
                    out.push(c);
                }
                1
            }
            'B' => {
                // Silent in a word-final "MB" (e.g. "dumb").
                let silent = prev == Some('M') && nxt.is_none();
                if !silent {
                    out.push('B');
                }
                1
            }
            'C' => {
                if nxt == Some('H') {
                    out.push('X');
                    2
                } else if nxt == Some('I') && nxt2 == Some('A') {
                    out.push('X');
                    3
                } else if soft_follow {
                    out.push('S');
                    2
                } else if nxt == Some('C') {
                    // "CC" is a single hard sound; swallow the second C too.
                    out.push('K');
                    2
                } else {
                    out.push('K');
                    1
                }
            }
            'D' => {
                if nxt == Some('G') && matches!(nxt2, Some('E' | 'I' | 'Y')) {
                    out.push('J');
                    3
                } else {
                    // Metaphone has no 'D' code: D shares the 'T' code.
                    out.push('T');
                    1
                }
            }
            'G' => {
                if nxt == Some('H') {
                    if idx == 0 {
                        // Word-initial "GH" (e.g. "ghost") is a hard G.
                        out.push('K');
                    } else if vowel_at(idx + 2) && prev.map_or(false, is_vowel) {
                        out.push('F');
                    }
                    // Any other "GH" is silent.
                    2
                } else if nxt == Some('N') {
                    1
                } else if soft_follow && prev != Some('G') {
                    out.push('J');
                    1
                } else {
                    out.push('K');
                    1
                }
            }
            'H' => {
                // The digraphs CH/SH/PH/TH/GH are consumed by their own arms,
                // so an H seen here is sounded whenever a vowel follows it.
                if next_is_vowel {
                    out.push('H');
                }
                1
            }
            'F' | 'J' | 'L' | 'M' | 'N' | 'R' => {
                out.push(c);
                1
            }
            'K' => {
                // In "CK" the C has already produced the 'K'.
                if prev != Some('C') {
                    out.push('K');
                }
                1
            }
            'P' => {
                if nxt == Some('H') {
                    out.push('F');
                    2
                } else {
                    out.push('P');
                    1
                }
            }
            'Q' => {
                out.push('K');
                1
            }
            'S' => {
                if nxt == Some('H') {
                    out.push('X');
                    2
                } else if io_or_ia {
                    out.push('X');
                    3
                } else {
                    out.push('S');
                    1
                }
            }
            'T' => {
                if io_or_ia {
                    out.push('X');
                    3
                } else if nxt == Some('H') {
                    out.push('0'); // metaphone convention for "TH"
                    2
                } else {
                    // The T in "TCH" is silent; the following "CH" emits 'X'.
                    let silent = nxt == Some('C') && nxt2 == Some('H');
                    if !silent {
                        out.push('T');
                    }
                    1
                }
            }
            'V' => {
                out.push('F');
                1
            }
            'W' | 'Y' => {
                if next_is_vowel {
                    out.push(c);
                }
                1
            }
            'X' => {
                out.push_str("KS");
                1
            }
            'Z' => {
                out.push('S');
                1
            }
            other => {
                out.push(other);
                1
            }
        };
        idx += consumed;

        // Collapse a repeated trailing code. `out` only ever holds ASCII, so
        // comparing the last two bytes is an O(1) comparison of the last two
        // characters.
        let repeated = matches!(out.as_bytes(), [.., a, b] if a == b);
        if repeated {
            out.pop();
        }
    }

    out
}
| fn run_with_file(path: &str) -> io::Result<()> { | |
| let file = File::open(path)?; | |
| let reader = BufReader::new(file); | |
| // Print as "word<TAB>metaphone" | |
| for line in reader.lines() { | |
| let line = line?; | |
| let word = line.trim(); | |
| if word.is_empty() { | |
| continue; | |
| } | |
| let code = metaphone(word); | |
| println!("{}\t{}", word, code); | |
| } | |
| Ok(()) | |
| } | |
| fn main() { | |
| let mut args = env::args().skip(1); | |
| let file_path = match args.next() { | |
| Some(p) => p, | |
| None => { | |
| eprintln!("Usage: metaphone_file <filename>"); | |
| std::process::exit(2); | |
| } | |
| }; | |
| if let Err(e) = run_with_file(&file_path) { | |
| eprintln!("Error processing file {}: {}", file_path, e); | |
| std::process::exit(1); | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment