Skip to content

Instantly share code, notes, and snippets.

@jarrodhroberson
Created September 14, 2025 20:06
Show Gist options
  • Save jarrodhroberson/63e08923be45dda5c8e1da46e2a34bc4 to your computer and use it in GitHub Desktop.
Save jarrodhroberson/63e08923be45dda5c8e1da46e2a34bc4 to your computer and use it in GitHub Desktop.
Rust Metaphone
use std::env;
use std::fs::File;
use std::io::{self, BufRead, BufReader, Write};
/// Encodes `input` with the original (1990) Metaphone rules for English.
///
/// The input is upper-cased and everything except ASCII letters is discarded
/// before encoding. The returned key uses the symbols
/// `0BFHJKLMNPRSTWXY` (`0` stands for the "th" sound, `X` for "sh"/"ch") plus
/// a leading vowel when the word starts with one. An input without any ASCII
/// letters yields an empty string.
///
/// Rules applied, in order of appearance in the code:
///
/// * Initial `KN`, `GN`, `PN`, `WR` drop their first letter; initial `AE`
///   encodes as `E`, initial `WH` as `W`, initial `X` as `S`.
/// * A letter identical to the one directly before it in the input is skipped
///   (except `C`, whose doubling is handled by the `C` rule). Equal codes that
///   are separated by a vowel are therefore kept: "Mississippi" -> `MSSP`.
/// * Vowels are kept only as the very first letter.
/// * `D` encodes as `T` (or `J` in `DGE`/`DGI`/`DGY`).
/// * `H` is kept only before a vowel and not after `C`, `G`, `P`, `S`, `T`.
/// * `GH` is silent unless a vowel follows; before a vowel it encodes as `F`
///   when a vowel also precedes it (this implementation's "laughing"
///   heuristic) and as `K` otherwise ("ghost").
/// * `GN` is silent only in word-final `GN` / `GNED`.
///
/// # Examples
///
/// ```text
/// metaphone("Thomas") == "0MS"
/// metaphone("knight") == "NT"
/// ```
pub fn metaphone(input: &str) -> String {
    // Prepare: uppercase and keep only ASCII letters.
    let chars: Vec<char> = input
        .to_uppercase()
        .chars()
        .filter(|c| c.is_ascii_alphabetic())
        .collect();
    let len = chars.len();

    // Bounds-checked lookups so the rules below never index past the end.
    let at = |i: usize| chars.get(i).copied();
    let is_vowel = |c: char| matches!(c, 'A' | 'E' | 'I' | 'O' | 'U');
    let vowel_at = |i: usize| at(i).map_or(false, |c| is_vowel(c));

    let mut out = String::with_capacity(len);

    // Step 1: word-initial exceptions. The value is the index at which the
    // main loop starts.
    let mut idx = match (at(0), at(1)) {
        (Some('K' | 'G' | 'P'), Some('N')) | (Some('W'), Some('R')) => 1,
        (Some('A'), Some('E')) => {
            out.push('E');
            2
        }
        (Some('W'), Some('H')) => {
            out.push('W');
            2
        }
        (Some('X'), _) => {
            out.push('S');
            1
        }
        _ => 0,
    };

    // Step 2: encode the remaining letters left to right.
    while idx < len {
        let c = chars[idx];
        let prev = if idx > 0 { at(idx - 1) } else { None };

        // Doubled input letters count once. `C` is exempt because the `C`
        // rule consumes a following `C` itself.
        if prev == Some(c) && c != 'C' {
            idx += 1;
            continue;
        }

        let nxt = at(idx + 1);
        let nxt2 = at(idx + 2);
        // Number of input letters consumed by this iteration.
        let mut step = 1;

        match c {
            'A' | 'E' | 'I' | 'O' | 'U' => {
                if idx == 0 {
                    out.push(c);
                }
            }
            'B' => {
                // Silent in a word-final "MB" ("dumb").
                if !(prev == Some('M') && nxt.is_none()) {
                    out.push('B');
                }
            }
            'C' => match (nxt, nxt2) {
                (Some('H'), _) => {
                    out.push('X');
                    step = 2;
                }
                (Some('I'), Some('A')) => {
                    out.push('X');
                    step = 3;
                }
                // Soft C. Only the C itself is consumed so that a following
                // `Y` can still be encoded when a vowel comes after it.
                (Some('I' | 'E' | 'Y'), _) => out.push('S'),
                (Some('C'), _) => {
                    out.push('K');
                    step = 2;
                }
                _ => out.push('K'),
            },
            'D' => {
                if nxt == Some('G') && matches!(nxt2, Some('E' | 'I' | 'Y')) {
                    out.push('J');
                    // Consume "DG"; the following letter is handled normally.
                    step = 2;
                } else {
                    out.push('T');
                }
            }
            'G' => {
                if nxt == Some('H') {
                    step = 2;
                    if vowel_at(idx + 2) {
                        let after_vowel = prev.map_or(false, |p| is_vowel(p));
                        out.push(if after_vowel { 'F' } else { 'K' });
                    }
                    // Otherwise "GH" is silent ("knight").
                } else {
                    let silent_gn = nxt == Some('N')
                        && (idx + 2 == len
                            || (idx + 4 == len
                                && nxt2 == Some('E')
                                && at(idx + 3) == Some('D')));
                    if !silent_gn {
                        let soft = matches!(nxt, Some('I' | 'E' | 'Y'));
                        out.push(if soft { 'J' } else { 'K' });
                    }
                }
            }
            'H' => {
                if vowel_at(idx + 1) && !matches!(prev, Some('C' | 'G' | 'P' | 'S' | 'T')) {
                    out.push('H');
                }
            }
            'F' | 'J' | 'L' | 'M' | 'N' | 'R' => out.push(c),
            'K' => {
                // "CK" was already encoded by the preceding `C`.
                if prev != Some('C') {
                    out.push('K');
                }
            }
            'P' => {
                if nxt == Some('H') {
                    out.push('F');
                    step = 2;
                } else {
                    out.push('P');
                }
            }
            'Q' => out.push('K'),
            'S' => match (nxt, nxt2) {
                (Some('H'), _) => {
                    out.push('X');
                    step = 2;
                }
                (Some('I'), Some('O' | 'A')) => {
                    out.push('X');
                    step = 3;
                }
                _ => out.push('S'),
            },
            'T' => match (nxt, nxt2) {
                (Some('I'), Some('O' | 'A')) => {
                    out.push('X');
                    step = 3;
                }
                (Some('H'), _) => {
                    out.push('0'); // metaphone convention for "TH"
                    step = 2;
                }
                // Silent in "TCH" ("match"); the "CH" is encoded next.
                (Some('C'), Some('H')) => {}
                _ => out.push('T'),
            },
            'V' => out.push('F'),
            'W' | 'Y' => {
                if vowel_at(idx + 1) {
                    out.push(c);
                }
            }
            'X' => out.push_str("KS"),
            'Z' => out.push('S'),
            // Defensive fallback: every ASCII uppercase letter is matched by
            // an arm above, so this only keeps the `match` exhaustive.
            other => out.push(other),
        }

        idx += step;
    }

    out
}
fn run_with_file(path: &str) -> io::Result<()> {
let file = File::open(path)?;
let reader = BufReader::new(file);
// Print as "word<TAB>metaphone"
for line in reader.lines() {
let line = line?;
let word = line.trim();
if word.is_empty() {
continue;
}
let code = metaphone(word);
println!("{}\t{}", word, code);
}
Ok(())
}
/// Command-line entry point.
///
/// Expects the input file name as the first argument. Prints a usage message
/// and exits with status 2 when it is missing; exits with status 1 when
/// `run_with_file` reports an error.
fn main() {
    // Argument 0 is the program name, so the file name is argument 1.
    let file_path = env::args().nth(1).unwrap_or_else(|| {
        eprintln!("Usage: metaphone_file <filename>");
        std::process::exit(2)
    });

    match run_with_file(&file_path) {
        Ok(()) => {}
        Err(e) => {
            eprintln!("Error processing file {}: {}", file_path, e);
            std::process::exit(1);
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment