Created
October 29, 2022 02:33
-
-
Save mo-xiaoming/9fb87da16d6ef459e1b94c16055b9978 to your computer and use it in GitHub Desktop.
wildcards matching
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! https://www.drdobbs.com/architecture-and-design/matching-wildcards-an-algorithm/210200888 | |
#![allow(dead_code)] | |
fn wildcard_match(pattern_text: &str, plain_text: &str) -> bool { | |
struct AfterWildcard { | |
plain_idx: usize, | |
pattern_idx: usize, | |
} | |
// it is None if there are no prev wildcard | |
// if there were, then `plain_idx` stores *next* index in plain text that wildcard supposes to match | |
// `pattern_idx` stores *next* index right after the wildcard | |
// when they first assigned | |
// latter they become to be the next possible non matches | |
// anything pre these two considered match | |
let mut after_wildcard: Option<AfterWildcard> = None; | |
// current indices moving in two strings | |
let mut cur_pos_plain_text = 0_usize; | |
let mut cur_pos_pattern_text = 0_usize; | |
loop { | |
// current chars in two strings | |
let plain_c = plain_text.chars().nth(cur_pos_plain_text); | |
let pattern_c = pattern_text.chars().nth(cur_pos_pattern_text); | |
if plain_c.is_none() { | |
// plain text ends | |
match pattern_c { | |
None => return true, // pattern text ends as well, happy ending | |
Some('*') => { | |
// since we make wildcard matches non-eager | |
// go back to use `after_wildcard` only make it less possible to match | |
// | |
// matches iff pattern only have '*' till the end | |
return pattern_text[cur_pos_pattern_text..] | |
.chars() | |
.all(|e| e == '*'); | |
} | |
Some(w) => { | |
// go back to last wildcard and try next possible char in plain text | |
if let Some(AfterWildcard { | |
ref mut plain_idx, | |
pattern_idx, | |
}) = after_wildcard | |
{ | |
// move `plain_idx` in `after_wildcard` to the next position of `w` in plain text | |
// any positions before that is impossible to match the pattern text | |
if let Some(i) = plain_text[*plain_idx..].chars().position(|c| c == w) { | |
*plain_idx = i; | |
cur_pos_plain_text = *plain_idx; | |
cur_pos_pattern_text = pattern_idx; | |
continue; | |
} | |
// if `w` doesn't even exists in plain text, then give up | |
return false; | |
} | |
// if no prev wildcard exists, then that's it, no match | |
return false; | |
} | |
} | |
} else if plain_c != pattern_c { | |
if pattern_c == Some('*') { | |
// skip '*'s, one is as good as many | |
if let Some(i) = pattern_text[cur_pos_pattern_text..] | |
.chars() | |
.position(|e| e != '*') | |
{ | |
cur_pos_pattern_text += i; | |
} else { | |
// even better, pattern text ends with a '*', which matches everything | |
return true; | |
} | |
// pattern text doesn't end with this '*', then find next non '*' char | |
let w = pattern_text.chars().nth(cur_pos_pattern_text).unwrap(); | |
// char in pattern text does exist in plain text | |
if let Some(i) = plain_text[cur_pos_plain_text..] | |
.chars() | |
.position(|c| c == w) | |
{ | |
// update both positions | |
after_wildcard.replace(AfterWildcard { | |
plain_idx: i, | |
pattern_idx: cur_pos_pattern_text, | |
}); | |
continue; | |
} | |
// otherwise, we cannot match | |
return false; | |
} | |
if let Some(AfterWildcard { pattern_idx, .. }) = after_wildcard { | |
// go back to last wildcard | |
if pattern_idx != cur_pos_pattern_text { | |
cur_pos_pattern_text = pattern_idx; | |
// matches this char, move pattern idx forward | |
if pattern_text.chars().nth(cur_pos_pattern_text) == plain_c { | |
cur_pos_pattern_text += 1; | |
} | |
} | |
// try next plain text char anyway, current one gets swallowed by '*' | |
// or by a matching char in pattern text | |
cur_pos_plain_text += 1; | |
continue; | |
} | |
return false; | |
} else { | |
cur_pos_plain_text += 1; | |
cur_pos_pattern_text += 1; | |
} | |
} | |
} | |
#[cfg(test)] | |
mod test { | |
use super::wildcard_match; | |
#[test] | |
fn mississippi() { | |
assert!(wildcard_match("abc", "abc")); | |
assert!(!wildcard_match("abc*", "a0c")); | |
assert!(wildcard_match("mississipp**", "mississippi")); | |
assert!(wildcard_match("mississippi*", "mississippi")); | |
assert!(wildcard_match("*.*", "mississippi.river")); | |
assert!(wildcard_match("*ip*", "mississippi.river")); | |
assert!(wildcard_match("m*i*.*r", "mississippi.river")); | |
assert!(!wildcard_match("m*x*.*r", "mississippi.river")); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
A detailed explanation of wildcard pattern matching and general globbing can be found here:
https://www.partow.net/programming/wildcardmatching/index.html