Created
June 12, 2023 13:14
-
-
Save monyone/f9d240701fc85c6cac904b04f5d29210 to your computer and use it in GitHub Desktop.
Aho Corasick Implementation in TypeScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Trie { | |
private goto: Map<string, Trie> = new Map<string, Trie>(); | |
public keywords: string[] = [] | |
public failure: Trie | null = null; | |
public has(s: string) { | |
return this.goto.has(s); | |
} | |
public get(s: string) { | |
return this.goto.get(s); | |
} | |
public set(s: string, next: Trie) { | |
return this.goto.set(s, next); | |
} | |
public entries() { | |
return this.goto.entries(); | |
} | |
public empty() { | |
return this.keywords.length === 0; | |
} | |
public add(k: string) { | |
this.keywords.push(k); | |
} | |
public join(t: Trie) { | |
this.keywords.push(... t.keywords); | |
} | |
} | |
export default class AhoTrie { | |
private root = new Trie(); | |
constructor(keywords: string[]) { | |
// build goto | |
for (const keyword of keywords) { | |
let current: Trie = this.root; | |
for (const ch of keyword) { | |
let next: Trie = current.get(ch) ?? (new Trie()) | |
current.set(ch, next); | |
current = next; | |
} | |
current.add(keyword); | |
} | |
// build failure | |
const queue: [Trie, Trie, string][] = []; | |
for (const [ch, next] of this.root.entries()) { | |
queue.push([next, this.root, ch]); | |
} | |
while (queue.length > 0) { | |
const [current, parent, ch] = queue.shift()!; | |
let failure = parent.failure; | |
while (failure != null && !failure.has(ch)) { | |
failure = failure.failure; | |
} | |
current.failure = failure?.get(ch) ?? this.root; | |
current.join(current.failure); | |
for (const [ch, next] of current.entries()) { | |
queue.push([next, current, ch]); | |
} | |
} | |
this.root.failure = this.root; | |
} | |
parseText(text: string): string[] { | |
const textLength = text.length; | |
const collected: string[] = []; | |
let state: Trie = this.root; | |
for (const ch of text) { | |
if (!state.empty()) { | |
collected.push(... state.keywords); | |
} | |
/* | |
while (!state.has(ch) && state !== this.root) { | |
state = state.failure ?? this.root; | |
} | |
state = state.get(ch) ?? this.root; | |
//*/ | |
//* | |
let next = state.get(ch); | |
while (next == null) { | |
state = state.failure!; | |
next = state.get(ch) ?? (state !== this.root ? undefined : this.root); | |
} | |
state = next; | |
//*/ | |
} | |
if (!state.empty()) { | |
collected.push(... state.keywords); | |
} | |
return collected; | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Trie { | |
private goto: Map<string, Trie> = new Map<string, Trie>(); | |
public terminate: boolean = false; | |
public failure: Trie | null = null; | |
public has(s: string) { | |
return this.goto.has(s); | |
} | |
public get(s: string) { | |
return this.goto.get(s); | |
} | |
public set(s: string, next: Trie) { | |
return this.goto.set(s, next); | |
} | |
public entries() { | |
return this.goto.entries(); | |
} | |
} | |
export default class NGTrie { | |
private root = new Trie(); | |
constructor(ngwords: string[]) { | |
// build goto | |
for (const ngword of ngwords) { | |
let current: Trie = this.root; | |
for (const ch of ngword) { | |
let next: Trie = current.get(ch) ?? (new Trie()) | |
current.set(ch, next); | |
current = next; | |
} | |
current.terminate = true; | |
} | |
// build failure | |
const queue: [Trie, Trie, string][] = []; | |
for (const [ch, next] of this.root.entries()) { | |
queue.push([next, this.root, ch]); | |
} | |
while (queue.length > 0) { | |
const [current, parent, ch] = queue.shift()!; | |
let failure = parent.failure; | |
while (failure != null && !failure.has(ch)) { | |
failure = failure.failure; | |
} | |
current.failure = failure?.get(ch) ?? this.root; | |
current.terminate ||= current.failure.terminate; | |
for (const [ch, next] of current.entries()) { | |
queue.push([next, current, ch]); | |
} | |
} | |
this.root.failure = this.root; | |
} | |
// validate text | |
validate(text: string): boolean { | |
const textLength = text.length; | |
const collected: string[] = []; | |
let state: Trie = this.root; | |
for (const ch of text) { | |
if (state.terminate) { return true; } | |
while (!state.has(ch) && state !== this.root) { | |
state = state.failure ?? this.root; | |
} | |
state = state.get(ch) ?? this.root; | |
} | |
return state.terminate; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
RegExp を Trie っぽくしたが、普通の RegExp の 100 倍以上遅い