-
-
Save idrise/6999e260fb6b77b07b156495de3552b6 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /** | |
| * Calculates the Damerau-Levenshtein distance between two strings. | |
| * | |
| * The Damerau-Levenshtein distance is a measure of the similarity between two strings, | |
| * which considers the minimum number of operations (insertions, deletions, substitutions, | |
| * and transpositions of two adjacent characters) required to change one string into the other. | |
| * | |
| * @param source - The first string. | |
| * @param target - The second string. | |
| * @param limit - An optional limit to the maximum distance calculated. | |
| * If the distance exceeds this limit, the calculation is aborted. | |
| * @returns An object containing: | |
| * - steps: The Damerau-Levenshtein distance between the two strings. | |
| * - relative: The relative distance (steps divided by the maximum string length). | |
| * - similarity: A value between 0 and 1 representing the similarity between the two strings. | |
| */ | |
| export function damerauLevenshteinDistance( | |
| source: string, | |
| target: string, | |
| limit?: number | |
| ): { | |
| steps: number; | |
| relative: number; | |
| similarity: number; | |
| } { | |
| const sourceLength: number = source.length; | |
| const targetLength: number = target.length; | |
| // Determine limit if not provided | |
| limit = limit !== undefined ? limit : Math.max(sourceLength, targetLength) + 1; | |
| // Early exit if length difference exceeds limit | |
| if (Math.abs(sourceLength - targetLength) > limit) { | |
| return prepare(limit); | |
| } | |
| // Early exits for empty strings | |
| if (sourceLength === 0) { | |
| return prepare(targetLength); | |
| } | |
| if (targetLength === 0) { | |
| return prepare(sourceLength); | |
| } | |
| // Initialize matrix | |
| const matrix: Uint16Array[] = Array.from({ length: sourceLength + 1 }, () => | |
| new Uint16Array(targetLength + 1) | |
| ); | |
| for (let i = 0; i <= sourceLength; i++) { | |
| matrix[i][0] = i; | |
| } | |
| for (let j = 0; j <= targetLength; j++) { | |
| matrix[0][j] = j; | |
| } | |
| // Compute matrix | |
| for (let i = 1; i <= sourceLength; i++) { | |
| const sourceChar = source.charAt(i - 1); | |
| for (let j = 1; j <= targetLength; j++) { | |
| const targetChar = target.charAt(j - 1); | |
| const cost = sourceChar === targetChar ? 0 : 1; | |
| let deletion = matrix[i - 1][j] + 1; | |
| let insertion = matrix[i][j - 1] + 1; | |
| let substitution = matrix[i - 1][j - 1] + cost; | |
| let min = Math.min(deletion, insertion, substitution); | |
| // Check for transposition | |
| if ( | |
| i > 1 && | |
| j > 1 && | |
| sourceChar === target.charAt(j - 2) && | |
| source.charAt(i - 2) === targetChar | |
| ) { | |
| const transposition = matrix[i - 2][j - 2] + cost; | |
| if (transposition < min) { | |
| min = transposition; | |
| } | |
| } | |
| matrix[i][j] = min; | |
| // Abort if distance exceeds limit | |
| if (i === j && matrix[i][j] > limit) { | |
| return prepare(limit); | |
| } | |
| } | |
| } | |
| return prepare(matrix[sourceLength][targetLength]); | |
| /** | |
| * Prepares the result object with steps, relative distance, and similarity. | |
| * | |
| * @param steps - The calculated Damerau-Levenshtein distance. | |
| * @returns An object containing steps, relative distance, and similarity. | |
| */ | |
| function prepare(steps: number): { steps: number; relative: number; similarity: number } { | |
| const length = Math.max(sourceLength, targetLength); | |
| const relative = length === 0 ? 0 : steps / length; | |
| const similarity = 1 - relative; | |
| return { | |
| steps, | |
| relative, | |
| similarity, | |
| }; | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment