Skip to content

Instantly share code, notes, and snippets.

@idrise
Created October 20, 2024 13:21
Show Gist options
  • Select an option

  • Save idrise/6999e260fb6b77b07b156495de3552b6 to your computer and use it in GitHub Desktop.

Select an option

Save idrise/6999e260fb6b77b07b156495de3552b6 to your computer and use it in GitHub Desktop.
/**
* Calculates the Damerau-Levenshtein distance between two strings.
*
* The Damerau-Levenshtein distance is a measure of the similarity between two strings,
* which considers the minimum number of operations (insertions, deletions, substitutions,
* and transpositions of two adjacent characters) required to change one string into the other.
*
* @param source - The first string.
* @param target - The second string.
* @param limit - An optional limit to the maximum distance calculated.
* If the distance exceeds this limit, the calculation is aborted.
* @returns An object containing:
* - steps: The Damerau-Levenshtein distance between the two strings.
* - relative: The relative distance (steps divided by the maximum string length).
* - similarity: A value between 0 and 1 representing the similarity between the two strings.
*/
export function damerauLevenshteinDistance(
source: string,
target: string,
limit?: number
): {
steps: number;
relative: number;
similarity: number;
} {
const sourceLength: number = source.length;
const targetLength: number = target.length;
// Determine limit if not provided
limit = limit !== undefined ? limit : Math.max(sourceLength, targetLength) + 1;
// Early exit if length difference exceeds limit
if (Math.abs(sourceLength - targetLength) > limit) {
return prepare(limit);
}
// Early exits for empty strings
if (sourceLength === 0) {
return prepare(targetLength);
}
if (targetLength === 0) {
return prepare(sourceLength);
}
// Initialize matrix
const matrix: Uint16Array[] = Array.from({ length: sourceLength + 1 }, () =>
new Uint16Array(targetLength + 1)
);
for (let i = 0; i <= sourceLength; i++) {
matrix[i][0] = i;
}
for (let j = 0; j <= targetLength; j++) {
matrix[0][j] = j;
}
// Compute matrix
for (let i = 1; i <= sourceLength; i++) {
const sourceChar = source.charAt(i - 1);
for (let j = 1; j <= targetLength; j++) {
const targetChar = target.charAt(j - 1);
const cost = sourceChar === targetChar ? 0 : 1;
let deletion = matrix[i - 1][j] + 1;
let insertion = matrix[i][j - 1] + 1;
let substitution = matrix[i - 1][j - 1] + cost;
let min = Math.min(deletion, insertion, substitution);
// Check for transposition
if (
i > 1 &&
j > 1 &&
sourceChar === target.charAt(j - 2) &&
source.charAt(i - 2) === targetChar
) {
const transposition = matrix[i - 2][j - 2] + cost;
if (transposition < min) {
min = transposition;
}
}
matrix[i][j] = min;
// Abort if distance exceeds limit
if (i === j && matrix[i][j] > limit) {
return prepare(limit);
}
}
}
return prepare(matrix[sourceLength][targetLength]);
/**
* Prepares the result object with steps, relative distance, and similarity.
*
* @param steps - The calculated Damerau-Levenshtein distance.
* @returns An object containing steps, relative distance, and similarity.
*/
function prepare(steps: number): { steps: number; relative: number; similarity: number } {
const length = Math.max(sourceLength, targetLength);
const relative = length === 0 ? 0 : steps / length;
const similarity = 1 - relative;
return {
steps,
relative,
similarity,
};
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment