Skip to content

Instantly share code, notes, and snippets.

@Mark-McCracken
Last active August 31, 2023 08:48
Show Gist options
  • Save Mark-McCracken/84b5e5cf17f760ce6a55e7f9e8d3082c to your computer and use it in GitHub Desktop.
Save Mark-McCracken/84b5e5cf17f760ce6a55e7f9e8d3082c to your computer and use it in GitHub Desktop.
BigQuery string fuzzy matching damerau-levenshtein distance
CREATE OR REPLACE FUNCTION `functions.fuzzy_match_damerau_levenshtein_distance`(s1 STRING, s2 STRING) RETURNS INT64 LANGUAGE js AS """
/*
* Based off https://github.com/fabvalaaah/damerau-levenshtein-js
* input: Two strings to compare the edit distance of. You probably want to lowercase/trim these before passing to this function
* returns: Integer of the edit distance.
*/
const initMatrix = (s1, s2) => {
if (undefined == s1 || undefined == s2) {
return null;
}
let d = [];
for (let i = 0; i <= s1.length; i++) {
d[i] = [];
d[i][0] = i;
}
for (let j = 0; j <= s2.length; j++) {
d[0][j] = j;
}
return d;
};
const damerau = (i, j, s1, s2, d, cost) => {
if (i > 1 && j > 1 && s1[i - 1] === s2[j - 2] && s1[i - 2] === s2[j - 1]) {
d[i][j] = Math.min.apply(null, [d[i][j], d[i - 2][j - 2] + cost]);
}
};
if (
undefined == s1 ||
undefined == s2 ||
"string" !== typeof s1 ||
"string" !== typeof s2
) {
return -1;
}
let d = initMatrix(s1, s2);
if (null === d) {
return -1;
}
for (var i = 1; i <= s1.length; i++) {
let cost;
for (let j = 1; j <= s2.length; j++) {
if (s1.charAt(i - 1) === s2.charAt(j - 1)) {
cost = 0;
} else {
cost = 1;
}
d[i][j] = Math.min.apply(null, [
d[i - 1][j] + 1,
d[i][j - 1] + 1,
d[i - 1][j - 1] + cost
]);
damerau(i, j, s1, s2, d, cost);
}
}
return d[s1.length][s2.length];
""";
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment