Skip to content

Instantly share code, notes, and snippets.

@anubra266
Created October 18, 2024 14:38
Show Gist options
  • Save anubra266/75ba38e6f4c1842d87bceaeaf46ab25f to your computer and use it in GitHub Desktop.
Save anubra266/75ba38e6f4c1842d87bceaeaf46ab25f to your computer and use it in GitHub Desktop.
fuxxy search
class Search {
constructor(list, options = {}) {
this.list = list;
this.options = {
isCaseSensitive: options.isCaseSensitive || false,
includeScore: options.includeScore || false,
shouldSort: options.shouldSort || true,
includeMatches: options.includeMatches || false,
minMatchCharLength: options.minMatchCharLength || 1,
threshold: options.threshold || 0.6,
keys: options.keys || [],
useFuzzySearch: options.useFuzzySearch || false,
async: options.async || false,
useRegex: options.useRegex || false,
fieldNormWeight: options.fieldNormWeight || 1,
};
this.index = [];
this._createIndex();
}
_createIndex() {
// Build an index for faster searching
this.index = this.list.map((item, refIndex) => {
const entries = {};
for (const key of this.options.keys) {
const value = this._getValueByKey(item, key);
if (typeof value === "string") {
entries[key] = this.options.isCaseSensitive
? value
: value.toLowerCase();
}
}
return { item, refIndex, entries };
});
}
search(pattern) {
const searchPattern = this.options.isCaseSensitive
? pattern
: pattern.toLowerCase();
if (this.options.async) {
return this._searchAsync(searchPattern);
} else {
return this._searchSync(searchPattern);
}
}
_searchSync(searchPattern) {
const results = [];
for (const entry of this.index) {
const result = this._processEntry(entry, searchPattern);
if (result) results.push(result);
}
return this._formatResults(results);
}
async _searchAsync(searchPattern) {
const results = [];
await Promise.all(
this.index.map(async entry => {
const result = this._processEntry(entry, searchPattern);
if (result) results.push(result);
})
);
return this._formatResults(results);
}
_processEntry(entry, searchPattern) {
let isMatchFound = false;
let totalScore = 0;
const matches = [];
for (const key in entry.entries) {
const text = entry.entries[key];
let matchResult = null;
if (this.options.useRegex) {
matchResult = this._regexMatch(text, searchPattern);
} else if (this.options.useFuzzySearch) {
matchResult = this._fuzzyMatch(text, searchPattern);
} else {
matchResult = this._exactMatch(text, searchPattern);
}
if (matchResult.isMatch) {
isMatchFound = true;
totalScore += matchResult.score;
if (this.options.includeMatches && matchResult.indices) {
matches.push({
key,
indices: matchResult.indices,
value: this._getValueByKey(entry.item, key),
});
}
}
}
if (isMatchFound) {
const resultItem = {
item: entry.item,
refIndex: entry.refIndex,
};
if (this.options.includeScore) {
resultItem.score = totalScore;
// Calculate TF-IDF or other advanced scoring here
// resultItem.score = totalScore / this.options.keys.length;
}
if (this.options.includeMatches) {
resultItem.matches = matches;
}
return resultItem;
}
return null;
}
_exactMatch(text, pattern) {
const index = text.indexOf(pattern);
if (index !== -1) {
return {
isMatch: true,
score: index / text.length,
indices: [index, index + pattern.length - 1],
};
}
return { isMatch: false };
}
_fuzzyMatch(text, pattern) {
const distance = this._levenshteinDistance(pattern, text);
const maxLen = Math.max(pattern.length, text.length);
const similarity = (maxLen - distance) / maxLen;
const isMatch = similarity >= 1 - this.options.threshold;
return {
isMatch,
score: 1 - similarity,
};
}
_regexMatch(text, pattern) {
const regex = new RegExp(
pattern,
this.options.isCaseSensitive ? "g" : "gi"
);
const matches = [];
let match;
while ((match = regex.exec(text)) !== null) {
matches.push([match.index, match.index + match[0].length - 1]);
}
if (matches.length > 0) {
return {
isMatch: true,
score: 0, // Adjust scoring if needed
indices: matches,
};
}
return { isMatch: false };
}
_formatResults(results) {
if (this.options.shouldSort) {
results.sort((a, b) => {
if (this.options.includeScore) {
return a.score - b.score;
}
return a.refIndex - b.refIndex;
});
}
return results;
}
_getValueByKey(item, key) {
const keys = key.split(".");
let value = item;
for (const k of keys) {
if (value && k in value) {
value = value[k];
} else {
return null;
}
}
return value;
}
_levenshteinDistance(a, b) {
const an = a.length;
const bn = b.length;
if (an === 0) return bn;
if (bn === 0) return an;
const matrix = [];
// Initialize the matrix
for (let i = 0; i <= bn; i++) {
matrix[i] = [i];
}
for (let j = 0; j <= an; j++) {
matrix[0][j] = j;
}
// Compute the matrix
for (let i = 1; i <= bn; i++) {
for (let j = 1; j <= an; j++) {
if (b.charAt(i - 1) === a.charAt(j - 1)) {
matrix[i][j] = matrix[i - 1][j - 1];
} else {
matrix[i][j] =
1 +
Math.min(
matrix[i - 1][j], // Deletion
matrix[i][j - 1], // Insertion
matrix[i - 1][j - 1] // Substitution
);
}
}
}
return matrix[bn][an];
}
}
const list = [
{
title: "Old Man's War",
author: {
firstName: "John",
lastName: "Scalzi",
},
},
{
title: "The Lock Artist",
author: {
firstName: "Steve",
lastName: "Hamilton",
},
},
{
title: "HTML5",
author: {
firstName: "Remy",
lastName: "Sharp",
},
},
{
title: "Right Ho Jeeves",
author: {
firstName: "P.D",
lastName: "Woodhouse",
},
},
{
title: "The Code of the Wooster",
author: {
firstName: "P.D",
lastName: "Woodhouse",
},
},
{
title: "Thank You Jeeves",
author: {
firstName: "P.D",
lastName: "Woodhouse",
},
},
{
title: "The DaVinci Code",
author: {
firstName: "Dan",
lastName: "Brown",
},
},
{
title: "Angels & Demons",
author: {
firstName: "Dan",
lastName: "Brown",
},
},
{
title: "The Silmarillion",
author: {
firstName: "J.R.R",
lastName: "Tolkien",
},
},
{
title: "Syrup",
author: {
firstName: "Max",
lastName: "Barry",
},
},
{
title: "The Lost Symbol",
author: {
firstName: "Dan",
lastName: "Brown",
},
},
{
title: "The Book of Lies",
author: {
firstName: "Brad",
lastName: "Meltzer",
},
},
{
title: "Lamb",
author: {
firstName: "Christopher",
lastName: "Moore",
},
},
{
title: "Fool",
author: {
firstName: "Christopher",
lastName: "Moore",
},
},
{
title: "Incompetence",
author: {
firstName: "Rob",
lastName: "Grant",
},
},
{
title: "Fat",
author: {
firstName: "Rob",
lastName: "Grant",
},
},
{
title: "Colony",
author: {
firstName: "Rob",
lastName: "Grant",
},
},
{
title: "Backwards, Red Dwarf",
author: {
firstName: "Rob",
lastName: "Grant",
},
},
{
title: "The Grand Design",
author: {
firstName: "Stephen",
lastName: "Hawking",
},
},
{
title: "The Book of Samson",
author: {
firstName: "David",
lastName: "Maine",
},
},
{
title: "The Preservationist",
author: {
firstName: "David",
lastName: "Maine",
},
},
{
title: "Fallen",
author: {
firstName: "David",
lastName: "Maine",
},
},
{
title: "Monster 1959",
author: {
firstName: "David",
lastName: "Maine",
},
},
];
const searchOptions = {
keys: ["title", "author.firstName"],
useFuzzySearch: true,
threshold: 0.4,
includeScore: true,
};
const search = new Search(list, searchOptions);
const searchPattern = "Da";
const results = search.search(searchPattern);
console.log(JSON.stringify(results, null, 2));
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment