Created
October 18, 2024 14:38
-
-
Save anubra266/75ba38e6f4c1842d87bceaeaf46ab25f to your computer and use it in GitHub Desktop.
fuxxy search
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Search { | |
constructor(list, options = {}) { | |
this.list = list; | |
this.options = { | |
isCaseSensitive: options.isCaseSensitive || false, | |
includeScore: options.includeScore || false, | |
shouldSort: options.shouldSort || true, | |
includeMatches: options.includeMatches || false, | |
minMatchCharLength: options.minMatchCharLength || 1, | |
threshold: options.threshold || 0.6, | |
keys: options.keys || [], | |
useFuzzySearch: options.useFuzzySearch || false, | |
async: options.async || false, | |
useRegex: options.useRegex || false, | |
fieldNormWeight: options.fieldNormWeight || 1, | |
}; | |
this.index = []; | |
this._createIndex(); | |
} | |
_createIndex() { | |
// Build an index for faster searching | |
this.index = this.list.map((item, refIndex) => { | |
const entries = {}; | |
for (const key of this.options.keys) { | |
const value = this._getValueByKey(item, key); | |
if (typeof value === "string") { | |
entries[key] = this.options.isCaseSensitive | |
? value | |
: value.toLowerCase(); | |
} | |
} | |
return { item, refIndex, entries }; | |
}); | |
} | |
search(pattern) { | |
const searchPattern = this.options.isCaseSensitive | |
? pattern | |
: pattern.toLowerCase(); | |
if (this.options.async) { | |
return this._searchAsync(searchPattern); | |
} else { | |
return this._searchSync(searchPattern); | |
} | |
} | |
_searchSync(searchPattern) { | |
const results = []; | |
for (const entry of this.index) { | |
const result = this._processEntry(entry, searchPattern); | |
if (result) results.push(result); | |
} | |
return this._formatResults(results); | |
} | |
async _searchAsync(searchPattern) { | |
const results = []; | |
await Promise.all( | |
this.index.map(async entry => { | |
const result = this._processEntry(entry, searchPattern); | |
if (result) results.push(result); | |
}) | |
); | |
return this._formatResults(results); | |
} | |
_processEntry(entry, searchPattern) { | |
let isMatchFound = false; | |
let totalScore = 0; | |
const matches = []; | |
for (const key in entry.entries) { | |
const text = entry.entries[key]; | |
let matchResult = null; | |
if (this.options.useRegex) { | |
matchResult = this._regexMatch(text, searchPattern); | |
} else if (this.options.useFuzzySearch) { | |
matchResult = this._fuzzyMatch(text, searchPattern); | |
} else { | |
matchResult = this._exactMatch(text, searchPattern); | |
} | |
if (matchResult.isMatch) { | |
isMatchFound = true; | |
totalScore += matchResult.score; | |
if (this.options.includeMatches && matchResult.indices) { | |
matches.push({ | |
key, | |
indices: matchResult.indices, | |
value: this._getValueByKey(entry.item, key), | |
}); | |
} | |
} | |
} | |
if (isMatchFound) { | |
const resultItem = { | |
item: entry.item, | |
refIndex: entry.refIndex, | |
}; | |
if (this.options.includeScore) { | |
resultItem.score = totalScore; | |
// Calculate TF-IDF or other advanced scoring here | |
// resultItem.score = totalScore / this.options.keys.length; | |
} | |
if (this.options.includeMatches) { | |
resultItem.matches = matches; | |
} | |
return resultItem; | |
} | |
return null; | |
} | |
_exactMatch(text, pattern) { | |
const index = text.indexOf(pattern); | |
if (index !== -1) { | |
return { | |
isMatch: true, | |
score: index / text.length, | |
indices: [index, index + pattern.length - 1], | |
}; | |
} | |
return { isMatch: false }; | |
} | |
_fuzzyMatch(text, pattern) { | |
const distance = this._levenshteinDistance(pattern, text); | |
const maxLen = Math.max(pattern.length, text.length); | |
const similarity = (maxLen - distance) / maxLen; | |
const isMatch = similarity >= 1 - this.options.threshold; | |
return { | |
isMatch, | |
score: 1 - similarity, | |
}; | |
} | |
_regexMatch(text, pattern) { | |
const regex = new RegExp( | |
pattern, | |
this.options.isCaseSensitive ? "g" : "gi" | |
); | |
const matches = []; | |
let match; | |
while ((match = regex.exec(text)) !== null) { | |
matches.push([match.index, match.index + match[0].length - 1]); | |
} | |
if (matches.length > 0) { | |
return { | |
isMatch: true, | |
score: 0, // Adjust scoring if needed | |
indices: matches, | |
}; | |
} | |
return { isMatch: false }; | |
} | |
_formatResults(results) { | |
if (this.options.shouldSort) { | |
results.sort((a, b) => { | |
if (this.options.includeScore) { | |
return a.score - b.score; | |
} | |
return a.refIndex - b.refIndex; | |
}); | |
} | |
return results; | |
} | |
_getValueByKey(item, key) { | |
const keys = key.split("."); | |
let value = item; | |
for (const k of keys) { | |
if (value && k in value) { | |
value = value[k]; | |
} else { | |
return null; | |
} | |
} | |
return value; | |
} | |
_levenshteinDistance(a, b) { | |
const an = a.length; | |
const bn = b.length; | |
if (an === 0) return bn; | |
if (bn === 0) return an; | |
const matrix = []; | |
// Initialize the matrix | |
for (let i = 0; i <= bn; i++) { | |
matrix[i] = [i]; | |
} | |
for (let j = 0; j <= an; j++) { | |
matrix[0][j] = j; | |
} | |
// Compute the matrix | |
for (let i = 1; i <= bn; i++) { | |
for (let j = 1; j <= an; j++) { | |
if (b.charAt(i - 1) === a.charAt(j - 1)) { | |
matrix[i][j] = matrix[i - 1][j - 1]; | |
} else { | |
matrix[i][j] = | |
1 + | |
Math.min( | |
matrix[i - 1][j], // Deletion | |
matrix[i][j - 1], // Insertion | |
matrix[i - 1][j - 1] // Substitution | |
); | |
} | |
} | |
} | |
return matrix[bn][an]; | |
} | |
} | |
const list = [ | |
{ | |
title: "Old Man's War", | |
author: { | |
firstName: "John", | |
lastName: "Scalzi", | |
}, | |
}, | |
{ | |
title: "The Lock Artist", | |
author: { | |
firstName: "Steve", | |
lastName: "Hamilton", | |
}, | |
}, | |
{ | |
title: "HTML5", | |
author: { | |
firstName: "Remy", | |
lastName: "Sharp", | |
}, | |
}, | |
{ | |
title: "Right Ho Jeeves", | |
author: { | |
firstName: "P.D", | |
lastName: "Woodhouse", | |
}, | |
}, | |
{ | |
title: "The Code of the Wooster", | |
author: { | |
firstName: "P.D", | |
lastName: "Woodhouse", | |
}, | |
}, | |
{ | |
title: "Thank You Jeeves", | |
author: { | |
firstName: "P.D", | |
lastName: "Woodhouse", | |
}, | |
}, | |
{ | |
title: "The DaVinci Code", | |
author: { | |
firstName: "Dan", | |
lastName: "Brown", | |
}, | |
}, | |
{ | |
title: "Angels & Demons", | |
author: { | |
firstName: "Dan", | |
lastName: "Brown", | |
}, | |
}, | |
{ | |
title: "The Silmarillion", | |
author: { | |
firstName: "J.R.R", | |
lastName: "Tolkien", | |
}, | |
}, | |
{ | |
title: "Syrup", | |
author: { | |
firstName: "Max", | |
lastName: "Barry", | |
}, | |
}, | |
{ | |
title: "The Lost Symbol", | |
author: { | |
firstName: "Dan", | |
lastName: "Brown", | |
}, | |
}, | |
{ | |
title: "The Book of Lies", | |
author: { | |
firstName: "Brad", | |
lastName: "Meltzer", | |
}, | |
}, | |
{ | |
title: "Lamb", | |
author: { | |
firstName: "Christopher", | |
lastName: "Moore", | |
}, | |
}, | |
{ | |
title: "Fool", | |
author: { | |
firstName: "Christopher", | |
lastName: "Moore", | |
}, | |
}, | |
{ | |
title: "Incompetence", | |
author: { | |
firstName: "Rob", | |
lastName: "Grant", | |
}, | |
}, | |
{ | |
title: "Fat", | |
author: { | |
firstName: "Rob", | |
lastName: "Grant", | |
}, | |
}, | |
{ | |
title: "Colony", | |
author: { | |
firstName: "Rob", | |
lastName: "Grant", | |
}, | |
}, | |
{ | |
title: "Backwards, Red Dwarf", | |
author: { | |
firstName: "Rob", | |
lastName: "Grant", | |
}, | |
}, | |
{ | |
title: "The Grand Design", | |
author: { | |
firstName: "Stephen", | |
lastName: "Hawking", | |
}, | |
}, | |
{ | |
title: "The Book of Samson", | |
author: { | |
firstName: "David", | |
lastName: "Maine", | |
}, | |
}, | |
{ | |
title: "The Preservationist", | |
author: { | |
firstName: "David", | |
lastName: "Maine", | |
}, | |
}, | |
{ | |
title: "Fallen", | |
author: { | |
firstName: "David", | |
lastName: "Maine", | |
}, | |
}, | |
{ | |
title: "Monster 1959", | |
author: { | |
firstName: "David", | |
lastName: "Maine", | |
}, | |
}, | |
]; | |
const searchOptions = { | |
keys: ["title", "author.firstName"], | |
useFuzzySearch: true, | |
threshold: 0.4, | |
includeScore: true, | |
}; | |
const search = new Search(list, searchOptions); | |
const searchPattern = "Da"; | |
const results = search.search(searchPattern); | |
console.log(JSON.stringify(results, null, 2)); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment