Last active
June 30, 2019 22:30
-
-
Save willvincent/c619e0c2d96476513f1db6c80ad61a59 to your computer and use it in GitHub Desktop.
Reformat a query string so it's safe to use with tsquery() in postgres
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Lookup table: every operator spelling accepted in user input (lower- and
// upper-case keywords plus the raw symbols) mapped to its tsquery token.
// The table is built on a prototype-less object so that key lookups and
// `in` checks can never match members inherited from Object.prototype
// (e.g. "constructor" or "toString" typed as a search word).
const magic_terms = Object.assign(Object.create(null), {
  and: '&',
  or: '|',
  not: '!',
  OR: '|',
  AND: '&',
  NOT: '!',
  '&': '&',
  '|': '|',
  '!': '!',
  '(': '(',
  ')': ')',
})

// The distinct operator/grouping tokens found in the table above.
const magic_values = new Set(Object.values(magic_terms))
module.exports = exports = term => { | |
let paren_count = 0 | |
const res = [] | |
const bits = term | |
.replace(/!/g, ' NOT ') // Handle negation attached to a word | |
.replace(/\|/g, ' OR ') // Handle pipes as or | |
.replace(/[:*|*]/g, '') // remove wildcards, they're added automagically | |
.split(new RegExp('((?:".*?")|[()])')) // split apart search term into bits | |
for (const bit of bits) { | |
if (!bit) continue | |
const split_bits = | |
bit.substr(0, 1) === '"' && bit.substr(-1) === '"' | |
? [bit] | |
: bit.trim().split(' ') | |
for (let bit of split_bits) { | |
if (!bit) continue | |
if (bit in magic_terms) { | |
bit = magic_terms[bit] | |
let last = res.length > 0 ? res[res.length - 1] : '' | |
if (bit === ')') { | |
if (last === '(') { | |
paren_count -= 1 | |
res.pop() | |
} | |
if ( | |
paren_count && | |
magic_values.has(last) && | |
last !== '(' && | |
last !== '!' | |
) { | |
res.pop() | |
} | |
} | |
if (['|', '&'].includes(bit)) { | |
while (['|', '&'].includes(last)) { | |
res.pop() | |
last = res[res.length - 1] | |
} | |
} | |
if (['|', '&'].includes(bit) && last === '!') { | |
continue | |
} | |
if (bit === ')') { | |
paren_count -= 1 | |
} | |
if (bit === '(') { | |
if (paren_count < 0) paren_count = 0 | |
paren_count += 1 | |
} | |
if ((bit === ')' && paren_count >= 0) || bit !== ')') { | |
res.push(bit) | |
} | |
if (bit === ')') { | |
res.push('&') | |
} | |
continue | |
} | |
bit = bit.replace(/'/g, '') | |
if (bit.substr(0, 1) === '"' && bit.substr(-1) === '"') { | |
res.push(bit.replace(/"/g, "'")) | |
if (!magic_values.has(res[res.length - 1])) { | |
res.push('&') | |
} | |
} else if (bit.substr(0, 1) === '<' && bit.substr(-1) === '>') { | |
if ( | |
magic_values.has(res[res.length - 1]) && | |
res[res.length - 1] !== ')' | |
) { | |
res.pop() | |
} | |
res.push(bit) | |
} else { | |
res.push(`'${bit.replace(/'/g, '')}':*`) | |
if (!magic_values.has(res[res.length - 1])) { | |
res.push('&') | |
} | |
} | |
} | |
} | |
while (res.length > 0 && magic_values.has(res[res.length - 1])) { | |
const last = res[res.length - 1] | |
if (last === ')') break | |
if (last === '(') paren_count -= 1 | |
res.pop() | |
} | |
while (paren_count > 0) { | |
res.push(')') | |
paren_count -= 1 | |
} | |
if (['|', '&'].includes(res[0])) { | |
res.shift() | |
} | |
if (['|', '&'].includes(res[res.length - 1])) { | |
res.pop() | |
} | |
return res.join(' ') | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Inspired by Wolever's python example.
Known-good test cases:
I removed a few test cases from Wolever's examples, as I didn't see much value in escaping a literal `|` symbol, and didn't have much luck getting them to work.