Last active
April 30, 2018 04:01
-
-
Save derek/8035740 to your computer and use it in GitHub Desktop.
Boyer–Moore–Horspool in JavaScript
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function boyer_moore_horspool(haystack, needle) { | |
var badMatchTable = {}; | |
var maxOffset = haystack.length - needle.length; | |
var offset = 0; | |
var last = needle.length - 1; | |
var scan; | |
// Generate the bad match table, which is the location of offsets | |
// to jump forward when a comparison fails | |
Array.prototype.forEach.call(needle, function (char, i) { | |
badMatchTable[char] = last - i; | |
}); | |
// Now look for the needle | |
while (offset <= maxOffset) { | |
// Search right-to-left, checking to see if the current offset at | |
// needle and haystack match. If they do, rewind 1, repeat, and if we | |
// eventually match the first character, return the offset. | |
for (scan=last; needle[scan] === haystack[scan+offset]; scan--) { | |
if (scan === 0) { | |
return offset; | |
} | |
} | |
offset += badMatchTable[haystack[offset + last]] || last; | |
} | |
return -1; | |
} | |
var stringLocation = boyer_moore_horspool('because sometimes algorithms are more fun than str.search()', 'algorithms'); | |
console.log(stringLocation); |
revised version for resolving some issues
- "babbbbbabb", "bbab" return -1
- needle.length <= 1 goes into an infinite loop
function boyer_moore_horspool(haystack, needle) {
var badMatchTable = {};
var maxOffset = haystack.length - needle.length;
var offset = 0;
var last = needle.length - 1;
var scan;
if (last < 0) return 0
// Generate the bad match table, which is the location of offsets
// to jump forward when a comparison fails
for (var i = 0; i < needle.length - 1; i++) {
badMatchTable[needle[i]] = last - i;
}
// Now look for the needle
while (offset <= maxOffset) {
// Search right-to-left, checking to see if the current offset at
// needle and haystack match. If they do, rewind 1, repeat, and if we
// eventually match the first character, return the offset.
for (scan=last; needle[scan] === haystack[scan+offset]; scan--) {
if (scan === 0) {
return offset;
}
}
offset += badMatchTable[haystack[offset + last]] || last || 1;
}
return -1;
}
If there is possibility of existence of needle string multiple times in haystack then how will this work?
this code is not active again?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
If
needle === ""
, this goes into an infinite loop; need an early return to prevent that edge case.