Created
September 8, 2021 14:57
-
-
Save AVGP/3b7bd11052399f3832684160071ee04f to your computer and use it in GitHub Desktop.
Experimental code to match robots.txt rules against a given path.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Rule patterns that the demo at the bottom of the file runs against `path`.
// The commented-out entries are additional scenarios that can be enabled by
// moving them out of the block comment.
const patterns = [
  /*
  '/path/yolo.png',
  '/path',
  '/something',
  '/path/hello.png$',
  '/path/hello$',
  '/a*bc'
  */
  '/a*a',
];

// Sample path under test. Other paths noted by the author as wildcard
// scenarios (presumably meant for the '/a*bc' pattern above):
//   '/aabaabc'      - needs to match
//   '/aabaabcebced' - needs to match as well
//   '/aabaabxcde'   - must not match
const path = '/ab';
/**
 * Decide whether a robots.txt rule pattern matches a URL path.
 *
 * Matching rules:
 *  - The pattern is matched as a prefix of the path, so an empty pattern
 *    matches every path.
 *  - `*` matches any run of characters, including an empty one. Any number
 *    of wildcards may appear in a pattern.
 *  - `$` as the very last pattern character anchors the match to the end of
 *    the path. A `$` anywhere else is compared literally.
 *  - Every other character must match the path character exactly.
 *
 * Instead of backtracking, the function keeps the list of path offsets the
 * already-processed part of the pattern can end at, and advances that list
 * one pattern character at a time. This takes at most
 * pattern.length * (path.length + 1) steps.
 *
 * @param {string} pattern - Rule pattern, e.g. '/fish*.php$'.
 * @param {string} path - URL path to test, e.g. '/fish/index.php'.
 * @returns {boolean} true when the pattern matches the path.
 */
function doesRuleMatch(pattern, path) {
  const pathLength = path.length;
  const lastPatternIndex = pattern.length - 1;

  // Ascending list of path offsets at which the pattern prefix consumed so
  // far can end. Before any pattern character is read, that is offset 0.
  let reachableOffsets = [0];

  for (let patternPos = 0; patternPos < pattern.length; patternPos++) {
    const patternChar = pattern[patternPos];

    // A trailing '$' only matches when some candidate consumed the whole path.
    if (patternChar === '$' && patternPos === lastPatternIndex) {
      return reachableOffsets.includes(pathLength);
    }

    if (patternChar === '*') {
      // A trailing wildcard swallows whatever is left of the path.
      if (patternPos === lastPatternIndex) {
        return true;
      }
      // The wildcard may consume zero or more characters, so every offset
      // from the earliest candidate up to the end of the path is reachable.
      const earliestOffset = reachableOffsets[0];
      reachableOffsets = Array.from(
        { length: pathLength - earliestOffset + 1 },
        (_, step) => earliestOffset + step,
      );
      continue;
    }

    // Literal character: keep only the candidates whose next path character
    // equals it (offsets at the end of the path read `undefined` and drop out).
    reachableOffsets = reachableOffsets
      .filter((offset) => path[offset] === patternChar)
      .map((offset) => offset + 1);

    if (reachableOffsets.length === 0) {
      return false;
    }
  }

  // The whole pattern was consumed without an end anchor: prefix match.
  return true;
}
// Demo: print, for each pattern, whether it matches the sample path.
console.log(patterns.map((pattern) => doesRuleMatch(pattern, path)));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment