Created
January 19, 2020 16:08
-
-
Save Jekis/14c4cab60e87049c1c3b671381663ce3 to your computer and use it in GitHub Desktop.
Finds js code in the <script> tag.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * This is a regex builder.
 * The final regular expression finds the js code inside the <script> tag.
 * It is complex because any js code has to be found.
 */

// Alternatives of the final expression, keyed by a descriptive label.
// Each entry holds:
//   'name'        - name used for the capture group of this alternative,
//   'exp'         - PCRE fragment, written without delimiters,
//   'match_chars' - sample of text the fragment is meant to match (illustration).
$jsCodeGroups = [];

// Match a single-line js comment together with the new line that ends it.
$jsCodeGroups['comment-1'] = [
    'name' => 'c1',
    'exp' => '//[^\n]*\n',
    'match_chars' => '// Foo comment here',
];

// Match a block js comment. It may span several lines because the final
// expression is compiled with the "s" modifier (dot matches new lines).
$jsCodeGroups['comment-2'] = [
    'name' => 'c2',
    'exp' => '/\*.*?\*/',
    'match_chars' => '/* Foo comment here */',
];

// Match any single character except single quote and double quote.
$jsCodeGroups['not-a-quote'] = [
    'name' => 'notq',
    'exp' => '[^\'"]', // In regexp it must look like text: [^'"]
    'match_chars' => 'var a = 1; alert(1);',
];

// Match any js string wrapped in single quotes.
// A backslash is always consumed together with the character after it, so an
// escaped quote (\') does not end the string and an escaped backslash (\\)
// right before the closing quote does not hide that quote.
$jsCodeGroups['single-quotes-wrap'] = [
    'name' => 'sqw',
    'exp' => "'(?:\\\\.|[^'\\\\])*'", // In regexp it must look like text: '(?:\\.|[^'\\])*'
    'match_chars' => "'I\'m a foo string'",
];

// Match any js string wrapped in double quotes (same escape handling as above).
$jsCodeGroups['double-quotes-wrap'] = [
    'name' => 'dqw',
    'exp' => '"(?:\\\\.|[^"\\\\])*"', // In regexp it must look like text: "(?:\\.|[^"\\])*"
    'match_chars' => '"Another \"foo string\""',
];

// Match any new line.
$jsCodeGroups['new-line'] = [
    'name' => 'nl',
    'exp' => '\n', // In regexp it must look like text: \n
    'match_chars' => "\n",
];
// Generate regular expression from all groups.
// A group with a non-empty 'name' becomes a named capture group "(?<name>exp)";
// any other group becomes a plain capture group "(exp)".
$jsCodeGroupsRegexps = [];
foreach ($jsCodeGroups as $gData) {
    if (!empty($gData['name'])) {
        $jsCodeGroupsRegexps[] = sprintf('(?<%s>%s)', $gData['name'], $gData['exp']);
    } else {
        $jsCodeGroupsRegexps[] = sprintf('(%s)', $gData['exp']);
    }
}

// The groups are alternatives of each other; their order is the order in which
// they are tried at every position.
$jsCodeGroupsRegexp = implode('|', $jsCodeGroupsRegexps);

// And the final regular expression.
// The opening tag may carry attributes (e.g. <script type="text/javascript">):
// "(?:\s[^>]*)?" accepts optional whitespace-separated attributes before ">".
// The js code is repeated lazily, so the match stops at the first "</script>"
// that is not swallowed by one of the groups (a comment or a string).
// Modifiers: "s" lets "." match new lines, "i" makes the tag names case-insensitive.
// Shape of the result:
// <script(?:\s[^>]*)?>(?<js>((?<name1>exp1)|(?<name2>exp2)|...)*?)?</script>
$finalRegExp = sprintf('~<script(?:\s[^>]*)?>(?<js>(%s)*?)?</script>~si', $jsCodeGroupsRegexp);
// Sample page used to check the expression. Every "<script>" / "</script>"
// that appears inside a js string or a js comment must be skipped, so a correct
// match contains both the "start" and the "end" markers.
$html = <<<HTML
<script>
/*start<!--*/
var num = 1;
var singleQuote = 'foo <script> \'foo\' </script> foo';
var doubleQuote = "foo <script> \"foo\" </script> foo";
// foo <script> foo </script> foo
/* foo <script> foo </script> foo */
/*
foo <script> foo </script> foo
*/
alert(num);
/*-->end*/
</script>
HTML;

printf("RegExp: %s\n", $finalRegExp);

$matchResult = preg_match($finalRegExp, $html, $matches);

if ($matchResult === false) {
    // preg_match() returns false on a PCRE failure (for example when the
    // backtrack limit is hit); that is not the same as "no match".
    printf('Wrong RegExp: PCRE error code %d', preg_last_error());
} elseif ($matchResult === 1) {
    // strpos() returns 0 when the marker sits at the very beginning of the
    // string, so the result has to be compared with false explicitly.
    $hasBothMarkers = isset($matches['js'])
        && strpos($matches['js'], 'start') !== false
        && strpos($matches['js'], 'end') !== false;

    if ($hasBothMarkers) {
        echo 'Correct RegExp: js code found!';
    } else {
        echo 'Wrong RegExp: found js code is incorrect.';
    }
} else {
    echo 'Wrong RegExp: Nothing matched';
}

exit;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment