Last active
October 20, 2020 16:53
-
-
Save vp777/1e6d5520c49cc4ca3d84e59c8a3296e6 to your computer and use it in GitHub Desktop.
A small script that attempts to identify when an HTTP resource was last modified. It does so either by reading the Last-Modified response header or, when that header is absent, by running a binary search with the If-Modified-Since/If-Unmodified-Since request headers.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Estimate when an HTTP resource was last modified.
#
# Usage: <script> URL [ACCURACY]
#   URL       resource to probe (required).
#   ACCURACY  how many date components to resolve, most significant first
#             (year, month, day, hour, minute, second). Defaults to 2, i.e.
#             year and month. Values above 6 are treated as 6.
#
# Original author's note: not sure how useful If-Modified-Since /
# If-Unmodified-Since would be if the Last-Modified header is not available :)
resource=${1?Missing the URL of the resource}
accuracy=${2:-2}

# The value is used later in arithmetic contexts and array slices, where a
# non-numeric string would silently evaluate to 0; reject it up front instead.
if [[ ! $accuracy =~ ^[0-9]+$ ]]; then
  printf 'ACCURACY must be a non-negative integer, got: %s\n' "$accuracy" >&2
  exit 2
fi
# Force base 10 so that input such as "08" is not parsed as invalid octal.
accuracy=$((10#$accuracy))
if ((accuracy > 6)); then
  accuracy=6
fi
#######################################
# Substitute placeholders in a template string.
# Arguments:
#   $1      - the template
#   $2 $3   - first pattern and its replacement
#   $4 $5   - second pattern and its replacement, and so on
# Outputs:
#   The template with the first occurrence of each pattern replaced, written
#   to stdout without a trailing newline. A pattern with no matching
#   replacement argument is replaced by the empty string.
#######################################
replacer() {
  local result=$1
  local i next
  for ((i = 2; i <= $#; i += 2)); do
    next=$((i + 1))
    # Both sides are quoted on purpose: the pattern is matched literally (no
    # globbing) and the replacement is inserted literally (no '&' expansion).
    result=${result/"${!i}"/"${!next}"}
  done
  printf '%s' "$result"
}
#######################################
# Classify a response code obtained with an If-Modified-Since request.
# Globals:
#   mod_resp_code_ifmod (read) - reference status code to compare against
# Arguments:
#   $1 - HTTP status code of the probe request
# Outputs:
#   "1" when $1 equals the reference code, "0" otherwise.
#######################################
test_ifmod_since() {
  if [[ $1 -eq $mod_resp_code_ifmod ]]; then
    echo 1
  else
    echo 0
  fi
}
#######################################
# Classify a response code obtained with an If-Unmodified-Since request.
# Globals:
#   mod_resp_code_ifunmod (read) - reference status code to compare against
# Arguments:
#   $1 - HTTP status code of the probe request
# Outputs:
#   "0" when $1 equals the reference code, "1" otherwise (the inverse of
#   test_ifmod_since's convention).
#######################################
test_ifunmod_since() {
  if [[ $1 -eq $mod_resp_code_ifunmod ]]; then
    echo 0
  else
    echo 1
  fi
}
user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"

# Fast path: if the server sends a Last-Modified header, report it and stop.
# The response headers are dumped to stdout (-D -) while the body is
# discarded. The match is case-insensitive because HTTP/2 responses carry
# lowercase header names; -m1 keeps only the first matching line.
last_modified=$(curl -s -H "User-Agent: $user_agent" -o /dev/null -D - "$resource" \
  | grep -i -m1 '^last-modified:')
if [[ -n $last_modified ]]; then
  # Header value looks like "Tue, 20 Oct 2020 16:53:00 GMT": drop everything
  # up to the weekday's comma, then peel off day, month and year in turn.
  last_modified=${last_modified#*, }
  day=${last_modified%% *}
  last_modified=${last_modified#* }
  month=${last_modified%% *}
  last_modified=${last_modified#* }
  year=${last_modified%% *}
  echo "$year $month $day LM"
  exit
fi
# Print the HTTP status code returned for $resource when the request carries
# the single extra header line given in $1.
probe_status() {
  curl -s -o /dev/null -w '%{http_code}' \
    -H "User-Agent: $user_agent" -H "$1" "$resource"
}

# Two reference dates: one far in the past and one at midnight yesterday.
# HTTP-date requires a two-digit day, English day/month names and GMT, hence
# the "01", LC_ALL=C and -u. Note: `date -d` is a GNU date extension.
old_date="Sat, 01 Jan 2000 00:00:00 GMT"
read -r DAY YYYY MON DD <<<"$(LC_ALL=C date -u -d '1 day ago' +'%a %Y %b %d')"
current_date="$DAY, $DD $MON $YYYY 00:00:00 GMT"

# A header counts as supported when the server answers differently for the
# old and the recent date. If both are supported, the later check
# (If-Unmodified-Since) wins.
base_resp_code_ifmod=$(probe_status "If-Modified-Since: ${old_date}")
mod_resp_code_ifmod=$(probe_status "If-Modified-Since: ${current_date}")
if [[ $base_resp_code_ifmod -ne $mod_resp_code_ifmod ]]; then
  echo "If-Modified-Since: supported ($base_resp_code_ifmod,$mod_resp_code_ifmod)"
  testing_func=test_ifmod_since
  header="If-Modified-Since"
fi

base_resp_code_ifunmod=$(probe_status "If-Unmodified-Since: ${current_date}")
mod_resp_code_ifunmod=$(probe_status "If-Unmodified-Since: ${old_date}")
if [[ $base_resp_code_ifunmod -ne $mod_resp_code_ifunmod ]]; then
  echo "If-Unmodified-Since: supported ($base_resp_code_ifunmod,$mod_resp_code_ifunmod)"
  testing_func=test_ifunmod_since
  header="If-Unmodified-Since"
fi

# Neither header produced a visible difference: carry on with
# If-Modified-Since anyway as a best effort.
if [[ -z $testing_func ]]; then
  echo "Non of the headers appear to be supported, trying with If-Modified-Since"
  testing_func=test_ifmod_since
  header="If-Modified-Since"
fi
# Request-header template; each %NAME% placeholder is filled in later. The
# weekday is a fixed "Mon" and is not recomputed for the probed date.
header_template="$header: Mon, %DAY% %MONTH% %YEAR% %HOUR%:%MIN%:%SEC% GMT"

# Candidate values for every date component, in ascending order.
sec=({00..59})
min=({00..59})
hour=({00..23})
days=({01..31})
months=(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)
years=()
this_year=$(date +"%Y")
for ((y = 2000; y <= this_year; y++)); do
  years+=("$y")
done

# Components from most to least significant, with the placeholder each fills.
var_iter_order=(years months days hour min sec)
pattern_iter_order=(%YEAR% %MONTH% %DAY% %HOUR% %MIN% %SEC%)

# Default value per component, used until (or instead of) being searched.
# The day is written with two digits, as HTTP-date requires.
declare -A replacements=([years]=2020 [months]=Jan [days]=01)
for i in 3 4 5; do
  replacements[${var_iter_order[i]}]="00"
done

# Components beyond the requested accuracy are never searched: bake their
# defaults into the template now and forget them.
for ((i = accuracy; i < ${#var_iter_order[@]}; i++)); do
  current_var_name=${var_iter_order[i]}
  header_template=$(replacer "$header_template" "${pattern_iter_order[i]}" "${replacements[$current_var_name]}")
  unset "replacements[$current_var_name]"
done

# Keep only the components that will actually be searched.
var_iter_order=("${var_iter_order[@]:0:accuracy}")
pattern_iter_order=("${pattern_iter_order[@]:0:accuracy}")
# Resolve each requested component in turn, most significant first, by binary
# search over its candidate values. Each result is stored in replacements[]
# so that later (finer) components are searched relative to it.
component_count=${#var_iter_order[@]}
for ((i = 0; i < component_count; i++)); do
  current_var_name=${var_iter_order[i]}
  # Indirect expansion: copy the array whose name is in current_var_name.
  candidates_ref="${current_var_name}[@]"
  candidates=("${!candidates_ref}")

  # Do not probe calendar days that do not exist (e.g. 30 Feb): a server may
  # ignore such a date altogether, which would push the search to the wrong
  # day. Year and month are always resolved before the day.
  if [[ $current_var_name == days ]]; then
    case ${replacements[months]} in
      Apr | Jun | Sep | Nov)
        month_length=30
        ;;
      Feb)
        search_year=${replacements[years]}
        if ((search_year % 4 == 0 && (search_year % 100 != 0 || search_year % 400 == 0))); then
          month_length=29
        else
          month_length=28
        fi
        ;;
      *)
        month_length=31
        ;;
    esac
    candidates=("${candidates[@]:0:month_length}")
  fi

  # Fill every placeholder except the one being searched with its current
  # value (already-found result or default).
  pre_ifmod_header=$header_template
  for ((j = 1; j < component_count; j++)); do
    relative_j=$(((i + j) % component_count))
    replacement_var=${var_iter_order[relative_j]}
    pre_ifmod_header=$(replacer "$pre_ifmod_header" \
      "${pattern_iter_order[relative_j]}" "${replacements[$replacement_var]}")
  done

  left=0
  right=$((${#candidates[@]} - 1))
  while ((left <= right)); do
    middle=$(((left + right) / 2))
    current_var_element=${candidates[middle]}
    ifmod_header=$(replacer "$pre_ifmod_header" "${pattern_iter_order[i]}" "$current_var_element")
    resp_code=$(curl -s -o /dev/null -w '%{http_code}' \
      -H "User-Agent: $user_agent" -H "$ifmod_header" "$resource")
    # The testing function prints 1 when the probed date is too late, in
    # which case the search continues in the lower half.
    if [[ $("$testing_func" "$resp_code") == 1 ]]; then
      right=$((middle - 1))
    else
      left=$((middle + 1))
    fi
  done

  # right is the last candidate that was not "too late"; clamp to the first
  # candidate when every probe said "too late".
  replacements[$current_var_name]=${candidates[right >= 0 ? right : 0]}
done
# Print the resolved components (most significant first) followed by a tag
# naming the header that was used: IM = If-Modified-Since,
# UM = If-Unmodified-Since.
for var_name in "${var_iter_order[@]}"; do
  # Never use data as the printf format string.
  printf '%s ' "${replacements[$var_name]}"
done
if [[ $testing_func == test_ifmod_since ]]; then
  printf 'IM\n'
else
  printf 'UM\n'
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment