Last active
June 23, 2018 15:29
-
-
Save pavelch/c4a087f2824b3eadab41acaa865a323d to your computer and use it in GitHub Desktop.
Please provide source file and destination path
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Summarise an HTTP access log whose lines look like
#   host - - [date tz] "METHOD url PROTO" status bytes
#
# Usage:   ./log_to_list.sh <access_log> <output_dir>
# Example: ./log_to_list.sh ~/Downloads/NASA_access_log_Jul95 ~/temp
#
# Stdout:
#   1. the distinct field counts seen in the log (one line, space separated)
#   2. the distinct values of the next-to-last field (the status code)
#   3. the distinct 6th fields (quoted request method) of broken/relocated hits
#   4. a "#### Length ..." line with the number of counted views and hosts
#   5. the last 10 lines of each of the two files written below
# Files written:
#   <output_dir>/sorted_unique_views    host:total_bytes, ascending by bytes
#   <output_dir>/urls_broken_relocated  distinct URLs answered with a status
#                                       matched by BROKEN_STATUS_RE
#
# Exit status: 0 on success, 2 on bad usage, non-zero on any other failure.

# Byte-wise locale: stops sort/awk from rejecting non-UTF-8 bytes in the log
# ("Illegal byte sequence") and makes the sort order reproducible. LC_ALL is
# used because it takes precedence over LC_CTYPE and LANG.
export LC_ALL=C

# No `set -e`: failures are checked explicitly and the status of `main`
# (the last command) becomes the exit status of the script.
set -uo pipefail

# Status codes reported as "broken or relocated". Anchored so that a longer
# number that merely contains one of the codes is not matched.
readonly BROKEN_STATUS_RE='^(302|304|400|403|404|500|501)$'

#######################################
# Collapse the lines on stdin into their distinct values, printed on one
# space-separated line (an empty line when there is no input).
#######################################
join_distinct() {
  sort -u | awk 'NR > 1 { printf " " } { printf "%s", $0 } END { print "" }'
}

#######################################
# Print three one-line surveys of the log: distinct field counts, distinct
# status codes and distinct request-method tokens of broken/relocated hits.
# Arguments: $1 - access log path
# Returns:   non-zero if any pipeline fails
#######################################
print_field_survey() {
  local log=$1

  awk '{ print NF }' < "$log" | join_distinct || return
  # NF >= 2 guard: $(NF-1) on a blank line would address field -1.
  awk 'NF >= 2 { print $(NF-1) }' < "$log" | join_distinct || return
  awk -v re="$BROKEN_STATUS_RE" \
    'NF >= 6 && $(NF-1) ~ re { print $6 }' < "$log" | join_distinct
}

#######################################
# Sum the bytes served per host for status-200 hits with a numeric byte
# count, write "host:bytes" sorted ascending by bytes, and print the
# "#### Length" summary line.
# Arguments: $1 - access log path
#            $2 - output file
# Returns:   non-zero if any pipeline stage fails
#######################################
write_sorted_views() {
  local log=$1 out=$2
  local views hosts sorted

  # awk emits "<views> <hosts>" as its first line, then one line per host.
  # The group reads that header itself and hands the rest to sort, so the
  # log is streamed once instead of being loaded into shell arrays.
  awk '
    NF >= 2 && $(NF-1) == 200 && $NF ~ /^[0-9]+$/ {
      if (!($1 in bytes)) hosts++
      bytes[$1] += $NF
      views++
    }
    END {
      print views + 0, hosts + 0
      # %.0f keeps large totals in plain integer notation.
      for (host in bytes) printf "%s:%.0f\n", host, bytes[host]
    }
  ' < "$log" | {
    IFS=' ' read -r views hosts || exit 1
    sort -t : -k 2,2n > "$out" || exit
    # Arithmetic expansion strips the padding some wc implementations add.
    sorted=$(( $(wc -l < "$out") ))
    echo "#### Length views:${views} uniq:${hosts} sorted:${sorted}"
  }
}

#######################################
# Write the distinct URLs of hits whose status matches BROKEN_STATUS_RE.
# The status is read from the next-to-last field, because requests without
# a protocol token have one field fewer and a fixed $9 would then test the
# byte count instead.
# Arguments: $1 - access log path
#            $2 - output file
# Returns:   non-zero if any pipeline stage fails
#######################################
write_broken_urls() {
  local log=$1 out=$2

  awk -v re="$BROKEN_STATUS_RE" '
    NF >= 7 && $(NF-1) ~ re {
      url = $7
      sub(/"$/, "", url)   # protocol-less requests end the URL with a quote
      print url
    }
  ' < "$log" | sort -u > "$out"
}

#######################################
# Entry point.
# Arguments: $1 - access log path, $2 - output directory (created if missing)
# Returns:   0 on success, 2 on bad usage, non-zero on other failures
#######################################
main() {
  if (( $# != 2 )); then
    printf 'Usage: %s <access_log> <output_dir>\n' "${0##*/}" >&2
    return 2
  fi

  local input_file=$1
  local output_path=$2
  local sorted_uniq_views=${output_path}/sorted_unique_views
  local urls_broken_relocated=${output_path}/urls_broken_relocated

  if [[ ! -f "$input_file" || ! -r "$input_file" ]]; then
    printf 'error: cannot read log file: %s\n' "$input_file" >&2
    return 1
  fi
  if ! mkdir -p -- "$output_path"; then
    printf 'error: cannot create output directory: %s\n' "$output_path" >&2
    return 1
  fi

  print_field_survey "$input_file" || return

  write_sorted_views "$input_file" "$sorted_uniq_views" || return
  printf '\n\n################ Uniq views\n'
  tail -n 10 < "$sorted_uniq_views" || return

  write_broken_urls "$input_file" "$urls_broken_relocated" || return
  printf '\n\n################ Broken or relocated\n'
  tail -n 10 < "$urls_broken_relocated"
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment