Last active
December 9, 2020 10:52
-
-
Save eusonlito/a2ac9d6ef1d92dd7c5df2708a326791f to your computer and use it in GitHub Desktop.
Download remote resources based on 404 errors in the local Apache access log
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Download remote resources based on 404 errors in the local Apache access log.
#
# Every request logged with a 404 status whose log line matches FILTER is
# fetched from HOST and stored under WEB at the same path, so the next local
# request for it succeeds.
#
# Usage:
#   1. Copy this file into your project root.
#   2. Configure LOG path, HOST and WEB folder (relative to the project root).
#   3. Set execution permissions with: chmod 755 404-download.sh
#   4. View some page on your local environment so the 404s get logged.
#   5. Launch the script with ./404-download.sh

set -u

LOG="/var/log/apache2/access.log"   # Apache access log to scan
HOST="https://domain.com"           # remote origin that has the missing files
WEB="./public"                      # local document root (from project root)
FILTER="\(jpeg\|jpg\|png\|gif\)"    # grep (BRE) pattern a log line must match
LIMIT=500                           # only the last LIMIT distinct URLs are used

if [[ ! -r "$LOG" ]]; then
  printf 'Cannot read log file: %s\n' "$LOG" >&2
  exit 1
fi

# Read one URL per line instead of word-splitting a command substitution, so
# log content is never glob-expanded or split by the shell.
while IFS= read -r url; do
  # The web server maps the request *path* to a file; the query string must
  # not become part of the local file name.
  path="${url%%\?*}"

  # Request paths come from the access log, i.e. from whoever sent the
  # request. Only accept absolute paths without parent-directory components
  # so nothing can be written outside of WEB.
  case "$path" in
    */../* | */..)
      printf 'Skipping suspicious path: %s\n' "$path" >&2
      continue
      ;;
    /*) ;;
    *) continue ;;
  esac

  target="$WEB$path"

  if [[ -f "$target" ]]; then
    continue
  fi

  # install -d is a no-op for existing directories, so no pre-check is needed
  # (the original pre-check tested the path outside of WEB by mistake).
  if ! install -d -- "$(dirname -- "$target")"; then
    printf 'Cannot create directory for %s\n' "$target" >&2
    continue
  fi

  printf 'Downloading %s into %s\n' "$HOST$url" "$target"

  # --fail: do not store the server's error page when the remote answers with
  #         an HTTP error; otherwise it would be saved as the "image" and the
  #         -f check above would hide the problem on every later run.
  # NOTE(review): --insecure disables TLS certificate verification; kept from
  # the original script, drop it if HOST has a valid certificate.
  if ! curl --insecure --location --fail --silent --show-error \
    --output "$target" -- "$HOST$url"; then
    printf 'Download failed: %s\n' "$HOST$url" >&2
    # The target did not exist before this iteration, so anything left
    # behind is a partial download.
    rm -f -- "$target"
  fi
done < <(
  grep ' 404 ' "$LOG" \
    | grep "$FILTER" \
    | grep -o 'GET [^ ]\+' \
    | awk '!seen[$2]++ { print $2 }' \
    | tail -n "$LIMIT"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment