Created
November 11, 2018 18:01
-
-
Save AppMkrATL/f6a6f511dca21dd9de28b3738f175a92 to your computer and use it in GitHub Desktop.
WGET Crawl Script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# wget --mirror --adjust-extension --page-requisites --execute robots=off --wait=30 --random-wait --convert-links --user-agent=Mozilla http://www.example.com
### V1 | |
# wget \ | |
# --recursive \ | |
# --no-clobber \ | |
# --page-requisites \ | |
# --html-extension \ | |
# --convert-links \ | |
# --restrict-file-names=windows \ | |
# --domains www.example.com \ | |
# --no-parent \ | |
# www.example.com | |
### V2 | |
# wget \ | |
# --recursive \ | |
# --no-clobber \ | |
# --page-requisites \ | |
# --html-extension \ | |
# --convert-links \ | |
# --execute robots=off \ | |
# --restrict-file-names=windows \ | |
# --domains www.example.com \ | |
# --no-parent \ | |
# www.example.com | |
# wget \ | |
# --user-agent='Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/52.0.2725.0 Mobile/13B143 Safari/601.1.46' \ | |
# --execute robots=off \ | |
# --recursive \ | |
# --mirror \ | |
# --wait=10 \ | |
# --random-wait \ | |
# www.example.com \ | |
# 2>&1 | grep '^--' | awk '{ print $3 }' | grep -v '\.\(css\|js\|png\|gif\|jpg\|JPG\)$' > www.example.com.txt | |
# wget \ | |
# --mirror \ | |
# --recursive \ | |
# --execute robots=off \ | |
# --user-agent='Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/52.0.2725.0 Mobile/13B143 Safari/601.1.46' \ | |
# --timestamping \ | |
# --page-requisites \ | |
# --html-extension \ | |
# --restrict-file-names=windows \ | |
# --wait=1 \ | |
# --random-wait \ | |
# --domains www.example.com \ | |
# --debug \ | |
# --output-file=sample.log \ | |
# --progress=dot \ | |
# --directory-prefix=sample \ | |
# www.example.com | |
# wget \ | |
# --mirror \ | |
# --recursive \ | |
# --execute robots=off \ | |
# --user-agent='Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/52.0.2725.0 Mobile/13B143 Safari/601.1.46' \ | |
# --timestamping \ | |
# --page-requisites \ | |
# --html-extension \ | |
# --restrict-file-names=windows \ | |
# --wait=1 \ | |
# --random-wait \ | |
# --domains www.example.com \ | |
# --progress=bar \ | |
# www.example.com | |
# Mirror a web site into the current directory for offline browsing.
#
# Usage: sh <this-script> [host]
#   host  bare host name to crawl (no scheme or path); it is used both as the
#         start URL and as the --domains restriction.
#         Defaults to www.example.com.
site="${1:-www.example.com}"

# Notes on the option set:
# - --mirror already enables --recursive and --timestamping (with unlimited
#   depth), so those flags are not repeated.
# - --adjust-extension is the current name of the deprecated --html-extension.
# - --wait=1 is what makes --random-wait effective: the randomized delay is
#   0.5x-1.5x the --wait value, and --wait defaults to 0 (no delay at all).
# - robots.txt is deliberately ignored (robots=off); only crawl sites you are
#   permitted to mirror.
# - --restrict-file-names=windows keeps saved file names portable.
wget \
  --mirror \
  --execute robots=off \
  --user-agent='Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2725.0 Safari/537.36' \
  --page-requisites \
  --adjust-extension \
  --restrict-file-names=windows \
  --wait=1 \
  --random-wait \
  --convert-links \
  --domains "$site" \
  "$site"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment