Created
July 7, 2021 19:05
-
-
Save MrDrMcCoy/3a1a714aa12a71eb17c69c00fbe8ed21 to your computer and use it in GitHub Desktop.
Scrape the-eye.eu using rclone
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#####
# Scrape the-eye.eu using rclone
#####

# Leave the script with status 1 when it is interrupted (INT) or asked to
# terminate (TERM). SIGKILL is intentionally not listed: it cannot be trapped,
# so naming it here would have no effect.
trap 'exit 1' INT TERM
# Options passed to every `rclone copy` invocation.
#
# The --filter rules are order-sensitive: rclone applies the first rule that
# matches a path, so the exclusions are listed before the inclusions and the
# closing "- **" rule rejects whatever none of the earlier rules accepted.
rclone_opts=(
  # --dry-run
  --fast-list
  --ignore-case
  --log-file="rclone-eye.log"
  --min-size="1"
  --multi-thread-streams="0"
  --progress
  --retries-sleep="2s"
  --retries="3"
  --size-only
  --tpslimit="2"
  --transfers="1"
  --use-mmap
  --user-agent="eye02"
  --verbose

  ## exclude filters
  --filter="- **{astrology,freemason,horoscope,illuminati}**"
  --filter="- **{conspiracy,gutenberg,occult}*/"
  --filter="- **free*energy**"
  --filter="- **index.html*"
  # "kamp[fh]" covers the correct spelling of the title as well as the
  # misspelling ("kamph") this rule used to be limited to.
  --filter="- **mein*kamp[fh]**"
  --filter="- **new*world*order**"

  ## include filters
  --filter="+ **.[^.]*{css,htm,mht,svg,wiki,xml}[^.]*"
  --filter="+ **.{7z,bz2,gz,img,iso,rar,t[bgx]z,tar,xz,zip,zipx,zst,z}"
  --filter="+ **.{aif?,flac,mp3,m4a,og[ag],opus,wav,wma}"
  --filter="+ **.{ass,idx,srt,sub}"
  --filter="+ **.{aux,bbl,bib,blg,brf,bst,cls,dtx,fd,lat,tex}[^.]*"
  --filter="+ **.{av,celtx,fcf,story,xav}"
  --filter="+ **.{avi,divx,gifv,mp4,mov,mkv,ogv,webm,wmv}"
  --filter="+ **.{az,chm,djv,eps,epub,ibook,fb2,hlp,lit,mdi,mobi,pdf,pml,[gp]s,tif,xps}[^.]*"
  --filter="+ **.cb[rz]"
  --filter="+ **.{bak,kml,man,notebook,rdf}"
  --filter="+ **.{bbs,csv,faq,md,rst,txt}"
  --filter="+ **.{bmp,gif,jp2,jpeg,jpg,png,webp}"
  --filter="+ **.{db,mdb,mdt,pdb,sql}[^.]*"
  --filter="+ **.{doc,od,page,ppt,rtf,wp,xls}[^.]*"
  --filter="+ **.{bash,bat,js,py,r,rb,pl,sh}"
  --filter="+ **.{ly,mus,mid,msc,mxl,sib}[^.]*"
  --filter="+ **{changelog,install,license,notes,readme}"

  ## exclude everything else
  --filter="- **"
)
# Copy every "<host> <path>" entry of the list below into a local directory
# named "<host><path>", using rclone's on-the-fly HTTP remote.
#
# The list is read from file descriptor 3 instead of stdin, so a command run
# inside the loop cannot consume the remaining entries. The here-document
# delimiter is quoted so the entries are taken literally (no $ or backtick
# expansion). A failing entry is logged and the loop moves on to the next one.
while read -r host path <&3; do
  # Skip blank lines and the commented lines of the list.
  [[ -z ${host} || ${host} == \#* ]] && continue

  target="${host}${path}"
  printf '%s Syncing %s...\n' "$(date)" "${target}" | tee -a "rclone-eye.log"

  if ! mkdir -vp -- "${target}"; then
    printf '%s Cannot create %s, skipping\n' "$(date)" "${target}" \
      | tee -a "rclone-eye.log" >&2
    continue
  fi

  rclone copy "${rclone_opts[@]}" \
    --http-url="https://${host}" ":http:${path}" "${target}" \
    || printf '%s rclone exited with status %s for %s\n' \
      "$(date)" "$?" "${target}" \
      | tee -a "rclone-eye.log" >&2
done 3<<'EOF'
## Add the hostnames and subpaths here, separated by a space.
## Lines beginning with '#' will be skipped.
## You may want to split large directories into their various subpaths to
## make resuming interrupted transfers easier.
the-eye.eu /hardwaretest/
the-eye.eu /public/Books/
the-eye.eu /public/Papers/
the-eye.eu /public/Psychedelics/
the-eye.eu /public/Psychoactives/
the-eye.eu /public/Site-Dumps/RemoteCPU_Archive/
the-eye.eu /public/Site-Dumps/adambibby.ca/
the-eye.eu /public/Site-Dumps/library.uniteddiversity.coop/
the-eye.eu /public/Site-Dumps/pssurvival.com/
the-eye.eu /public/Site-Dumps/r0bin0705_8fzydz_share.nxtcloud.net/
the-eye.eu /public/Site-Dumps/www.elaulademusica.com/
the-eye.eu /public/Site-Dumps/www.jam-night.com/
the-eye.eu /public/Site-Dumps/www.seabrite.com/
the-eye.eu /public/Strategic Intelligence Network/
the-eye.eu /public/WorldTracker.org/
the-eye.eu /public/murdercube.com/
EOF
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment