Revisions
-
davidalger revised this gist
Aug 4, 2020. 1 changed file with 42 additions and 0 deletions. There are no files selected for viewing.
#!/bin/bash
#
# Storefront cache warmer.
#
# Waits until FRONT_URL answers a HEAD request with status 200 (giving up
# after READY_TIMEOUT seconds), then crawls two levels of ".html" links
# starting at FRONT_URL and requests every "catalog/product/cache" image
# referenced by the pages it collected.
#
# Environment:
#   FRONT_URL  Entry point of the crawl
#              (default: https://app.exampleproject.test/).
#
# Requires: curl, grep, awk, sort, head, date.
set -euo pipefail

FRONT_URL="${FRONT_URL:-https://app.exampleproject.test/}"
readonly READY_TIMEOUT=1800 # seconds to wait for FRONT_URL to return 200
readonly READY_INTERVAL=2   # seconds to sleep between readiness probes

# Print a progress line in the "==> [HH:MM:SS] message" format.
log() {
  printf '==> [%s] %s\n' "$(date +%H:%M:%S)" "$*"
}

# Print a warning on stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

#######################################
# Print the absolute http(s) URLs found in ATTR="..." attributes of a page,
# one per line, with any "#fragment" removed.
# Arguments: $1 - attribute name (href or src)
#            $2 - URL of the page to fetch
# Outputs:   matching URLs on stdout (possibly nothing)
# Returns:   curl's exit status if the page could not be fetched, else 0
#######################################
page_urls() {
  local attr=$1 page=$2 html
  html=$(curl -s "${page}") || return
  # grep exits 1 when nothing matches; with pipefail that would look like a
  # failure, but an empty result is perfectly valid here.
  printf '%s\n' "${html}" \
    | { grep -Eo "${attr}"'="[^\"]+"' || true; } \
    | { grep -Eo '(http|https)://[^#"]+' || true; }
}

log "waiting on readiness"
SECONDS=0 # bash's wall-clock counter, so time spent inside curl counts too
RESPONSE_CODE=""
while :; do
  # While the server is still down curl exits non-zero; under pipefail and
  # set -e that would abort the script, so treat it as "not ready yet".
  RESPONSE_CODE=$(curl -sI "${FRONT_URL}" 2>/dev/null | head -n1 | awk '{print $2}') \
    || RESPONSE_CODE=""
  if [[ "${RESPONSE_CODE}" == "200" ]] || ((SECONDS > READY_TIMEOUT)); then
    break
  fi
  printf "."
  sleep "${READY_INTERVAL}"
done
echo
if [[ "${RESPONSE_CODE}" != "200" ]]; then
  warn "${FRONT_URL} did not return 200 within ${READY_TIMEOUT}s; crawling anyway"
fi

# Level 1: ".html" links on the front page. Without the front page there is
# nothing to crawl, so a failed fetch is fatal.
log "${FRONT_URL}"
if ! front_links=$(page_urls href "${FRONT_URL}"); then
  printf 'error: could not fetch %s\n' "${FRONT_URL}" >&2
  exit 1
fi
URL_LIST=$(grep -F '.html' <<<"${front_links}" | LC_ALL=C sort -u || true)

# Level 2: ".html" links on each level-1 page, restricted to plain URLs and
# to ?p= / ?color= variants. The loop reads a snapshot of the level-1 list,
# so URLs appended here are not themselves expanded further.
seeds=${URL_LIST}
while IFS= read -r url; do
  [[ -n "${url}" ]] || continue
  log "${url}"
  if ! links=$(page_urls href "${url}"); then
    warn "could not fetch ${url}"
    continue
  fi
  links=$(grep -F '.html' <<<"${links}" \
    | grep -E '(\?p=|\?color=|^[^\?]+$)' || true)
  URL_LIST+=$'\n'"${links}"
done <<<"${seeds}"

# Level 3: request every product-cache image referenced by every collected
# page. Lists are consumed with `read` rather than unquoted expansion so that
# URLs containing glob characters such as "?" are never pathname-expanded.
pages=$(LC_ALL=C sort -u <<<"${URL_LIST}")
while IFS= read -r url; do
  [[ -n "${url}" ]] || continue
  log "${url}"
  if ! images=$(page_urls src "${url}"); then
    warn "could not fetch ${url}"
    continue
  fi
  images=$(grep -F 'catalog/product/cache' <<<"${images}" \
    | LC_ALL=C sort -u || true)
  while IFS= read -r img; do
    [[ -n "${img}" ]] || continue
    log "${img}"
    # Best effort: images of one page are fetched in parallel and a failed
    # download must not stop the warm-up.
    curl -s "${img}" >/dev/null || true &
  done <<<"${images}"
  wait # barrier: finish this page's images before moving to the next page
done <<<"${pages}"
log "crawl complete"
davidalger created this gist
Aug 4, 2020. There are no files selected for viewing.
#!/bin/bash
#
# Storefront cache warmer.
#
# Waits until FRONT_URL answers a HEAD request with status 200 (giving up
# after READY_TIMEOUT seconds), then crawls two levels of ".html" links
# starting at FRONT_URL and requests each collected page once.
#
# Environment:
#   FRONT_URL  Entry point of the crawl
#              (default: https://app.exampleproject.test/).
#
# Requires: curl, grep, awk, sort, head, date.
set -euo pipefail

FRONT_URL="${FRONT_URL:-https://app.exampleproject.test/}"
readonly READY_TIMEOUT=1800 # seconds to wait for FRONT_URL to return 200
readonly READY_INTERVAL=2   # seconds to sleep between readiness probes

# Print a progress line in the "==> [HH:MM:SS] message" format.
log() {
  printf '==> [%s] %s\n' "$(date +%H:%M:%S)" "$*"
}

# Print a warning on stderr.
warn() {
  printf 'warning: %s\n' "$*" >&2
}

#######################################
# Print the absolute http(s) URLs found in href="..." attributes of a page,
# one per line, with any "#fragment" removed.
# Arguments: $1 - URL of the page to fetch
# Outputs:   matching URLs on stdout (possibly nothing)
# Returns:   curl's exit status if the page could not be fetched, else 0
#######################################
page_links() {
  local page=$1 html
  html=$(curl -s "${page}") || return
  # grep exits 1 when nothing matches; with pipefail that would look like a
  # failure, but an empty result is perfectly valid here.
  printf '%s\n' "${html}" \
    | { grep -Eo 'href="[^\"]+"' || true; } \
    | { grep -Eo '(http|https)://[^#"]+' || true; }
}

log "waiting on readiness"
SECONDS=0 # bash's wall-clock counter, so time spent inside curl counts too
RESPONSE_CODE=""
while :; do
  # While the server is still down curl exits non-zero; under pipefail and
  # set -e that would abort the script, so treat it as "not ready yet".
  RESPONSE_CODE=$(curl -sI "${FRONT_URL}" 2>/dev/null | head -n1 | awk '{print $2}') \
    || RESPONSE_CODE=""
  if [[ "${RESPONSE_CODE}" == "200" ]] || ((SECONDS > READY_TIMEOUT)); then
    break
  fi
  printf "."
  sleep "${READY_INTERVAL}"
done
echo
if [[ "${RESPONSE_CODE}" != "200" ]]; then
  warn "${FRONT_URL} did not return 200 within ${READY_TIMEOUT}s; crawling anyway"
fi

# Level 1: ".html" links on the front page. Without the front page there is
# nothing to crawl, so a failed fetch is fatal.
log "${FRONT_URL}"
if ! front_links=$(page_links "${FRONT_URL}"); then
  printf 'error: could not fetch %s\n' "${FRONT_URL}" >&2
  exit 1
fi
URL_LIST=$(grep -F '.html' <<<"${front_links}" | LC_ALL=C sort -u || true)

# Level 2: links ending in ".html" on each level-1 page. The loop reads a
# snapshot of the level-1 list, so URLs appended here are not expanded again.
seeds=${URL_LIST}
while IFS= read -r url; do
  [[ -n "${url}" ]] || continue
  log "${url}"
  if ! links=$(page_links "${url}"); then
    warn "could not fetch ${url}"
    continue
  fi
  links=$(grep -E '\.html$' <<<"${links}" || true)
  URL_LIST+=$'\n'"${links}"
done <<<"${seeds}"

# Request every collected page once. The list is consumed with `read` rather
# than unquoted expansion so that URLs containing glob characters are never
# pathname-expanded.
pages=$(LC_ALL=C sort -u <<<"${URL_LIST}")
while IFS= read -r url; do
  [[ -n "${url}" ]] || continue
  log "${url}"
  # Best effort: one page failing to load must not stop the warm-up.
  curl -s "${url}" >/dev/null || true
done <<<"${pages}"
log "crawl complete"