@erikhansen
Forked from davidalger/warmer-cron.sh
Created August 4, 2020 16:54

Revisions

  1. @davidalger revised this gist Aug 4, 2020. 1 changed file with 42 additions and 0 deletions.
    42 changes: 42 additions & 0 deletions warmer-imgs.sh
    #!/bin/bash
    # warmer-imgs.sh: crawls the storefront, then fetches every product image
    # served from catalog/product/cache so the resized-image cache is pre-warmed.
    set -euo pipefail

    FRONT_URL="${FRONT_URL:-https://app.exampleproject.test/}"

    # Wait (up to 30 minutes) for the frontend to respond with HTTP 200.
    echo "==> [$(date +%H:%M:%S)] waiting on readiness"
    ELAPSED_SECONDS=0
    while : ; do
      ELAPSED_SECONDS=$((ELAPSED_SECONDS + 2))
      RESPONSE_CODE="$(curl -sI "${FRONT_URL}" 2>/dev/null | head -n1 | awk '{print $2}' || true)"
      if [ "${RESPONSE_CODE:-000}" = "200" ] || [ ${ELAPSED_SECONDS} -gt 1800 ]; then
        break
      fi
      printf "."
      sleep 2
    done
    echo

    # Collect the first level of .html links from the home page.
    echo "==> [$(date +%H:%M:%S)] ${FRONT_URL}"
    URL_LIST="$(curl -s "${FRONT_URL}" | grep -Eo 'href="[^"]+"' \
      | grep -Eo '(http|https)://[^#"]+' | grep '\.html' | sort -n | uniq)"$'\n' || true

    # Crawl each first-level page and append its .html links, keeping plain URLs
    # as well as pagination (?p=) and color-filter (?color=) variants.
    for url in ${URL_LIST}; do
      echo "==> [$(date +%H:%M:%S)] ${url}"
      URL_LIST="${URL_LIST}$(curl -s "${url}" | grep -Eo 'href="[^"]+"' \
        | grep -Eo '(http|https)://[^#"]+' | grep '\.html' | grep -E '(\?p=|\?color=|^[^\?]+$)')"$'\n' || true
    done
    URL_LIST="$(echo "${URL_LIST}" | sort -n | uniq)"

    # Fetch the cached product images referenced by each page; images for a page
    # are requested in parallel, and we wait for them before moving on.
    for url in ${URL_LIST}; do
      echo "==> [$(date +%H:%M:%S)] ${url}"
      IMG_LIST="$(curl -s "${url}" | grep -Eo 'src="[^"]+"' \
        | grep -Eo '(http|https)://[^#"]+' | grep 'catalog/product/cache')"$'\n' || true

      for img in $(echo "${IMG_LIST}" | sort -n | uniq); do
        echo "==> [$(date +%H:%M:%S)] ${img}"
        curl -s "${img}" >/dev/null || true &
      done
      wait
    done

    echo "==> [$(date +%H:%M:%S)] crawl complete"
  2. @davidalger created this gist Aug 4, 2020.
    34 changes: 34 additions & 0 deletions warmer-cron.sh
    #!/bin/bash
    # warmer-cron.sh: crawls the storefront and requests every discovered .html
    # page once so the full page cache is warm; meant to be run on a schedule.
    set -euo pipefail

    FRONT_URL="${FRONT_URL:-https://app.exampleproject.test/}"

    # Wait (up to 30 minutes) for the frontend to respond with HTTP 200.
    echo "==> [$(date +%H:%M:%S)] waiting on readiness"
    ELAPSED_SECONDS=0
    while : ; do
      ELAPSED_SECONDS=$((ELAPSED_SECONDS + 2))
      RESPONSE_CODE="$(curl -sI "${FRONT_URL}" 2>/dev/null | head -n1 | awk '{print $2}' || true)"
      if [ "${RESPONSE_CODE:-000}" = "200" ] || [ ${ELAPSED_SECONDS} -gt 1800 ]; then
        break
      fi
      printf "."
      sleep 2
    done
    echo

    # Collect the first level of .html links from the home page.
    echo "==> [$(date +%H:%M:%S)] ${FRONT_URL}"
    URL_LIST="$(curl -s "${FRONT_URL}" | grep -Eo 'href="[^"]+"' \
      | grep -Eo '(http|https)://[^#"]+' | grep '\.html' | sort -n | uniq)"$'\n' || true

    # Crawl each first-level page and append the .html links found on it.
    for url in ${URL_LIST}; do
      echo "==> [$(date +%H:%M:%S)] ${url}"
      URL_LIST="${URL_LIST}$(curl -s "${url}" | grep -Eo 'href="[^"]+"' \
        | grep -Eo '(http|https)://[^#"]+' | grep '\.html$')"$'\n' || true
    done
    URL_LIST="$(echo "${URL_LIST}" | sort -n | uniq)"

    # Request every collected URL once to populate the page cache.
    for url in ${URL_LIST}; do
      echo "==> [$(date +%H:%M:%S)] ${url}"
      curl -s "${url}" >/dev/null || true
    done
    echo "==> [$(date +%H:%M:%S)] crawl complete"