Last active
August 7, 2023 07:31
-
-
Save jfut/eab53d92a91ec8f3867df7f76c4e302a to your computer and use it in GitHub Desktop.
wget-mirror-adjust
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Mirror a site with wget while keeping extension-less pages at their
# original paths.
#
# Usage: wget-mirror-adjust TARGET_URL
#
# The site is mirrored twice into temporary directories next to the result:
#   1. without --adjust-extension -> copied to ./<host> as the result tree
#   2. with    --adjust-extension -> used only to detect "<dir>.html" pages
# For every directory <dir> in mirror 2 that has a sibling file <dir>.html,
# that file is installed as <dir>/index.html in the result tree (a regular
# file occupying <dir> in the result tree is removed first).
#
# Requires: wget, find, cp, realpath.
#
# Copyright (c) 2023 Jun Futagawa (jfut)
#
# This software is released under the MIT License.
# http://opensource.org/licenses/mit-license.php

# -e is intentionally not enabled: wget exits non-zero when any single request
# fails, and a partially successful mirror should still be post-processed.
# Critical steps are checked explicitly instead.
set -uo pipefail

# Print a message to stderr.
warn() {
    printf '%s\n' "$*" >&2
}

# Print a message to stderr and abort with status 1.
die() {
    warn "$@"
    exit 1
}

TARGET_URL="${1:-}"
if [[ -z "${TARGET_URL}" ]]; then
    echo "Usage $0 TARGET_URL" >&2
    exit 1
fi

FQDN="${TARGET_URL}"
# https://example.org/dir/ -> example.org/dir/
FQDN="${FQDN#*//}"
# example.org/dir/ -> example.org  (drop everything from the first "/")
FQDN="${FQDN%%/*}"
# user:pass@example.org -> example.org
FQDN="${FQDN##*@}"
if [[ -z "${FQDN}" ]]; then
    die "Cannot determine the host name from: ${TARGET_URL}"
fi

# Absolute paths, so that they stay valid after pushd and inside the EXIT trap.
WORK_DIR="$(pwd)" || die "Cannot determine the current directory"
BASE_DIR="${WORK_DIR}/${FQDN}"
MIRROR_NO_ADJUST_DIR="${WORK_DIR}/${FQDN}.noadjust"
MIRROR_ADJUST_DIR="${WORK_DIR}/${FQDN}.adjust"

# Remove both temporary mirrors; runs on every exit path.
cleanup() {
    rm -rf -- "${MIRROR_NO_ADJUST_DIR:?}" "${MIRROR_ADJUST_DIR:?}"
}
trap cleanup EXIT

echo "# FQDN: ${FQDN}"
echo "# BASE_DIR: ${BASE_DIR}"

echo "# Mirror without --adjust-extension option"
if ! wget -m -p -np --directory-prefix="${MIRROR_NO_ADJUST_DIR}" -q --show-progress "${TARGET_URL}"; then
    warn "Warning: wget reported errors while mirroring without --adjust-extension"
fi
if [[ ! -d "${MIRROR_NO_ADJUST_DIR}/${FQDN}" ]]; then
    die "wget did not create ${MIRROR_NO_ADJUST_DIR}/${FQDN}"
fi

# Copy the *contents* ("/.") so that an already existing BASE_DIR is merged
# into instead of receiving a nested ${FQDN}/${FQDN} directory.
mkdir -p -- "${BASE_DIR}" || die "Cannot create ${BASE_DIR}"
if ! cp -a -- "${MIRROR_NO_ADJUST_DIR}/${FQDN}/." "${BASE_DIR}/"; then
    # Not fatal: entries that could not be copied may still be fixed up below.
    warn "Warning: some files could not be copied to ${BASE_DIR}"
fi

echo "# Mirror with --adjust-extension option"
if ! wget -m -p -E -np --directory-prefix="${MIRROR_ADJUST_DIR}" -q --show-progress "${TARGET_URL}"; then
    warn "Warning: wget reported errors while mirroring with --adjust-extension"
fi

echo
# pushd prints the directory stack, which completes this "# Target: " line.
printf '%s' "# Target: "
pushd "${MIRROR_ADJUST_DIR}/${FQDN}" || die "Cannot enter ${MIRROR_ADJUST_DIR}/${FQDN}"

# Adjust no extension pages
# NUL-delimited so directory names containing whitespace or glob characters
# are handled as single items.
while IFS= read -r -d '' DIR_NAME; do
    echo "${DIR_NAME}"

    PARENT_DIR="${BASE_DIR}/${DIR_NAME}"
    # A regular file sitting where a directory is needed is replaced by the
    # directory; its content is re-installed below as index.html when the
    # adjusted mirror has a matching "<dir>.html".
    if [[ -f "${PARENT_DIR}" ]]; then
        rm -f -- "${PARENT_DIR}"
    fi
    if [[ ! -d "${PARENT_DIR}" ]]; then
        if ! mkdir -p -- "${PARENT_DIR}"; then
            warn "Warning: cannot create ${PARENT_DIR}"
            continue
        fi
    fi

    NO_EXTENSION_FILE="${DIR_NAME}.html"
    if [[ -f "${NO_EXTENSION_FILE}" ]]; then
        # realpath only normalizes the path (e.g. removes "/./"); fall back to
        # the raw path if it is unavailable or fails.
        INDEX_FILE="$(realpath -- "${PARENT_DIR}/index.html")" \
            || INDEX_FILE="${PARENT_DIR}/index.html"
        # ./doc/user.html -> /path/to/example.org/example.org/doc/user/index.html
        echo "${NO_EXTENSION_FILE} -> ${INDEX_FILE}"
        if ! cp -a -- "${NO_EXTENSION_FILE}" "${INDEX_FILE}"; then
            warn "Warning: cannot copy ${NO_EXTENSION_FILE} to ${INDEX_FILE}"
        fi
    fi
done < <(find . -type d -print0)

popd || die "Cannot return to ${WORK_DIR}"

# Cleanup of the temporary mirrors is done by the EXIT trap.

echo
echo "# Result: ${BASE_DIR}"
Author
jfut
commented
Aug 7, 2023
- Example: https://example.org/doc/user
- Save to: /path/to/example.org/example.org/doc/user/index.html
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment