Skip to content

Instantly share code, notes, and snippets.

@MrZoidberg
Created October 5, 2025 10:36
Show Gist options
  • Save MrZoidberg/217a4294dafe70162fb507a6100d874d to your computer and use it in GitHub Desktop.
Download big file in background
#!/usr/bin/env bash
# get_big.sh — resilient large-file downloader using wget
# Usage: ./get_big.sh <URL> [OUTPUT_DIR]
# Env (optional):
# LIMIT_RATE="10m" # throttle bandwidth (e.g., 5m, 2000k)
# SHA256="<expected_hash>" # verify integrity after download
set -euo pipefail
if [[ $# -lt 1 ]]; then
echo "Usage: $0 <URL> [OUTPUT_DIR]" >&2
exit 1
fi
URL="$1"
OUTDIR="${2:-$PWD}"
# Derive the local filename from the URL path, stripping any querystring.
BASENAME="$(basename "${URL%%\?*}")"
# A URL ending in '/' (or a bare host) yields no usable filename; fail early
# rather than trying to write to a file named "/" or ".".
if [[ -z "$BASENAME" || "$BASENAME" == "/" || "$BASENAME" == "." ]]; then
echo "ERROR: cannot derive a filename from URL '$URL' (URL must end in a file name)." >&2
exit 1
fi
mkdir -p "$OUTDIR"
# Resolve OUTDIR to an absolute path so any later 'cd' or background job
# cannot double-apply a relative path (e.g. downloads/downloads/file).
OUTDIR="$(cd "$OUTDIR" && pwd)"
OUTFILE="$OUTDIR/$BASENAME"
LOGFILE="$OUTDIR/wget-${BASENAME}.log"
# require NAME — exit with a clear message if a required command is missing.
# The diagnostic is sent to stderr (the original wrote it to stdout, which
# would pollute any captured/piped output of the script).
require() {
command -v "$1" >/dev/null 2>&1 || { echo "Missing dependency: $1" >&2; exit 1; }
}
# Verify required external tools up front, before any work begins.
for tool in wget awk df; do
require "$tool"
done
printf '==> URL: %s\n' "$URL"
printf '==> Output: %s\n' "$OUTFILE"
printf '==> Log: %s\n' "$LOGFILE"
# Probe the server once for the file size via a spider (HEAD-like) request.
# Some servers omit Content-Length; remote_size_bytes then stays empty and
# the space pre-check below is skipped. gsub strips the trailing CR that CRLF
# HTTP headers leave on $2 (it would break shell arithmetic later). The
# '|| true' keeps set -e/pipefail from aborting when the probe itself fails.
remote_size_bytes="$(wget --spider -S --timeout=20 --tries=2 "$URL" 2>&1 \
| awk 'tolower($0) ~ /content-length/ {gsub(/[^0-9]/, "", $2); print $2; exit}' || true)"
# Check free space if we know the remote size.
# Defensively keep only digits (protects arithmetic from a stray CR/space
# picked up while parsing HTTP headers).
remote_size_bytes="${remote_size_bytes//[^0-9]/}"
if [[ -n "$remote_size_bytes" ]]; then
# Free space on the target filesystem, in bytes (df -Pk reports 1K blocks).
free_bytes="$(df -Pk "$OUTDIR" | awk 'NR==2 {print $4*1024}')"
# Bytes already on disk (wget --continue resumes from this offset).
# 'stat -c%s' is the GNU/Linux spelling; 'stat -f%z' covers BSD/macOS.
existing_bytes=0
if [[ -f "$OUTFILE" ]]; then
existing_bytes="$(stat -c%s "$OUTFILE" 2>/dev/null || stat -f%z "$OUTFILE" 2>/dev/null || echo 0)"
fi
remaining=$(( remote_size_bytes - existing_bytes ))
[[ $remaining -lt 0 ]] && remaining=0
echo "==> Remote size: $remote_size_bytes bytes"
echo "==> Free space: $free_bytes bytes"
echo "==> Remaining: $remaining bytes (after resume offset)"
# Require a 100 MiB safety buffer on top of what still needs downloading.
if (( free_bytes < remaining + 104857600 )); then
echo "ERROR: Not enough free disk space (need remaining + 100MB buffer)." >&2
exit 1
fi
else
echo "==> Remote size unknown (server may not advertise Content-Length). Skipping space pre-check."
fi
# Assemble the wget option list one flag at a time; each line notes why the
# flag is there. Element order matches what wget ultimately receives.
WGET_OPTS=()
WGET_OPTS+=(--continue)               # resume partial downloads
WGET_OPTS+=(--timeout=30)             # per-try timeout
WGET_OPTS+=(--tries=0)                # infinite retries
WGET_OPTS+=(--waitretry=5)            # wait between retries
WGET_OPTS+=(--read-timeout=30)
WGET_OPTS+=(--progress=dot:mega)      # better logs in background
WGET_OPTS+=(--no-verbose)
WGET_OPTS+=(--show-progress)          # shows progress if attached
WGET_OPTS+=(--retry-connrefused)
WGET_OPTS+=(--retry-on-host-error)
WGET_OPTS+=(--retry-on-http-error=429,500,502,503,504)
WGET_OPTS+=("--output-document=$OUTFILE")
# Append an optional bandwidth cap when LIMIT_RATE is set in the environment.
case "${LIMIT_RATE:-}" in
'') ;;  # no limit requested
*)
WGET_OPTS+=("--limit-rate=$LIMIT_RATE")
echo "==> Rate limit: $LIMIT_RATE"
;;
esac
# Start download in background that survives SSH logout.
# nohup protects the process from SIGHUP; the subshell keeps the launch tidy.
# NOTE: the original did 'cd "$OUTDIR"' here, which broke relative OUTPUT_DIR:
# $OUTFILE and $LOGFILE already embed OUTDIR, so after the cd they resolved to
# OUTDIR/OUTDIR/... — the cd has been removed.
echo "==> Starting download in background..."
(
umask 022
nohup wget "${WGET_OPTS[@]}" "$URL" >>"$LOGFILE" 2>&1 &
pid=$!
# Record the PID so the job can be monitored or killed later.
echo "$pid" > "$LOGFILE.pid"
# disown is a no-op for this short-lived subshell's job table, but harmless;
# nohup already detaches the download.
disown "$pid" 2>/dev/null || true
echo "==> PID: $pid"
)
echo "==> Tail the log in real-time with:"
echo " tail -f '$LOGFILE'"
# Give wget a moment to create the log, then show the last few lines.
sleep 1
[[ -f "$LOGFILE" ]] && { echo "==> Recent log lines:"; tail -n 10 "$LOGFILE"; }
# Optional integrity check if SHA256 provided.
if [[ -n "${SHA256:-}" ]]; then
# NOTE: sha256sum -c requires TWO spaces (or ' *') between the hash and the
# filename; with a single space it rejects the line as improperly formatted.
cat <<EOF
==> Integrity verification will be attempted when download completes.
Run this to verify later:
echo "$SHA256  $OUTFILE" | sha256sum -c -
EOF
fi
cat <<'EOF'
==> Notes
- The job runs independently of your SSH session (nohup + disown). You can safely log out.
- To resume after interruption, simply re-run this same script with the same arguments.
- Logs: check progress with `tail -f wget-<filename>.log`
- If you prefer an interactive session, you can also run inside tmux/screen.
EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment