Created
August 22, 2025 19:36
-
-
Save filipeandre/1fd8ceec0ce95ac810fce0bd3bf53f7b to your computer and use it in GitHub Desktop.
Restore s3 bucket to previous version before today
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
set -euo pipefail

# Restore S3 objects updated today to their most recent version before today (Europe/Lisbon).
# Usage:
#   ./restore.sh --bucket BUCKET [--yes] [--dry-run]
#
# Options:
#   --bucket BUCKET  Bucket to scan and restore (required).
#   --yes, -y        Skip the interactive confirmation prompts.
#   --dry-run        Report what would be restored without copying anything.
#   -h, --help       Show usage and exit.

# Print the usage summary to stdout.
usage() {
  printf '%s\n' \
    "Restore S3 objects updated today to their most recent version before today (Europe/Lisbon)." \
    "Usage:" \
    "  ./restore.sh --bucket BUCKET [--yes] [--dry-run]"
}

BUCKET=""
ASSUME_YES="false"
DRY_RUN="false"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --bucket)
      # Under `set -u` a bare "$2" would abort with an opaque
      # "unbound variable" error when the value is missing.
      if [[ $# -lt 2 || -z "$2" ]]; then
        echo "Missing value for --bucket" >&2
        exit 1
      fi
      BUCKET="$2"
      shift 2
      ;;
    --yes|-y)
      ASSUME_YES="true"
      shift
      ;;
    --dry-run)
      DRY_RUN="true"
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown arg: $1" >&2
      exit 1
      ;;
  esac
done

# Every later step targets s3://$BUCKET, so refuse to continue without one.
if [[ -z "$BUCKET" ]]; then
  echo "Missing required --bucket BUCKET" >&2
  usage >&2
  exit 1
fi
# Fail fast when an external tool the script invokes is missing.
# (`uniq` is no longer checked: de-duplication is done with `sort -u`.)
for bin in aws jq date sort mktemp wc head; do
  if ! command -v "$bin" >/dev/null 2>&1; then
    echo "Missing dependency: $bin" >&2
    exit 1
  fi
done

# Timestamps are converted with `date -d`, which is a GNU extension; detect an
# incompatible `date` here instead of failing halfway through the run.
if ! date -d "@0" +%s >/dev/null 2>&1; then
  echo "Unsupported date implementation: GNU date (with -d) is required" >&2
  exit 1
fi
# Compute the half-open window [START_TS, END_TS) covering "today" in
# Europe/Lisbon, as epoch seconds.
# The calendar date is captured once so both bounds refer to the same day, and
# the end bound is the next local midnight rather than START_TS + 86400: on
# daylight-saving transition days the local day lasts 23 or 25 hours.
lisbon_today=$(TZ="Europe/Lisbon" date +%F)
START_TS=$(TZ="Europe/Lisbon" date -d "$lisbon_today 00:00" +%s)
END_TS=$(TZ="Europe/Lisbon" date -d "$lisbon_today 00:00 tomorrow" +%s)

echo "Bucket: $BUCKET"
echo "Lisbon start-of-today: $(TZ="Europe/Lisbon" date -d "@$START_TS") (epoch $START_TS)"
echo "Lisbon end-of-today: $(TZ="Europe/Lisbon" date -d "@$END_TS") (epoch $END_TS)"
echo
# Ask for an explicit go-ahead before touching the bucket, unless ASSUME_YES
# is "true". Only a lone "y" or "Y" counts as consent.
if [[ "$ASSUME_YES" != "true" ]]; then
  read -r -p "Proceed to scan and restore objects in s3://$BUCKET? [y/N] " ans
  case "$ans" in
    y|Y)
      ;;
    *)
      echo "Aborted."
      exit 0
      ;;
  esac
fi
# Scratch directory for intermediate listings; removed on any exit path.
workdir="$(mktemp -d)" || {
  echo "Failed to create temporary directory" >&2
  exit 1
}
# ${workdir:?} makes the cleanup refuse to run `rm -rf` on an empty path, and
# `--` stops option parsing before the path.
trap 'rm -rf -- "${workdir:?}"' EXIT

# Start from an empty list of keys touched today.
keys_today_file="$workdir/keys_today.txt"
: > "$keys_today_file"
# Page through every object version and delete marker in the bucket and append
# "Key<TAB>LastModified" rows to versions.tsv / deletemarkers.tsv in $workdir.
echo "Scanning bucket for objects touched today..."
NEXT_TOKEN=""
PAGE=1
while :; do
  # --output json pins the format jq expects regardless of the user's
  # configured AWS CLI default output.
  list_args=(--bucket "$BUCKET" --max-items 1000 --output json)
  if [[ -n "$NEXT_TOKEN" ]]; then
    list_args+=(--starting-token "$NEXT_TOKEN")
  fi
  RESP=$(aws s3api list-object-versions "${list_args[@]}")

  # Output Key and LastModified as TSV for both Versions and DeleteMarkers
  # (No date parsing in jq for jq 1.5 compatibility).
  # A page that cannot be parsed aborts the run: silently skipping it would
  # leave keys out of the restore without any indication.
  if ! jq -r '(.Versions // [])[] | [.Key, .LastModified] | @tsv' \
      <<<"$RESP" >> "$workdir/versions.tsv"; then
    echo "Failed to parse object versions on page $PAGE" >&2
    exit 1
  fi
  if ! jq -r '(.DeleteMarkers // [])[] | [.Key, .LastModified] | @tsv' \
      <<<"$RESP" >> "$workdir/deletemarkers.tsv"; then
    echo "Failed to parse delete markers on page $PAGE" >&2
    exit 1
  fi

  # An absent NextToken means this was the final page.
  NEXT_TOKEN=$(jq -r '.NextToken // empty' <<<"$RESP")
  echo " Scanned page $PAGE..."
  PAGE=$((PAGE+1))
  if [[ -z "$NEXT_TOKEN" ]]; then
    break
  fi
done
# Reduce the scanned listings to the distinct keys that have at least one
# version or delete marker whose LastModified lies in [START_TS, END_TS).
# The touch guarantees both listing files exist before they are read.
touch "$workdir/versions.tsv" "$workdir/deletemarkers.tsv"
cat "$workdir/"*.tsv | while IFS=$'\t' read -r obj_key modified; do
  # Ignore blank or incomplete rows.
  if [[ -z "${obj_key:-}" || -z "${modified:-}" ]]; then
    continue
  fi
  modified_epoch=$(date -d "$modified" +%s)
  # Anything outside today's window is irrelevant.
  if (( modified_epoch < START_TS || modified_epoch >= END_TS )); then
    continue
  fi
  printf '%s\n' "$obj_key"
done | sort -u > "$keys_today_file"

# An empty result file means there is nothing to restore.
[[ -s "$keys_today_file" ]] || {
  echo "No objects were updated today. Nothing to do."
  exit 0
}
# Show how many keys were touched today (listing at most the first 20) and ask
# for a second confirmation before anything is modified.
key_count=$(wc -l < "$keys_today_file")
echo
echo "Found $key_count keys touched today:"
head -n 20 "$keys_today_file"
if (( key_count > 20 )); then
  echo " ... (truncated)"
fi

if [[ "$ASSUME_YES" != "true" ]]; then
  read -r -p "Restore each to its most recent pre-today version? [y/N] " ans
  case "$ans" in
    y|Y)
      ;;
    *)
      echo "Aborted."
      exit 0
      ;;
  esac
fi
#######################################
# Pick the newest object version that is strictly older than a cutoff.
# Arguments: $1 - cutoff as epoch seconds (exclusive upper bound)
# Input:     stdin, one "VersionId<TAB>LastModified" row per line
# Outputs:   "VersionId<TAB>epoch" for the chosen version on stdout;
#            nothing when no row qualifies
# Returns:   0 on success, 1 if a LastModified value cannot be parsed
#######################################
pick_pre_today_version() {
  local cutoff=$1
  local vid lm lm_epoch
  local best_ver=""
  local best_ts=0
  while IFS=$'\t' read -r vid lm; do
    # Ignore blank or incomplete rows.
    [[ -n "$vid" && -n "$lm" ]] || continue
    lm_epoch=$(date -d "$lm" +%s) || return 1
    if (( lm_epoch < cutoff && lm_epoch > best_ts )); then
      best_ts=$lm_epoch
      best_ver=$vid
    fi
  done
  if [[ -n "$best_ver" ]]; then
    printf '%s\t%s\n' "$best_ver" "$best_ts"
  fi
}

# Restore phase: for every key touched today, copy its latest pre-today object
# version on top of the current one. A problem with one key is counted as a
# failure and the loop moves on, so a single bad key cannot abort the run
# half-way without a summary.
echo
RESTORED=0
SKIPPED=0
FAILED=0
while IFS= read -r KEY; do
  echo "Processing key: $KEY"

  # --prefix also returns longer keys that share this prefix; the jq filter
  # below keeps exact matches only. --output json pins the format jq expects.
  if ! VERS_JSON=$(aws s3api list-object-versions \
      --bucket "$BUCKET" --prefix "$KEY" --output json); then
    echo " FAILED to list versions for s3://$BUCKET/$KEY" >&2
    FAILED=$((FAILED+1))
    continue
  fi

  # Select the latest OBJECT version (not delete marker) older than today.
  # Date parsing is still avoided in jq: extract VersionId and LastModified as
  # TSV and compare timestamps in bash.
  if ! VERSION_ROWS=$(jq -r --arg key "$KEY" '
      (.Versions // [])[] | select(.Key == $key) | [.VersionId, .LastModified] | @tsv
    ' <<<"$VERS_JSON"); then
    echo " FAILED to parse version listing for s3://$BUCKET/$KEY" >&2
    FAILED=$((FAILED+1))
    continue
  fi

  if ! BEST=$(pick_pre_today_version "$START_TS" <<<"$VERSION_ROWS"); then
    echo " FAILED to interpret version timestamps for s3://$BUCKET/$KEY" >&2
    FAILED=$((FAILED+1))
    continue
  fi

  BEST_VER=""
  BEST_TS=0
  if [[ -n "$BEST" ]]; then
    IFS=$'\t' read -r BEST_VER BEST_TS <<<"$BEST"
  fi

  if [[ -z "$BEST_VER" ]]; then
    echo " No pre-today version found. Skipping."
    SKIPPED=$((SKIPPED+1))
    continue
  fi
  echo " Found pre-today versionId=$BEST_VER (LastModified=$(date -d "@$BEST_TS"))"

  if [[ "$DRY_RUN" == "true" ]]; then
    echo " DRY-RUN: would restore by copying s3://$BUCKET/$KEY?versionId=$BEST_VER to current."
    continue
  fi

  # Perform the restore via self-copy of the old version. stderr is captured
  # (stdout discarded) so the reason for a failure can be shown to the user.
  if COPY_ERR=$(aws s3api copy-object \
      --bucket "$BUCKET" \
      --key "$KEY" \
      --copy-source "${BUCKET}/${KEY}?versionId=${BEST_VER}" \
      --metadata-directive COPY 2>&1 >/dev/null); then
    echo " Restored: s3://$BUCKET/$KEY (from versionId=$BEST_VER)"
    RESTORED=$((RESTORED+1))
  else
    echo " FAILED to restore s3://$BUCKET/$KEY"
    if [[ -n "$COPY_ERR" ]]; then
      printf '%s\n' "$COPY_ERR" >&2
    fi
    FAILED=$((FAILED+1))
  fi
done < "$keys_today_file"

echo
echo "Done. Restored: $RESTORED, Skipped: $SKIPPED, Failed: $FAILED"

# Report failed restores through the exit status so callers can detect them.
if (( FAILED > 0 )); then
  exit 1
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment