Last active
May 18, 2020 10:07
-
-
Save twooster/19505747decf4aa4659851d60ab9189d to your computer and use it in GitHub Desktop.
Download the S3 inventory for a bucket into a gigantic CSV
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Download the S3 inventory for a bucket into one large CSV.
#
# Usage:
#   AWS_PROFILE=<profile> <script> <bucket> <inventory-date>
#
# Arguments:
#   $1 - name of the S3 bucket whose inventory should be downloaded
#   $2 - inventory date, e.g. 2020-04-03 ("T00-00Z" is appended to it); a full
#        inventory timestamp such as 2020-04-03T01-00Z is used as given
#
# Environment:
#   AWS_PROFILE - must be set and non-empty
#
# Requires: aws, jq, gzip
#
# Output:
#   ./workspace/files.csv - every gzipped data file listed in the inventory
#   manifest, decompressed and concatenated in manifest order.
#   NOTE: ./workspace is deleted and recreated on every run.

set -euo pipefail

# Print a message to stderr and abort with exit status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

[[ -n "${AWS_PROFILE:-}" ]] || die "No AWS profile found in \$AWS_PROFILE"

BUCKET="${1:-}"
[[ -n "$BUCKET" ]] \
  || die "You must specify the S3 bucket to download the inventory of as the first parameter"

DATE="${2:-}"
[[ -n "$DATE" ]] \
  || die "You must specify an inventory date as the second parameter, e.g. 2020-04-03"

for tool in aws jq gzip; do
  command -v "$tool" > /dev/null || die "Required tool not found in PATH: $tool"
done

WORKSPACE="./workspace"
MANIFEST="$WORKSPACE/manifest.json"
FULL_CSV="$WORKSPACE/files.csv"
TARGET_FILE="$WORKSPACE/partial.gz"

# A bare date gets the midnight suffix; a full timestamp is passed through so
# inventories published under a different time component can still be fetched.
if [[ "$DATE" == *T*Z ]]; then
  SNAPSHOT="$DATE"
else
  SNAPSHOT="${DATE}T00-00Z"
fi

INVENTORY_CONFIGS="$(aws s3api list-bucket-inventory-configurations --bucket "$BUCKET")"

# Only the first inventory configuration of the bucket is used.
first_config="$(jq -c '.InventoryConfigurationList[0] // empty' <<< "$INVENTORY_CONFIGS")"
[[ -n "$first_config" ]] || die "No inventory configuration found for bucket $BUCKET"

dest_arn="$(jq -r '.Destination.S3BucketDestination.Bucket // empty' <<< "$first_config")"
dest_prefix="$(jq -r '.Destination.S3BucketDestination.Prefix // empty' <<< "$first_config")"
config_id="$(jq -r '.Id // empty' <<< "$first_config")"
[[ -n "$dest_arn" && -n "$config_id" ]] \
  || die "Inventory configuration for $BUCKET has no destination bucket or Id"

# The destination is reported as an ARN; the aws s3 commands want the bare name.
OTHER_BUCKET="${dest_arn#arn:aws:s3:::}"

# The destination prefix is optional: leave the path segment out entirely when
# it is absent instead of interpolating "null" or an empty segment.
PREFIX="s3://$OTHER_BUCKET"
if [[ -n "$dest_prefix" ]]; then
  PREFIX+="/$dest_prefix"
fi
PREFIX+="/$BUCKET/$config_id"

echo "Using s3 prefix: $PREFIX"

echo "Clearing workspace..."
rm -vrf -- "${WORKSPACE:?}"
mkdir -p -- "$WORKSPACE"

echo "Downloading manifest file..."
aws s3 cp "$PREFIX/$SNAPSHOT/manifest.json" "$MANIFEST"

# Sum of the data-file sizes from the manifest, divided by 1,000,000.
DOWNLOAD_SIZE="$(jq '( .files | reduce .[] as $i (0; . + $i.size) ) / 1000000' "$MANIFEST")"

# Read keys one per line so keys containing spaces or glob characters survive.
manifest_keys="$(jq -r '.files[].key' "$MANIFEST")"
DOWNLOAD_FILES=()
while IFS= read -r key; do
  if [[ -n "$key" ]]; then
    DOWNLOAD_FILES+=("$key")
  fi
done <<< "$manifest_keys"

echo "Truncating $FULL_CSV"
: > "$FULL_CSV"

echo "Downloading $DOWNLOAD_SIZE MB of gzipped inventory information..."
# Guarded because expanding an empty array trips `set -u` on older bash.
if (( ${#DOWNLOAD_FILES[@]} > 0 )); then
  for file in "${DOWNLOAD_FILES[@]}"; do
    aws s3 cp "s3://$OTHER_BUCKET/$file" "$TARGET_FILE"
    # gzip -dc instead of zcat: some zcat implementations only accept .Z files.
    gzip -dc -- "$TARGET_FILE" >> "$FULL_CSV"
    rm -f -- "$TARGET_FILE"
  done
fi

echo "Done! Output in $FULL_CSV"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment