Setup AWS Glue and Athena to query DynamoDB data
#!/bin/bash
set -e
#
# This script refreshes an Athena database with data from a DynamoDB table. It creates
# a DynamoDB S3 export of the DynamoDB table, then creates an AWS Glue crawler
# which, when run, creates a Glue table. This can then be queried by Athena.
# Finally, it cleans up older exports and tables.
#
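# Prerequisites: the AWS CLI and jq must be on the PATH, and point-in-time
# recovery (PITR) must be enabled on the source table, since
# export-table-to-point-in-time only works on PITR-enabled tables.
# One-time setup sketch (the table name is a placeholder):
#
#   aws dynamodb update-continuous-backups \
#     --table-name YOURTABLENAME \
#     --point-in-time-recovery-specification PointInTimeRecoveryEnabled=true
#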
datestamp=$(date +%Y%m%d)
dynamodb_table_arn="arn:aws:dynamodb:us-east-2:YOURACCOUNT:table/YOURTABLENAME"
s3_bucket="my-dynamodb-exports"
s3_export_path="s3://${s3_bucket}/${datestamp}/AWSDynamoDB/"
glue_database_name="YOURTABLENAME-ddb-s3"
crawler_basename="My DynamoDB Crawler Name"
glue_table_prefix="YOURTABLENAME_${datestamp}_"
echo "Using new date stamp of: ${datestamp}"
echo "Starting DynamoDB S3 export." | |
# create a DynamoDB export to S3 and capture the export ARN | |
dynamo_export_arn=$(aws dynamodb export-table-to-point-in-time \ | |
--table-arn "${dynamodb_table_arn}" \ | |
--s3-bucket "${s3_bucket}" \ | |
--s3-prefix ${datestamp} \ | |
--export-format DYNAMODB_JSON \ | |
--export-type FULL_EXPORT \ | |
--s3-sse-algorithm AES256 \ | |
--output json \ | |
--no-cli-pager | jq -r '.ExportDescription.ExportArn') | |
echo "DynamoDB S3 Export ARN: ${dynamo_export_arn}" | |
echo "Waiting for DynamoDB S3 Export to complete (usually about 15-20 minutes)..." | |
# poll status to wait until done - run this periodically, until finds "COMPLETED" | |
# this usually takes about 15 minutes | |
while ! aws dynamodb describe-export --export-arn "${dynamo_export_arn}" | grep "COMPLETED"; do | |
echo -n "." | |
sleep 60 | |
done | |
echo "DynamoDB S3 Export completed" | |
# List the bucket to find the export ID/full path we need for the next command
s3_export_id=$(aws s3 ls "${s3_export_path}" | awk '{print $2}')
s3_path="${s3_export_path}${s3_export_id}data/"
echo "S3 Path for Glue Crawler: ${s3_path}"
# Create a new Glue table using a crawler - first create the crawler, then run it to create the table
crawler_name="${crawler_basename} ${datestamp}"
aws glue create-crawler \
    --name "${crawler_name}" \
    --role arn:aws:iam::YOURACCOUNT:role/Glue-Dynamo-S3-Athena \
    --database-name "${glue_database_name}" \
    --table-prefix "${glue_table_prefix}" \
    --targets '{"S3Targets": [{"Path": "'"${s3_path}"'"}]}' \
    --no-cli-pager
echo "Created Glue crawler, name: ${crawler_name}"
aws glue start-crawler --name "${crawler_name}"
echo "Started Glue crawler - wait for this to finish before querying the table."
#
# Clean up previous setup
#
# Find the previous S3 export and delete it - there should only be one, which this relies on
old_export=$(aws s3 ls "s3://${s3_bucket}/" | grep -v "${datestamp}" | awk '{print $2}')
# Make sure old_export is not empty
if [ -z "$old_export" ]; then
    echo "No old S3 export found, assuming nothing to clean up, exiting."
    exit 0
fi
echo "Deleting old S3 export: ${old_export}"
aws s3 rm "s3://${s3_bucket}/${old_export}" --recursive
# For the datestamp, remove the trailing slash from the old_export dir name we just got
old_export_datestamp=${old_export%/}
# Delete the older Glue crawler
aws glue delete-crawler --name "${crawler_basename} ${old_export_datestamp}"
echo "Deleted older Glue crawler."
# Delete the older Glue table (it follows the same naming pattern as glue_table_prefix)
aws glue delete-table --database-name "${glue_database_name}" --name "YOURTABLENAME_${old_export_datestamp}_data"
echo "Deleted older Glue table."
echo "Done - update your Athena queries to use the table: ${glue_table_prefix}data"