-
-
Save edwardsharp/d501af263728eceb361ebba80d7fe324 to your computer and use it in GitHub Desktop.
require 'aws-sdk'
require 'logger'
# Copies objects from one S3 bucket into another, skipping every object whose
# ETag already matches the object stored under the same key in the destination.
#
# Both buckets are looked up with the globally configured AWS credentials and
# are created when they do not exist yet.
class BucketSyncService
  attr_reader :from_bucket, :to_bucket, :logger

  # Set to a truthy value before calling #perform to log one line per object.
  attr_accessor :debug

  # ACL applied to every object copied into the destination bucket.
  DEFAULT_ACL = "public-read".freeze

  # @param from_bucket [String] name of the bucket to read objects from
  # @param to_bucket [String] name of the bucket to copy objects into
  def initialize(from_bucket, to_bucket)
    @from_bucket = bucket_from_credentials(from_bucket)
    @to_bucket = bucket_from_credentials(to_bucket)
  end

  # Walks every object of the source bucket and copies the ones that are
  # missing from, or differ in, the destination bucket.
  #
  # @param output [IO, String] log destination handed to Logger.new
  # @return [Object] whatever the final Logger#info call returns
  def perform(output = STDOUT)
    object_counts = { sync: 0, skip: 0 }
    create_logger(output)
    logger.info "Starting sync."

    from_bucket.objects.each do |object|
      if object_needs_syncing?(object)
        sync(object)
        object_counts[:sync] += 1
      else
        # Block form: the message is only built when DEBUG is enabled.
        logger.debug { "Skipped #{pp object}" }
        object_counts[:skip] += 1
      end
    end

    logger.info "Done. Synced #{object_counts[:sync]}, " \
                "skipped #{object_counts[:skip]}."
  end

  private

  # Builds the logger used by #perform; DEBUG level when #debug is truthy,
  # INFO otherwise.
  def create_logger(output)
    @logger = Logger.new(output).tap do |l|
      l.level = debug ? Logger::DEBUG : Logger::INFO
    end
  end

  # Copies one source object to the same key in the destination bucket.
  #
  # The source is passed as a { bucket:, key: } Hash so that the SDK builds
  # and URL-encodes the copy-source value itself. A hand-built "bucket/key"
  # string is sent as-is, which fails with NoSuchKey for keys containing
  # non-ASCII characters.
  def sync(object)
    logger.debug { "Syncing #{pp object}" }
    source = { bucket: object.bucket_name, key: object.key }
    to_bucket.object(object.key).copy_from(source, acl: DEFAULT_ACL)
  end

  # One-line human readable description of an object: key, size in whole
  # kilobytes (integer division) and last-modified timestamp.
  #
  # NOTE: intentionally keeps the historical name, which shadows Kernel#pp
  # inside this class.
  def pp(object)
    content_length_in_kb = object.content_length / 1024
    "#{object.key} #{content_length_in_kb}k " \
      "#{object.last_modified.strftime('%b %d %Y %H:%M')}"
  end

  # An object needs syncing when the destination has no object under the same
  # key, or when the two ETags differ.
  #
  # NOTE(review): ETag equality is the only change signal used here; confirm
  # it is reliable for objects that were uploaded in multiple parts.
  def object_needs_syncing?(object)
    to_object = to_bucket.object(object.key)
    !to_object.exists? || to_object.etag != object.etag
  end

  # Returns the Aws::S3::Bucket with the given name, creating it first when it
  # does not exist yet.
  #
  # @param bckt [String] bucket name
  # @return [Aws::S3::Bucket]
  def bucket_from_credentials(bckt)
    bucket = Aws::S3::Bucket.new(bckt)
    # Create through the bucket resource itself; there is no `s3` helper in
    # this class, so the previous `s3.bucket.create` raised NameError.
    bucket.create unless bucket.exists?
    bucket
  end
end
=begin
Example usage:
Aws.config.update({
  region: 'aws_region',
  credentials: Aws::Credentials.new('aws_access_key_id', 'aws_secret_access_key'),
})
require "bucket_sync_service.rb"
syncer = BucketSyncService.new("from-bucket", "to-bucket")
syncer.debug = true # log each object
syncer.perform
=end
Really helpful! It raises Aws::S3::Errors::NoSuchKey when the key includes non-ASCII characters like é, ü etc. though - have you found any way around that?
@Sprachprofi, hmm yeah you might try to CGI.escape the object.key in the sync method. but then you'd need to go back and un-escape everything. i'm not sure if the error you mention is getting raised from the source or destination key.
perhaps newer versions of the aws-sdk gem support this? do you run into encoding problems for non-ASCII keys in other places? there might not be good support for that and you might need to consider not using them in your bucket keys?? i dunno tho, i don't use AWS much these days.
Thanks @edwardsharp. How can this be handled if the source bucket and the destination bucket have different credentials (access key & secret key)?
Thanks @edwardsharp. How can this be handled if the source bucket and the destination bucket have different credentials (access key & secret key)?
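i guess you could just define two different methods, one per bucket, that each build the bucket with its own client — e.g. Aws::S3::Bucket.new(name, client: Aws::S3::Client.new(credentials: ...)) — like how bucket_from_credentials does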
Absolutely fantastic work, thank you @edwardsharp ! Confirmed working as of 6/23/17 using version 3.0.0.rc8 of aws-sdk