@almost
Created August 21, 2012 17:59

Revisions

  1. almost revised this gist Aug 24, 2012. 1 changed file with 2 additions and 1 deletion.
    glacier.py: 2 additions & 1 deletion
    @@ -11,8 +11,9 @@
    # writer.write(someotherdata)
    # writer.close()
    # # Get the id of the newly created archive
    # archive_id = writer.get_archive_id()from boto.connection import AWSAuthConnection
    # archive_id = writer.get_archive_id()

    from boto.connection import AWSAuthConnection
    import urllib
    import hashlib
    import math
  2. almost revised this gist Aug 23, 2012. 1 changed file with 27 additions and 13 deletions.
    glacier.py: 27 additions & 13 deletions
    @@ -11,10 +11,8 @@
    # writer.write(someotherdata)
    # writer.close()
    # # Get the id of the newly created archive
    # archive_id = writer.get_archive_id()
    # archive_id = writer.get_archive_id()from boto.connection import AWSAuthConnection


    from boto.connection import AWSAuthConnection
    import urllib
    import hashlib
    import math
    @@ -43,6 +41,13 @@ def _required_auth_capability(self):
        def get_vault(self, name):
            return GlacierVault(self, name)

        def make_request(self, method, path, headers=None, data='', host=None,
                         auth_path=None, sender=None, override_num_retries=None):
            headers = headers or {}
            headers.setdefault("x-amz-glacier-version","2012-06-01")
            return super(GlacierConnection, self).make_request(method, path, headers, data, host, auth_path, sender, override_num_retries)


    class GlacierVault(object):
        def __init__(self, connection, name):
            self.connection = connection
    @@ -62,23 +67,35 @@ def retrieve_archive(self, archive, sns_topic=None, description=None):
            job.initiate()
            return job

        def make_request(self, method, extra_path, headers, data):
        def make_request(self, method, extra_path, headers=None, data=""):
            return self.connection.make_request(method, "/-/vaults/%s%s" % (urllib.quote(self.name),extra_path,), headers, data)

        def get_job(self, job_id):
            return GlacierJob(self, job_id=job_id)


    class GlacierJob(object):
        def __init__(self, vault, params):
        def __init__(self, vault, params=None, job_id=None):
            self.vault = vault
            self.params = params
            self.job_id = job_id

        def initiate(self):
            headers = {
                "x-amz-glacier-version": "2012-06-01",
            }
            response = self.vault.make_request("POST", "/jobs", headers, json.dumps(self.params))
            response = self.vault.make_request("POST", "/jobs", None, json.dumps(self.params))

            assert response.status == 202, "Start job expected 202 back (got %s): %r" % (response.status, response.read())

            response.read()

            self.job_id = response.getheader("x-amz-job-id")

        def get_output(self, range_from=None, range_to=None):
            headers = {}
            if range_from is not None or range_to is not None:
                assert range_from is not None and range_to is not None, "If you specify one of range_from or range_to you must specify the other"
                headers["Range"] = "bytes=%d-%d" % (range_from, range_to)
            response = self.vault.make_request("GET", "/jobs/%s/output" % (urllib.quote(self.job_id),), headers)
            assert response.status == 200, "Get output expects 200 responses (got %s): %r" % (response.status, response.read())
            return response

    def chunk_hashes(str):
    """
    @@ -122,7 +139,6 @@ def __init__(self, connection, vault, part_size=DEFAULT_PART_SIZE):
            self.connection = connection

            headers = {
                "x-amz-glacier-version": "2012-06-01",
                "x-amz-part-size": str(self.part_size)
            }
            response = self.connection.make_request(
    @@ -152,7 +168,6 @@ def send_part(self):
            self.tree_hashes.append(part_tree_hash)

            headers = {
                "x-amz-glacier-version": "2012-06-01",
                "Content-Range": "bytes %d-%d/*" % (self.uploaded_size, (self.uploaded_size+len(part))-1),
                "Content-Length": str(len(part)),
                "Content-Type": "application/octet-stream",
    @@ -186,7 +201,6 @@ def close(self):
                self.send_part()
            # Complete the multipart glacier upload
            headers = {
                "x-amz-glacier-version": "2012-06-01",
                "x-amz-sha256-tree-hash": bytes_to_hex(tree_hash(self.tree_hashes)),
                "x-amz-archive-size": str(self.uploaded_size)
            }
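
    The retrieval side added in this revision can be driven roughly as follows. This is a sketch rather than part of the gist: the vault name, archive id and output filename are placeholders, and credentials are assumed to be in the usual AWS environment variables. Glacier retrieval jobs complete asynchronously (typically hours later), so the job id is kept and the output is fetched in a later run.

    # Retrieval sketch (Python 2); "myvault", the archive id and the output
    # filename are placeholders, everything else comes from glacier.py above.
    import os
    from glacier import GlacierConnection

    conn = GlacierConnection(os.environ["AWS_ACCESS_KEY_ID"],
                             os.environ["AWS_SECRET_ACCESS_KEY"])
    vault = conn.get_vault("myvault")

    # Kick off an archive-retrieval job; pass sns_topic= to be notified
    # when Glacier finishes preparing the archive.
    job = vault.retrieve_archive("ARCHIVE_ID_GOES_HERE")
    print "job started:", job.job_id

    # Hours later (or in a separate run), look the job up again by id and
    # download its output once it has completed.
    job = vault.get_job(job.job_id)
    response = job.get_output()  # or get_output(0, 1048575) for just the first MB
    with open("restored.bin", "wb") as f:
        f.write(response.read())
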
  3. almost revised this gist Aug 21, 2012. 1 changed file with 3 additions and 1 deletion.
    glacier.py: 3 additions & 1 deletion
    @@ -1,7 +1,9 @@
    # Thomas Parslow http://almostobsolete.net
    # Just a work in progress and adapted to what I need right now.
    # It does uploads (via a file-like object that you write to) and
    # I've started on downloads.
    # I've started on downloads. Needs the development version of Boto from Github.
    #
    # Example:
    #
    # glacierconn = GlacierConnection(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY)
    # writer = GlacierWriter(glacierconn, GLACIER_VAULT)
  4. almost created this gist Aug 21, 2012.
    glacier.py: 204 additions & 0 deletions
    @@ -0,0 +1,204 @@
    # Thomas Parslow http://almostobsolete.net
    # Just a work in progress and adapted to what I need right now.
    # It does uploads (via a file-like object that you write to) and
    # I've started on downloads.
    #
    # glacierconn = GlacierConnection(AWS_ACCESS_KEY, AWS_SECRET_ACCESS_KEY)
    # writer = GlacierWriter(glacierconn, GLACIER_VAULT)
    # writer.write(somedata)
    # writer.write(someotherdata)
    # writer.close()
    # # Get the id of the newly created archive
    # archive_id = writer.get_archive_id()


    from boto.connection import AWSAuthConnection
    import urllib
    import hashlib
    import math
    import json

    class GlacierConnection(AWSAuthConnection):

        def __init__(self, aws_access_key_id=None, aws_secret_access_key=None, region="us-east-1",
                     is_secure=True, port=None, proxy=None, proxy_port=None,
                     proxy_user=None, proxy_pass=None,
                     host=None, debug=0, https_connection_factory=None,
                     path='/', provider='aws', security_token=None,
                     suppress_consec_slashes=True):
            if host is None:
                host = 'glacier.%s.amazonaws.com' % (region,)
            AWSAuthConnection.__init__(self, host,
                                       aws_access_key_id, aws_secret_access_key,
                                       is_secure, port, proxy, proxy_port, proxy_user, proxy_pass,
                                       debug=debug, https_connection_factory=https_connection_factory,
                                       path=path, provider=provider, security_token=security_token,
                                       suppress_consec_slashes=suppress_consec_slashes)

        def _required_auth_capability(self):
            return ["hmac-v4"]

        def get_vault(self, name):
            return GlacierVault(self, name)

    class GlacierVault(object):
        def __init__(self, connection, name):
            self.connection = connection
            self.name = name

        def retrieve_archive(self, archive, sns_topic=None, description=None):
            """
            Initiate an archive retrieval job to download the data from an
            archive.
            """
            params = {"Type": "archive-retrieval", "ArchiveId": archive}
            if sns_topic is not None:
                params["SNSTopic"] = sns_topic
            if description is not None:
                params["Description"] = description
            job = GlacierJob(self, params)
            job.initiate()
            return job

        def make_request(self, method, extra_path, headers, data):
            return self.connection.make_request(method, "/-/vaults/%s%s" % (urllib.quote(self.name),extra_path,), headers, data)


    class GlacierJob(object):
        def __init__(self, vault, params):
            self.vault = vault
            self.params = params

        def initiate(self):
            headers = {
                "x-amz-glacier-version": "2012-06-01",
            }
            response = self.vault.make_request("POST", "/jobs", headers, json.dumps(self.params))

            assert response.status == 202, "Start job expected 202 back (got %s): %r" % (response.status, response.read())


    def chunk_hashes(str):
        """
        Break up the byte-string into 1MB chunks and return sha256 hashes
        for each.
        """
        chunk = 1024*1024
        chunk_count = int(math.ceil(len(str)/float(chunk)))
        chunks = [str[i*chunk:(i+1)*chunk] for i in range(chunk_count)]
        return [hashlib.sha256(x).digest() for x in chunks]

    def tree_hash(hashes):
        """
        Given a hash of each 1MB chunk (from chunk_hashes) this will hash
        together adjacent hashes until it ends up with one big one. So a
        tree of hashes.
        """
        while len(hashes) > 1:
            # Hash each adjacent pair of hashes together; an odd hash left
            # over at the end of a level is carried up to the next level as-is.
            new_hashes = [hashlib.sha256(hashes[i] + hashes[i+1]).digest()
                          for i in range(0, len(hashes) - 1, 2)]
            if len(hashes) % 2:
                new_hashes.append(hashes[-1])
            hashes = new_hashes
        return hashes[0]

    def bytes_to_hex(str):
        return ''.join(["%02x" % ord(x) for x in str]).strip()

    class GlacierWriter(object):
        """
        Presents a file-like object for writing to an Amazon Glacier
        Archive. The data is written using the multi-part upload API.
        """
        DEFAULT_PART_SIZE = 128*1024*1024  # 128MB
        def __init__(self, connection, vault, part_size=DEFAULT_PART_SIZE):
            self.part_size = part_size
            self.buffer_size = 0
            self.uploaded_size = 0
            self.buffer = []
            self.vault = vault
            self.tree_hashes = []
            self.archive_location = None
            self.closed = False

            self.connection = connection

            headers = {
                "x-amz-glacier-version": "2012-06-01",
                "x-amz-part-size": str(self.part_size)
            }
            response = self.connection.make_request(
                "POST",
                "/-/vaults/%s/multipart-uploads" % (urllib.quote(self.vault),),
                headers,
                "")
            assert response.status == 201, "Multipart-start should respond with a 201! (got %s): %r" % (response.status, response.read())
            response.read()
            self.upload_url = response.getheader("location")


        def send_part(self):
            buf = "".join(self.buffer)
            # Put back any data remaining over the part size into the
            # buffer
            if len(buf) > self.part_size:
                self.buffer = [buf[self.part_size:]]
                self.buffer_size = len(self.buffer[0])
            else:
                self.buffer = []
                self.buffer_size = 0
            # The part we will send
            part = buf[:self.part_size]
            # Create a request and sign it
            part_tree_hash = tree_hash(chunk_hashes(part))
            self.tree_hashes.append(part_tree_hash)

            headers = {
                "x-amz-glacier-version": "2012-06-01",
                "Content-Range": "bytes %d-%d/*" % (self.uploaded_size, (self.uploaded_size+len(part))-1),
                "Content-Length": str(len(part)),
                "Content-Type": "application/octet-stream",
                "x-amz-sha256-tree-hash": bytes_to_hex(part_tree_hash),
                "x-amz-content-sha256": hashlib.sha256(part).hexdigest()
            }

            response = self.connection.make_request(
                "PUT",
                self.upload_url,
                headers,
                part)

            assert response.status == 204, "Multipart upload part should respond with a 204! (got %s): %r" % (response.status, response.read())

            response.read()

            self.uploaded_size += len(part)

        def write(self, str):
            assert not self.closed, "Tried to write to a GlacierWriter that is already closed!"
            self.buffer.append(str)
            self.buffer_size += len(str)
            while self.buffer_size > self.part_size:
                self.send_part()

        def close(self):
            if self.closed:
                return
            if self.buffer_size > 0:
                self.send_part()
            # Complete the multipart glacier upload
            headers = {
                "x-amz-glacier-version": "2012-06-01",
                "x-amz-sha256-tree-hash": bytes_to_hex(tree_hash(self.tree_hashes)),
                "x-amz-archive-size": str(self.uploaded_size)
            }
            response = self.connection.make_request(
                "POST",
                self.upload_url,
                headers,
                "")

            assert response.status == 201, "Multipart-complete should respond with a 201! (got %s): %r" % (response.status, response.read())
            response.read()
            self.archive_id = response.getheader("x-amz-archive-id")
            self.closed = True

        def get_archive_id(self):
            self.close()
            return self.archive_id
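
    For reference, a minimal end-to-end sketch of the upload path and of the tree-hash helpers defined above. It is not part of the gist: it assumes the listing is saved as glacier.py, runs under Python 2, reads credentials from the usual AWS environment variables, and uses a placeholder vault name.

    # Upload sketch (Python 2); "myvault" is a placeholder vault name.
    import os
    from glacier import (GlacierConnection, GlacierWriter,
                         chunk_hashes, tree_hash, bytes_to_hex)

    # The tree-hash helpers can be exercised on their own: 3 MB of data gives
    # three 1 MB leaf hashes, which combine pairwise into a single root hash.
    data = "x" * (3 * 1024 * 1024)
    print "tree hash:", bytes_to_hex(tree_hash(chunk_hashes(data)))

    # Upload flow, mirroring the usage comment at the top of glacier.py.
    conn = GlacierConnection(os.environ["AWS_ACCESS_KEY_ID"],
                             os.environ["AWS_SECRET_ACCESS_KEY"])
    writer = GlacierWriter(conn, "myvault")
    writer.write(data)
    writer.close()
    print "archive id:", writer.get_archive_id()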