Last active
May 12, 2025 06:38
-
-
Save jacky9813/b827788c76911ab0d8dea3e9c14e02f9 to your computer and use it in GitHub Desktop.
Azure Storage Account - Rehydrate all archived blobs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Requires Python 3.12 or later | |
import itertools | |
import typing as t | |
import azure.core.credentials | |
import azure.core.pipeline.transport | |
import azure.identity | |
import azure.mgmt.storage | |
import azure.mgmt.storage.models | |
import azure.storage.blob | |
# Tenant passed to the Azure CLI credential when signing in.
TENANT_ID: str = "12345678-90ab-cdef-1234-567980abcdef"
# Subscription handed to the storage management client for key lookups.
SUBSCRIPTION_ID: str = "12345678-90ab-cdef-1234-567980abcdef"
# Resource group used when listing storage account keys.
RESOURCE_GROUP: str = "Example-RG"
# Maximum number of sub-requests sent in a single blob batch call.
BATCH_LIMIT: int = 256
# Storage account name -> containers to scan for archived blobs.
STORAGE_ACCOUNTS: dict[str, list[str]] = {
    "mystorageaccount": [
        "container1",
        "container2",
    ],
}
def set_tier_batch_request(
    account: str,
    container: str,
    blob_path: str,
    new_tier: t.Literal["Hot", "Cool", "Cold", "Archive"] = "Hot",
    snapshot: str | None = None,
    version_id: str | None = None,
    request_headers: dict[str, str] | None = None,
    auth_setter: t.Callable | None = None
) -> azure.core.pipeline.transport.HttpRequest:
    """
    Build one "set access tier" sub-request for the blob batch API.

    To use blob batch API in azure.storage.blob, we have to create
    instances of azure.core.pipeline.transport.HttpRequest.
    The azure.storage.blob._generated.operations._blob_operations.build_set_tier_request
    creates an instance of azure.core.rest.HttpRequest, which is not
    compatible with the pipeline ones.

    Args:
        account: Storage account name. Currently unused: the request URL
            is only a path, without a host.
        container: Container holding the blob.
        blob_path: Blob name inside the container; may contain "/".
        new_tier: Value sent in the "x-ms-access-tier" header.
        snapshot: Optional snapshot identifier added to the query string.
        version_id: Optional version identifier added to the query string.
        request_headers: Extra headers; "x-ms-access-tier" always wins.
        auth_setter: Currently unused; kept for interface compatibility.

    Returns:
        A PUT request for "/<container>/<blob>?comp=tier[...]" with every
        URL component percent-encoded.
    """
    # Local import keeps this function self-contained.
    import urllib.parse

    query = {"comp": "tier"}
    if snapshot:
        query["snapshot"] = snapshot
    if version_id:
        query["versionid"] = version_id

    # Blob names may legally contain spaces, "?", "#", "%", "&", ...; left
    # unescaped they would corrupt the path or leak into the query string.
    # "/" stays literal so virtual directories keep their structure.
    request_path = (
        f"/{urllib.parse.quote(container)}"
        f"/{urllib.parse.quote(blob_path, safe='/~')}"
        f"?{urllib.parse.urlencode(query)}"
    )

    return azure.core.pipeline.transport.HttpRequest(
        "PUT",
        request_path,
        headers={
            **(request_headers or {}),
            "x-ms-access-tier": new_tier
        }
    )
def get_account_key(
    credentials: azure.core.credentials.TokenCredential,
    account: str
) -> azure.mgmt.storage.models.StorageAccountKey:
    """
    Return the first access key listed for a storage account.

    The key list is requested through the storage management client for
    SUBSCRIPTION_ID, looking the account up inside RESOURCE_GROUP.

    Args:
        credentials: Token credential used by the management client.
        account: Name of the storage account.

    Returns:
        The first entry of the key list returned by the service.
    """
    management_client = azure.mgmt.storage.StorageManagementClient(
        credentials, SUBSCRIPTION_ID
    )
    listed_keys = management_client.storage_accounts.list_keys(
        resource_group_name=RESOURCE_GROUP,
        account_name=account
    ).keys
    return listed_keys[0]  # type: ignore
def list_archived_blobs(
    credentials: azure.core.credentials.TokenCredential,
    account: str,
    container: str,
    with_access_key: bool = False
) -> t.Generator[azure.storage.blob.BlobProperties, None, None]:
    """
    Yield every blob in a container whose access tier is Archive.

    Blobs are yielded while the listing is being consumed instead of
    being collected into a list first, so memory use does not grow with
    the size of the container.

    Args:
        credentials: Token credential; used directly, or only to fetch
            the account key when ``with_access_key`` is true.
        account: Storage account name.
        container: Container to scan.
        with_access_key: Authenticate to the blob endpoint with the
            storage account key instead of the token credential.

    Yields:
        Properties of each blob whose ``blob_tier`` equals
        ``StandardBlobTier.ARCHIVE``.
    """
    credential = (
        get_account_key(credentials, account).value
        if with_access_key
        else credentials
    )
    # The context manager closes the client once iteration finishes or the
    # generator is closed early.
    with azure.storage.blob.ContainerClient(
        account_url=f'https://{account}.blob.core.windows.net',
        container_name=container,
        credential=credential
    ) as container_client:
        for blob in container_client.list_blobs():
            if blob.blob_tier == azure.storage.blob.StandardBlobTier.ARCHIVE:
                yield blob
def main():
    """
    Request a tier change to "Hot" for every archived blob.

    For each account in STORAGE_ACCOUNTS, every archived blob in the
    listed containers gets one set-tier sub-request; the sub-requests are
    sent through the blob batch API in groups of at most BATCH_LIMIT.
    """
    az_cred = azure.identity.AzureCliCredential(
        tenant_id=TENANT_ID
    )
    for account, container_list in STORAGE_ACCOUNTS.items():
        # Prioritizing the storage account key, as, for some reason, I
        # can't use AzureCliCredential directly.
        # The context manager closes the client when the account is done.
        with azure.storage.blob.BlobServiceClient(
            f'https://{account}.blob.core.windows.net/',
            credential=get_account_key(az_cred, account).value or az_cred
        ) as service_client:
            requests = [
                set_tier_batch_request(
                    account, container, archived_blob.name,
                    new_tier="Hot"
                )
                for container in container_list
                for archived_blob in list_archived_blobs(
                    az_cred, account, container, True)
            ]
            # Sending pipelined requests
            for batch in itertools.batched(requests, BATCH_LIMIT):
                # I'm using an undocumented function that already supports
                # blob batch API, which requires HttpRequest in the
                # azure.core.pipeline.transport module.
                batch_response = service_client._batch_send(*batch)
                for response in batch_response:
                    # The response.status_code should be "202 Accepted".
                    pass


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment