@umbertogriffo
Last active October 7, 2025 12:04
Asynchronous File Downloader with httpx and aiofiles.
"""
This Python script demonstrates how to download multiple files asynchronously using the httpx library for HTTP requests
and aiofiles for asynchronous file operations.
"""
import asyncio
import tempfile
import time
from pathlib import Path
import aiofiles
import httpx


async def download_file(client: httpx.AsyncClient, url: str, dest_folder: Path) -> Path:
    """
    Download a single file asynchronously.

    Args:
        client (httpx.AsyncClient): An instance of httpx.AsyncClient for making HTTP requests.
        url (str): The URL of the file to download.
        dest_folder (Path): The destination folder to save the downloaded file.

    Returns:
        Path: The path of the downloaded file.
    """
    async with client.stream("GET", url) as response:
        response.raise_for_status()
        # Default to 0 if the server does not send a Content-Length header.
        size_in_mb = int(response.headers.get("content-length", 0)) // (1024 * 1024)
        num_bytes_downloaded = response.num_bytes_downloaded

        file_name = Path(response.url.path).name
        file_path = dest_folder / file_name

        # Use an asynchronous file API instead of the synchronous open()
        async with aiofiles.open(file_path, 'wb') as f:
            async for chunk in response.aiter_bytes(1024 * 1024):  # 1 MB chunks
                await f.write(chunk)

        downloaded = (response.num_bytes_downloaded - num_bytes_downloaded) // (1024 * 1024)
        print(f"{file_name} - Downloaded {downloaded} MB of {size_in_mb} MB from {url}")

    return file_path


async def download_files(client: httpx.AsyncClient, urls: list[str], dest_folder: Path) -> list[Path]:
    """
    Download multiple files asynchronously.

    Args:
        client (httpx.AsyncClient): An instance of httpx.AsyncClient for making HTTP requests.
        urls (list[str]): List of file URLs to download.
        dest_folder (Path): The destination folder to save the downloaded files.

    Returns:
        list[Path]: The paths of the downloaded files.

    Note:
        - All downloads run concurrently (not sequentially).
        - httpx handles async streaming efficiently (no need to load the whole file into memory).
        - Sharing a single client enables connection pooling, which reduces the overhead of repeated connections.
    """
    tasks = [download_file(client, url, dest_folder) for url in urls]
    results = await asyncio.gather(*tasks)
    return results


async def main(urls: list[str]):
    # Create a temporary workspace for the files: process them there, then let the
    # context manager delete them automatically on exit.
    with tempfile.TemporaryDirectory() as dest_folder:
        async with httpx.AsyncClient() as client:
            results = await download_files(client, urls, Path(dest_folder))
            print({"saved_files": [str(p) for p in results]})


if __name__ == '__main__':
    urls = [
        "https://onlinetestcase.com/wp-content/uploads/2023/06/1.1-MB-1.jpg",
        "https://onlinetestcase.com/wp-content/uploads/2023/06/2.1-MB-1-scaled.jpg",
        "https://onlinetestcase.com/wp-content/uploads/2023/06/6.1-MB.jpg",
        "https://onlinetestcase.com/wp-content/uploads/2023/06/7.2-MB.jpg",
    ]

    start_time = time.time()
    asyncio.run(main(urls))
    took = time.time() - start_time
    print(f"--- Took {took:.2f} seconds ---")