Last active
January 3, 2025 09:19
-
-
Save yatsu/861a3bbb0927f38bcc181a31718cdf73 to your computer and use it in GitHub Desktop.
Search PyPI with glob pattern and lists the latest versions with available extras
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# This Python script searches PyPI for packages whose names match a glob pattern
# and lists only the latest N major, minor, and patch versions of each package.
# Package extras are also displayed when available.
import argparse | |
import fnmatch | |
import json | |
import os | |
import re | |
import signal | |
import sys | |
from collections import defaultdict | |
import requests | |
# Root of the PyPI "simple" index; the page at this URL links to every project.
PYPI_BASE_URL = "https://pypi.org/simple/"
def handle_sigint(signal: int, frame) -> None:
    """Exit quietly when the user presses Ctrl-C.

    Intended to be installed as a SIGINT handler: it prints a short notice
    and terminates with exit status 0 so no KeyboardInterrupt traceback is
    shown.

    Args:
        signal: Number of the signal that was delivered (unused).
        frame: Stack frame that was interrupted (unused).
    """
    print("\nProcess interrupted. Exiting...")
    raise SystemExit(0)
# Install handle_sigint for SIGINT so Ctrl-C ends the run with a short message
# rather than a KeyboardInterrupt traceback.
signal.signal(signal.SIGINT, handle_sigint)
def fetch_pypi_simple_index() -> list[str]:
    """Fetch the names of all packages listed on the PyPI Simple Index page.

    The page at ``PYPI_BASE_URL`` is downloaded and the text of every
    ``<a href="...">name</a>`` anchor is collected.

    If the ``HTTPS_PROXY`` environment variable is set, it is passed to
    ``requests`` as the proxy for both ``http`` and ``https`` traffic.

    Returns:
        The anchor texts in page order, or an empty list when the request
        fails (the error is printed).

    Raises:
        ValueError: If ``PYPI_BASE_URL`` does not start with ``https://<host>``.
    """
    # Validate before touching the network; ValueError is deliberately not
    # handled here, so it propagates to the caller.
    if not re.match(r"https://([^/]+)", PYPI_BASE_URL):
        raise ValueError(f"Invalid URL format: {PYPI_BASE_URL}")

    https_proxy = os.getenv("HTTPS_PROXY")
    proxies = {"http": https_proxy, "https": https_proxy} if https_proxy else None

    # Keep the try body limited to the calls that can raise RequestException.
    try:
        response = requests.get(PYPI_BASE_URL, proxies=proxies, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching PyPI index: {e}")
        return []

    # Extract package names from <a> tags
    return re.findall(r'<a href="[^"]+">([^<]+)</a>', response.text)
def get_package_info(package_name: str) -> dict[str, dict | str] | None:
    """Fetch a package's metadata from the PyPI JSON API.

    Requests ``https://pypi.org/pypi/<package_name>/json``. If the
    ``HTTPS_PROXY`` environment variable is set, it is used as the HTTPS
    proxy for the request.

    Args:
        package_name: Project name as it appears in the index.

    Returns:
        The decoded JSON document, or ``None`` when the request fails, the
        server answers with an error status, or the body is not valid JSON.
    """
    url = f"https://pypi.org/pypi/{package_name}/json"
    https_proxy = os.getenv("HTTPS_PROXY")
    proxies = {"https": https_proxy} if https_proxy else None
    try:
        response = requests.get(url, proxies=proxies, timeout=10)
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError):
        # Some packages may not be found even if they exist in the index.
        # ValueError covers JSON decoding failures on requests releases whose
        # decode error is not a RequestException subclass.
        return None
def parse_package_info(data: dict[str, dict | str]) -> tuple[list[str], list[str]]: | |
"""Extract versions and extras from package data.""" | |
versions = list(data["releases"].keys()) # type: ignore | |
extras = data["info"].get("provides_extra", []) # type: ignore | |
return versions, extras | |
# Leading "major.minor.patch" of a version string; anything after it is a suffix.
_VERSION_RE = re.compile(r"(\d+)\.(\d+)\.(\d+)")


def _release_rank(suffix: str) -> int:
    """Rank the text following ``major.minor.patch`` (0 pre, 1 final, 2 later)."""
    tail = suffix.lower().lstrip(".-_")
    if not tail:
        return 1  # plain final release
    if tail[0].isdigit() or tail[0] == "+" or tail.startswith("post"):
        return 2  # extra numeric component, local version, or post-release
    return 0  # rc / alpha / beta / dev and anything else unrecognized


def filter_versions(
    versions: list[str],
    max_major_versions: int,
    max_minor_versions: int,
    max_patch_versions: int,
) -> list[str]:
    """Limit versions to the latest N major, minor, and patch versions.

    Only versions that start with ``major.minor.patch`` are considered;
    anything else (for example ``"1.0"``) is ignored. All three components
    are compared numerically, so ``1.2.10`` is newer than ``1.2.9``. For an
    identical ``major.minor.patch``, a final release ranks above pre-release
    or dev builds and below post-releases (a heuristic based on the suffix,
    not a full PEP 440 comparison).

    Args:
        versions: Version strings in any order.
        max_major_versions: How many of the highest major versions to keep.
        max_minor_versions: How many of the highest minors to keep per major.
        max_patch_versions: How many of the newest releases to keep per minor.

    Returns:
        The selected version strings, newest first.
    """
    grouped: dict[int, dict[int, list[tuple[int, int, str]]]] = defaultdict(
        lambda: defaultdict(list)
    )
    for version in versions:
        match = _VERSION_RE.match(version)
        if not match:
            continue
        major, minor, patch = (int(part) for part in match.groups())
        rank = _release_rank(version[match.end():])
        # The tuple is the sort key: numeric patch, then release rank, then
        # the raw string as a deterministic tie-breaker.
        grouped[major][minor].append((patch, rank, version))

    filtered_versions: list[str] = []
    for major in sorted(grouped, reverse=True)[:max_major_versions]:
        minors = grouped[major]
        for minor in sorted(minors, reverse=True)[:max_minor_versions]:
            newest_first = sorted(minors[minor], reverse=True)
            filtered_versions.extend(
                version for _, _, version in newest_first[:max_patch_versions]
            )
    return filtered_versions
def main(args: argparse.Namespace) -> None:
    """Search the index for ``args.pattern`` and print one line per package.

    Each line has the form ``name: v1, v2, ... [extra1, extra2]``. Versions
    are trimmed by ``args.major``, ``args.minor`` and ``args.patch``.
    Packages whose metadata cannot be fetched are skipped without a message.

    Args:
        args: Parsed command-line options providing ``pattern``, ``major``,
            ``minor`` and ``patch``.
    """
    index = fetch_pypi_simple_index()
    if not index:
        print("Failed to fetch PyPI package index.")
        return

    hits = fnmatch.filter(index, args.pattern)
    if not hits:
        print(f"No packages found matching pattern: {args.pattern}")
        return

    for package_name in hits:
        info = get_package_info(package_name)
        if not info:
            # Metadata unavailable for this name; move on to the next match.
            continue

        versions, extras = parse_package_info(info)
        filtered_versions = filter_versions(
            versions,
            max_major_versions=args.major,
            max_minor_versions=args.minor,
            max_patch_versions=args.patch,
        )
        if extras:
            extras_str = f"[{', '.join(extras)}]"
        else:
            extras_str = "[]"
        print(f"{package_name}: {', '.join(filtered_versions)} {extras_str}")
if __name__ == "__main__":
    # Command-line entry point: one positional glob pattern plus three
    # integer limits that are all registered the same way.
    cli = argparse.ArgumentParser(
        description="Search PyPI for packages matching a glob pattern."
    )
    cli.add_argument(
        "pattern", help="Glob pattern to match package names (e.g., '*redis*')."
    )
    int_options = (
        ("--major", 3, "Number of major versions to display (default: 3)."),
        ("--minor", 3, "Number of minor versions per major version (default: 3)."),
        ("--patch", 1, "Number of patch versions per minor version (default: 1)."),
    )
    for flag, default_value, help_text in int_options:
        cli.add_argument(flag, type=int, default=default_value, help=help_text)

    args = cli.parse_args()
    main(args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment