Skip to content

Instantly share code, notes, and snippets.

@yatsu
Last active January 3, 2025 09:19
Show Gist options
  • Save yatsu/861a3bbb0927f38bcc181a31718cdf73 to your computer and use it in GitHub Desktop.
Save yatsu/861a3bbb0927f38bcc181a31718cdf73 to your computer and use it in GitHub Desktop.
Search PyPI with glob pattern and lists the latest versions with available extras
#!/usr/bin/env python
# This Python script allows you to search for packages on PyPI with a glob pattern
# and lists only the latest N major, minor, and patch versions of each package.
# Package extras are also displayed if available.
import argparse
import fnmatch
import json
import os
import re
import signal
import sys
from collections import defaultdict
import requests
PYPI_BASE_URL = "https://pypi.org/simple/"
def handle_sigint(signal: int, frame) -> None:
    """Exit quietly on Ctrl-C instead of showing a traceback.

    Args:
        signal: Number of the signal that was delivered.
        frame: Stack frame that was executing when the signal arrived (unused).

    Raises:
        SystemExit: Always, with exit status 0.
    """
    # The leading newline moves the message off the line holding the echoed ^C.
    print("\nProcess interrupted. Exiting...")
    raise SystemExit(0)
# Register handle_sigint for SIGINT at module load so that Ctrl-C prints a
# short message and exits instead of raising KeyboardInterrupt.
signal.signal(signal.SIGINT, handle_sigint)
def fetch_pypi_simple_index() -> list[str]:
    """Fetch the list of all packages from the PyPI Simple Index.

    The index page at ``PYPI_BASE_URL`` is downloaded and the link text of
    every ``<a href="...">`` element is collected.

    Returns:
        The extracted link texts in page order, or an empty list when the
        HTTP request fails (the error is printed to stdout).
    """
    https_proxy = os.getenv("HTTPS_PROXY")
    # Pass an explicit proxy mapping only when HTTPS_PROXY is set.
    proxies = {"http": https_proxy, "https": https_proxy} if https_proxy else None

    # Keep the try body limited to the calls that can raise RequestException.
    try:
        response = requests.get(PYPI_BASE_URL, proxies=proxies, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error fetching PyPI index: {e}")
        return []

    # Extract package names from <a> tags
    return re.findall(r'<a href="[^"]+">([^<]+)</a>', response.text)
def get_package_info(package_name: str) -> dict[str, dict | str] | None:
    """Fetch package info from the PyPI JSON API.

    Args:
        package_name: Project name to look up; it is interpolated directly
            into the ``https://pypi.org/pypi/<name>/json`` URL.

    Returns:
        The decoded JSON document, or ``None`` when the request raises a
        ``requests.RequestException`` (for example an HTTP error status).
    """
    url = f"https://pypi.org/pypi/{package_name}/json"
    https_proxy = os.getenv("HTTPS_PROXY")
    # Pass an explicit proxy mapping only when HTTPS_PROXY is set.
    proxies = {"https": https_proxy} if https_proxy else None
    try:
        response = requests.get(url, proxies=proxies, timeout=10)
        response.raise_for_status()
        return response.json()
    except requests.RequestException:
        # Some packages may not be found even if they exist in the index
        return None
def parse_package_info(data: dict[str, dict | str]) -> tuple[list[str], list[str]]:
"""Extract versions and extras from package data."""
versions = list(data["releases"].keys()) # type: ignore
extras = data["info"].get("provides_extra", []) # type: ignore
return versions, extras
def filter_versions(
    versions: list[str],
    max_major_versions: int,
    max_minor_versions: int,
    max_patch_versions: int,
) -> list[str]:
    """Limit versions to the latest N major, minor, and patch versions.

    Only strings that start with ``<digits>.<digits>.<digits>`` are
    considered; everything else is dropped. The pattern is not anchored at
    the end, so suffixed versions such as ``1.0.0rc1`` are kept and grouped
    under their numeric prefix.

    Args:
        versions: Version strings to filter.
        max_major_versions: How many of the highest major numbers to keep.
        max_minor_versions: How many of the highest minor numbers to keep
            within each kept major.
        max_patch_versions: How many of the highest patch versions to keep
            within each kept major.minor.

    Returns:
        The kept version strings, newest first (major, then minor, then
        patch, all compared numerically).
    """
    # major -> minor -> [(patch, version)]; the integer keys make "1.10" sort
    # above "1.9" and merge spellings that differ only by leading zeros.
    grouped: dict[int, dict[int, list[tuple[int, str]]]] = defaultdict(
        lambda: defaultdict(list)
    )
    for version in versions:
        match = re.match(r"(\d+)\.(\d+)\.(\d+)", version)  # Match major.minor.patch
        if match:
            major, minor, patch = (int(part) for part in match.groups())
            grouped[major][minor].append((patch, version))

    filtered_versions: list[str] = []
    # Limit to the latest N major versions
    for major in sorted(grouped, reverse=True)[:max_major_versions]:
        minors = grouped[major]
        # Limit to the latest N minor versions per major version
        for minor in sorted(minors, reverse=True)[:max_minor_versions]:
            # Limit to the latest N patch versions per minor version. Sorting
            # the (patch, version) tuples compares the patch numerically, so
            # "1.0.10" beats "1.0.9"; ties on the patch number fall back to
            # the version string.
            newest = sorted(minors[minor], reverse=True)[:max_patch_versions]
            filtered_versions.extend(version for _, version in newest)
    return filtered_versions
def main(args: argparse.Namespace) -> None:
    """Search the index, then print versions and extras for each match.

    Args:
        args: Parsed command-line options; ``pattern``, ``major``, ``minor``
            and ``patch`` are read.

    One line is printed per matching package for which package info could
    be fetched; packages without info are skipped silently.
    """
    index = fetch_pypi_simple_index()
    if not index:
        print("Failed to fetch PyPI package index.")
        return

    hits = fnmatch.filter(index, args.pattern)
    if not hits:
        print(f"No packages found matching pattern: {args.pattern}")
        return

    for package_name in hits:
        payload = get_package_info(package_name)
        if not payload:
            continue

        versions, extras = parse_package_info(payload)
        shown = filter_versions(
            versions,
            max_major_versions=args.major,
            max_minor_versions=args.minor,
            max_patch_versions=args.patch,
        )

        extras_str = "[]"
        if extras:
            extras_str = f"[{', '.join(extras)}]"
        print(f"{package_name}: {', '.join(shown)} {extras_str}")
if __name__ == "__main__":
    # Command-line entry point: one positional glob pattern plus three
    # integer limits that are passed on to main().
    parser = argparse.ArgumentParser(
        description="Search PyPI for packages matching a glob pattern."
    )
    parser.add_argument(
        "pattern", help="Glob pattern to match package names (e.g., '*redis*')."
    )
    # The three limits differ only in flag, default and help text.
    for flag, default, help_text in (
        ("--major", 3, "Number of major versions to display (default: 3)."),
        ("--minor", 3, "Number of minor versions per major version (default: 3)."),
        ("--patch", 1, "Number of patch versions per minor version (default: 1)."),
    ):
        parser.add_argument(flag, type=int, default=default, help=help_text)

    args = parser.parse_args()
    main(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment