Skip to content

Instantly share code, notes, and snippets.

@crackcomm
Last active June 8, 2025 17:22
Show Gist options
  • Save crackcomm/6eb11ebaea3bedbb21b7a4089f4251df to your computer and use it in GitHub Desktop.
Save crackcomm/6eb11ebaea3bedbb21b7a4089f4251df to your computer and use it in GitHub Desktop.
import requests
import subprocess
import re
import argparse
import tarfile
import zipfile
import io
import os
from typing import List, Dict, Optional, Tuple, Any
# For Python < 3.11, tomllib is not available.
# We'll use the 'toml' package as a fallback.
# After this block the module-level name `tomllib` is either a module exposing
# `loads()` or None; code further down checks for None before parsing.
try:
    import tomllib
except ImportError:
    try:
        # Bind the third-party package under the stdlib name so callers can
        # use `tomllib.loads(...)` regardless of which one was imported.
        import toml as tomllib  # type: ignore
        print(
            "Using third-party 'toml' library. For Python 3.11+, 'tomllib' is built-in."
        )
    except ImportError:
        print("ERROR: 'tomllib' (Python 3.11+) or 'toml' (third-party) not found.")
        print("Please install 'toml' if you are on Python < 3.11: pip install toml")
        # Sentinel meaning "no TOML parser available".
        tomllib = None
# Mapping from PyPI requirement name (normalized) to Nixpkgs attribute name
# for python build systems.
# Keys are compared against the output of normalize_dep_name(); requirements
# whose normalized name is not listed here are dropped from nativeBuildInputs.
PYPROJECT_NATIVE_BUILD_INPUT_MAP = {
    "setuptools": "setuptools",
    "setuptools-scm": "setuptools-scm",
    "wheel": "wheel",
    "flit-core": "flit-core",
    "poetry-core": "poetry-core",
    "hatchling": "hatchling",
    "pdm-pep517": "pdm-pep517",
    "maturin": "maturin",
    "scikit-build-core": "scikit-build-core",
}
# Build tools whose version pins in pyproject.toml [build-system].requires
# will be automatically relaxed by generating a postPatch.
AUTO_RELAX_BUILD_TOOLS = ["setuptools", "wheel", "setuptools-scm"]
def fetch_pypi_data(package_name: str, version: Optional[str] = None) -> Optional[Dict]:
    """Download the PyPI JSON metadata document for a package.

    Args:
        package_name: Project name used in the PyPI URL.
        version: Release to look up; when falsy the un-versioned endpoint is
            queried instead.

    Returns:
        The decoded JSON body, or None when the request fails with a
        ``requests.RequestException`` (the error is printed).
    """
    if version:
        pypi_url = f"https://pypi.org/pypi/{package_name}/{version}/json"
    else:
        pypi_url = f"https://pypi.org/pypi/{package_name}/json"
    print(f"Fetching package metadata: {pypi_url}")

    try:
        reply = requests.get(pypi_url, timeout=15)
        # Turn 4xx/5xx answers into exceptions so they share the error path.
        reply.raise_for_status()
        payload = reply.json()
    except requests.RequestException as e:
        print(f"Error fetching PyPI data for '{package_name}': {e}")
        return None
    return payload
def get_sdist_info(pypi_data: Dict) -> Optional[Tuple[str, str]]:
    """Pick the source distribution entry out of PyPI metadata.

    Only the first entry of ``pypi_data["urls"]`` whose ``packagetype`` is
    ``"sdist"`` is considered.

    Returns:
        ``(url, filename)`` for that entry, with ``"Unknown_filename"`` used
        when the entry has no filename; None (after printing an error) when
        there is no sdist entry or it has no usable URL.
    """
    chosen = None
    for entry in pypi_data.get("urls", []):
        if entry.get("packagetype") == "sdist":
            chosen = entry
            break

    if chosen and chosen.get("url"):
        filename = chosen.get("filename", "Unknown_filename")
        return chosen["url"], filename

    print("Error: No sdist URL found in PyPI metadata.")
    return None
def get_sdist_sha256(sdist_url: str) -> Optional[str]:
    """Run ``nix-prefetch-url`` on the sdist URL and return the hash it prints.

    The command's stdout is accepted only if, once stripped, it is exactly
    52 characters drawn from ``[0-9a-z]``.

    Returns:
        The hash string, or None when the command is missing, exits non-zero,
        exceeds the 300 second timeout, or prints something that does not
        look like a hash. Every failure is reported with a printed message.
    """
    print(f"Prefetching sdist from {sdist_url} to get its SHA256 hash...")
    command = ["nix-prefetch-url", sdist_url]

    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            timeout=300,
        )
    except FileNotFoundError:
        print(
            "Error: nix-prefetch-url command not found. Is Nix installed and in PATH?"
        )
        return None
    except subprocess.TimeoutExpired:
        print(f"Error: nix-prefetch-url timed out for {sdist_url}")
        return None
    except subprocess.CalledProcessError as e:
        print(
            f"Error running nix-prefetch-url: {e}\nCommand: {e.cmd}\nStderr: {e.stderr}"
        )
        return None

    digest = completed.stdout.strip()
    well_formed = (
        bool(digest) and len(digest) == 52 and re.match(r"^[0-9a-z]+$", digest)
    )
    if not well_formed:
        print(
            f"Error: nix-prefetch-url returned an invalid SHA256 format: '{digest}'"
        )
        return None

    print(
        f"Successfully fetched base32 NAR SHA256: {digest} (for URL: {sdist_url})"
    )
    return digest
def normalize_dep_name(dep_string: str) -> str:
    """Reduce a requirement string to a normalized bare project name.

    The environment marker (everything after the first ``;``) is dropped,
    then the name is cut at the first version operator, extras bracket,
    parenthesis, or ``@`` (direct reference such as ``pkg @ https://...``).
    The remainder is lower-cased, ``_`` and ``.`` become ``-``, characters
    outside ``[a-z0-9-]`` are removed, and runs of ``-`` are collapsed.

    Args:
        dep_string: A requirement string, e.g. ``"Foo.Bar[extra]>=1.0; os_name=='nt'"``.

    Returns:
        The normalized name, or ``"UNPARSED-DEP-<prefix>"`` (built from the
        first 15 characters of the input) when nothing usable remains.
    """
    name_with_specifiers = dep_string.split(";", 1)[0].strip()
    # `@` is included so that a direct-reference URL is not folded into the
    # name (previously "pkg @ https://host/x.zip" became "pkghttpshost-x-zip").
    delimiters_regex = r"\s*(?:[!=<>~]=?|\[|\(|@)"
    core_name = re.split(delimiters_regex, name_with_specifiers, maxsplit=1)[0].strip()

    normalized_name = core_name.lower().replace("_", "-").replace(".", "-")
    normalized_name = re.sub(r"[^a-z0-9-]+", "", normalized_name)
    normalized_name = re.sub(r"-+", "-", normalized_name).strip("-")
    if normalized_name:
        return normalized_name

    # Nothing parseable: return a recognisable placeholder instead of "".
    placeholder_suffix = "".join(c if c.isalnum() else "_" for c in dep_string[:15])
    return f"UNPARSED-DEP-{placeholder_suffix}"
# Archive suffixes that are opened with tarfile; ".zip" is handled separately.
_SDIST_TAR_SUFFIXES = (".tar.gz", ".tar.bz2", ".tgz", ".tbz2")

# Build backends that are used on their own: when one of these is required,
# setuptools/wheel are not added to nativeBuildInputs as a fallback.
_STANDALONE_BUILD_BACKENDS = frozenset(
    {
        "flit-core",
        "poetry-core",
        "hatchling",
        "pdm-pep517",
        "maturin",
        "scikit-build-core",
    }
)


def _legacy_setuptools_build() -> Tuple[str, List[str], str]:
    """Return a fresh fallback result describing a plain setuptools build."""
    return "setuptools", ["setuptools", "wheel"], ""


def _is_root_pyproject(member_name: str) -> bool:
    """Tell whether an archive member is ``<top-level-dir>/pyproject.toml``."""
    path_parts = os.path.normpath(member_name).split(os.sep)
    return len(path_parts) == 2 and path_parts[1] == "pyproject.toml"


def _read_root_pyproject(sdist_content: bytes, sdist_filename: str) -> Optional[str]:
    """Return the text of the sdist's root pyproject.toml, or None if absent.

    Lets tarfile.TarError, zipfile.BadZipFile and UnicodeDecodeError propagate.
    """
    archive = io.BytesIO(sdist_content)
    if sdist_filename.endswith(".zip"):
        with zipfile.ZipFile(archive, "r") as zip_f:
            for member_name in zip_f.namelist():
                if _is_root_pyproject(member_name):
                    return zip_f.read(member_name).decode("utf-8")
        return None

    with tarfile.open(fileobj=archive, mode="r:*") as tar:
        for member in tar.getmembers():
            if member.isfile() and _is_root_pyproject(member.name):
                extracted = tar.extractfile(member)
                if extracted is not None:
                    return extracted.read().decode("utf-8")
    return None


def _escape_for_nix_indented_string(text: str) -> str:
    """Escape text for embedding inside a Nix ``'' ... ''`` string.

    ``${`` is written as ``''${`` and a ``'`` that is directly followed by
    another ``'`` or by ``${`` is written as ``''\\'``; everything else is
    copied verbatim, so text without those sequences is returned unchanged.
    """
    pieces: List[str] = []
    index = 0
    while index < len(text):
        if text.startswith("${", index):
            pieces.append("''${")
            index += 2
            continue
        char = text[index]
        if char == "'" and (
            text.startswith("'", index + 1) or text.startswith("${", index + 1)
        ):
            pieces.append("''\\'")
        else:
            pieces.append(char)
        index += 1
    return "".join(pieces)


def inspect_sdist_for_build_system(
    sdist_url: str, sdist_filename: str
) -> Tuple[str, List[str], str]:
    """
    Downloads sdist, inspects for pyproject.toml, determines build system,
    and generates postPatch commands if needed.
    Returns (package_format, native_build_inputs, post_patch_script_string).

    Any failure (no TOML parser, download error, unsupported or corrupt
    archive, missing or unparsable pyproject.toml) is printed and answered
    with the legacy result ``("setuptools", ["setuptools", "wheel"], "")``.
    Otherwise the format is ``"pyproject"``, the inputs are the sorted Nix
    attribute names mapped from ``[build-system].requires``, and the script
    is a ``postPatch`` attribute that strips version pins from the tools in
    AUTO_RELAX_BUILD_TOOLS (empty string when nothing is pinned).
    """
    if tomllib is None:
        print(
            "Warning: tomllib/toml not available. Cannot parse pyproject.toml. Assuming legacy setuptools build."
        )
        return _legacy_setuptools_build()

    print(f"Downloading sdist for inspection: {sdist_filename} from {sdist_url}")
    try:
        response = requests.get(sdist_url, timeout=60)
        response.raise_for_status()
        sdist_content = response.content
    except requests.RequestException as e:
        print(f"Error downloading sdist for inspection: {e}")
        return _legacy_setuptools_build()

    if not sdist_filename.endswith(_SDIST_TAR_SUFFIXES + (".zip",)):
        print(
            f"Unsupported sdist format: {sdist_filename}. Assuming legacy setuptools."
        )
        return _legacy_setuptools_build()

    try:
        pyproject_toml_content = _read_root_pyproject(sdist_content, sdist_filename)
    except (tarfile.TarError, zipfile.BadZipFile, UnicodeDecodeError) as e:
        print(f"Error processing sdist archive: {e}")
        return _legacy_setuptools_build()

    # An empty pyproject.toml is treated the same as a missing one.
    if not pyproject_toml_content:
        print(
            "No pyproject.toml found at the sdist root. Assuming legacy setuptools build."
        )
        return _legacy_setuptools_build()

    print("Found pyproject.toml. Parsing for build system requirements.")
    try:
        toml_data = tomllib.loads(pyproject_toml_content)
        build_system_info = toml_data.get("build-system", {})
        original_requires = build_system_info.get("requires", [])

        patch_tuples: List[Tuple[str, str]] = []  # (original_req, relaxed_req)
        requirement_names: List[str] = []
        if not original_requires:
            print(
                "pyproject.toml found, but no 'build-system.requires'. Defaulting to setuptools & wheel for nativeBuildInputs."
            )
            requirement_names = ["setuptools", "wheel"]
        else:
            for req_str in original_requires:
                norm_req_name_only = normalize_dep_name(req_str)
                if norm_req_name_only in AUTO_RELAX_BUILD_TOOLS:
                    is_pinned = (
                        bool(re.search(r"[=<>~!]", req_str))
                        and req_str.strip() != norm_req_name_only
                    )
                    if is_pinned:
                        print(
                            f"INFO: Found pinned build-time requirement: '{req_str}'."
                        )
                        print(
                            f" Will generate postPatch to relax it to '{norm_req_name_only}'."
                        )
                        patch_tuples.append((req_str, norm_req_name_only))
                requirement_names.append(norm_req_name_only)

        # Keep only requirements we know a Nix attribute for, without duplicates.
        native_build_inputs_attrs: List[str] = []
        for name in requirement_names:
            attr = PYPROJECT_NATIVE_BUILD_INPUT_MAP.get(name)
            if attr is not None and attr not in native_build_inputs_attrs:
                native_build_inputs_attrs.append(attr)

        # Without a standalone backend, assume a setuptools-based build.
        if not any(
            attr in _STANDALONE_BUILD_BACKENDS for attr in native_build_inputs_attrs
        ):
            for fallback in ("setuptools", "wheel"):
                if fallback not in native_build_inputs_attrs:
                    native_build_inputs_attrs.append(fallback)
        native_build_inputs_attrs = sorted(set(native_build_inputs_attrs))

        post_patch_script = ""
        if patch_tuples:
            script_lines = [" postPatch = ''\n"]
            for original_req, relaxed_req in patch_tuples:
                # Escape for shell single quotes first ...
                original_req_escaped = original_req.replace("'", "'\\''")
                relaxed_req_escaped = relaxed_req.replace("'", "'\\''")
                command = f" substituteInPlace pyproject.toml --replace '{original_req_escaped}' '{relaxed_req_escaped}'"
                # ... then for the enclosing Nix string: the shell escape
                # contains two adjacent quotes, which would otherwise close
                # the '' ... '' literal early.
                script_lines.append(_escape_for_nix_indented_string(command) + "\n")
            script_lines.append(" '';\n")
            post_patch_script = "".join(script_lines)
            print(
                f"INFO: Will add the following postPatch section to the Nix expression:\n{post_patch_script.strip()}"
            )

        print(
            f"Determined nativeBuildInputs for pyproject.toml: {native_build_inputs_attrs}"
        )
        return "pyproject", native_build_inputs_attrs, post_patch_script.strip()
    except Exception as e:
        # Deliberately broad: the stdlib and third-party TOML parsers raise
        # different error types, and malformed tables surface as
        # AttributeError/TypeError above.
        print(f"Error parsing pyproject.toml: {e}")
        return _legacy_setuptools_build()
def extract_dependencies(pypi_data: Dict) -> List[str]:
    """Collect normalized runtime dependency names from PyPI metadata.

    Reads ``pypi_data["info"]["requires_dist"]``. Entries whose environment
    marker (the part after ``;``) tests ``extra ==`` are optional extras and
    are skipped, as are names that normalize_dep_name() could not parse.

    Returns:
        A sorted list of unique normalized names; empty when the metadata
        lists no requirements.
    """
    info = pypi_data.get("info") or {}
    requires_dist = info.get("requires_dist") or []

    dependencies = set()
    for dep_string in requires_dist:
        # Look only at the marker so a project literally named "extra" is not
        # dropped, and tolerate any spacing around the operator
        # (`extra=='test'` as well as `extra == "test"`).
        marker = dep_string.partition(";")[2]
        if re.search(r"\bextra\s*==", marker):
            continue
        normalized = normalize_dep_name(dep_string)
        if normalized and not normalized.startswith("UNPARSED-DEP-"):
            dependencies.add(normalized)
    return sorted(dependencies)
# Ordered (substrings, nix attribute) rules, matched against the lower-cased
# license text; the first rule with any matching substring wins.
# AGPL and LGPL rules come before the GPL ones because "agplv3" and "lgplv3"
# both contain "gplv3" (and "lgpl" contains "gpl").
_LICENSE_SUBSTRING_RULES: Tuple[Tuple[Tuple[str, ...], str], ...] = (
    (("apache software license version 2.0", "apache-2.0"), "licenses.asl20"),
    (("mozilla public license 2.0", "mpl-2.0"), "licenses.mpl20"),
    (("bsd 3-clause", "bsd-3-clause"), "licenses.bsd3"),
    (("bsd 2-clause", "bsd-2-clause"), "licenses.bsd2"),
    (
        ("affero general public license v3 or later", "agpl-3.0-or-later", "agplv3+"),
        "licenses.agpl3Plus",
    ),
    (("affero", "agpl"), "licenses.agpl3Only"),
    (
        (
            "gnu lesser general public license v3.0 or later",
            "lgpl-3.0-or-later",
            "lgplv3+",
        ),
        "licenses.lgpl3Plus",
    ),
    (
        ("gnu lesser general public license v3.0 only", "lgpl-3.0-only", "lgplv3"),
        "licenses.lgpl3Only",
    ),
    (
        (
            "gnu lesser general public license v2.1 or later",
            "lgpl-2.1-or-later",
            "lgplv2.1+",
            "lgpl21+",
        ),
        "licenses.lgpl21Plus",
    ),
    (
        (
            "gnu lesser general public license v2.1 only",
            "lgpl-2.1-only",
            "lgplv2.1",
            "lgpl21",
        ),
        "licenses.lgpl21Only",
    ),
    # Must follow the 2.1 rules: "lgplv2" is a prefix of "lgplv2.1".
    (("lgpl-2.0-or-later", "lgplv2+"), "licenses.lgpl2Plus"),
    (("lgpl-2.0-only", "lgplv2"), "licenses.lgpl2Only"),
    (
        ("gnu general public license v3.0 or later", "gpl-3.0-or-later", "gplv3+"),
        "licenses.gpl3Plus",
    ),
    (
        ("gnu general public license v3.0 only", "gpl-3.0-only", "gplv3"),
        "licenses.gpl3Only",
    ),
    (
        ("gnu general public license v2.0 or later", "gpl-2.0-or-later", "gplv2+"),
        "licenses.gpl2Plus",
    ),
    (
        ("gnu general public license v2.0 only", "gpl-2.0-only", "gplv2"),
        "licenses.gpl2Only",
    ),
    # Loose family fallbacks when no version could be recognised.
    (("apache",), "licenses.asl20"),
    (("bsd",), "licenses.bsdOriginal"),
    (("lgpl",), "licenses.lgpl3Plus"),
    (("gpl",), "licenses.gpl3Plus"),
    (("mpl",), "licenses.mpl20"),
    (("unlicense",), "licenses.unlicense"),
    (("public domain",), "licenses.publicDomain"),
)


def map_license_to_nix(license_classifier: Optional[str]) -> str:
    """Map a free-form license name to a ``licenses.<attr>`` string.

    Matching is case-insensitive and substring based, following
    _LICENSE_SUBSTRING_RULES in order; "MIT" is matched as a whole word
    so that words such as "limited" do not count.

    Args:
        license_classifier: License text (trove classifier tail, SPDX-like
            id, or the raw license field); may be None or empty.

    Returns:
        A string of the form ``"licenses.<attr>"``. ``"licenses.unfree"`` is
        returned for missing input and, with a printed warning, for text
        that matches no rule.
    """
    if not license_classifier:
        return "licenses.unfree"

    lc = license_classifier.lower()
    if re.search(r"\bmit\b", lc):
        return "licenses.mit"

    for needles, nix_attr in _LICENSE_SUBSTRING_RULES:
        if any(needle in lc for needle in needles):
            return nix_attr

    print(
        f"Warning: Could not confidently map license: '{license_classifier}'. Defaulting to unfree."
    )
    return "licenses.unfree"
def get_license_from_classifiers(pypi_data: Dict) -> Optional[str]:
    """Choose the most trustworthy license string from PyPI metadata.

    Sources are tried in this order:
      1. ``License :: OSI Approved ::`` classifiers (a few common licenses
         are preferred, otherwise the first one listed);
      2. the ``info.license`` field, if it is short (< 30 chars), has no
         ``::`` and mentions a well-known license family;
      3. any other ``License ::`` classifier;
      4. the ``info.license`` field as a last resort, if it is not
         ``"UNKNOWN"`` and shorter than 100 characters.

    Missing or null ``info``, ``classifiers`` and ``license`` values are
    treated as empty rather than raising.

    Returns:
        The selected license text, or None when nothing usable was found.
    """
    info = pypi_data.get("info") or {}
    classifiers = info.get("classifiers") or []

    osi_approved_licenses = []
    other_license_classifiers = []
    for c in classifiers:
        if c.startswith("License :: OSI Approved ::"):
            osi_approved_licenses.append(c.split("::")[-1].strip())
        elif "License ::" in c:
            other_license_classifiers.append(c.split("::")[-1].strip())

    if osi_approved_licenses:
        for preferred in (
            "MIT License",
            "Apache Software License",
            "Mozilla Public License 2.0",
            "BSD License",
        ):
            if preferred in osi_approved_licenses:
                return preferred
        return osi_approved_licenses[0]

    # The key can be present with a null value, in which case a `.get`
    # default would not apply; `or ""` covers both missing and null.
    license_field = (info.get("license") or "").strip()
    if license_field:
        normalized_license_field = license_field.lower()
        mentions_known_family = any(
            kw in normalized_license_field
            for kw in ["mit", "apache", "bsd", "gpl", "lgpl", "mpl", "unlicense"]
        )
        if (
            mentions_known_family
            and len(normalized_license_field) < 30
            and "::" not in license_field
        ):
            print(f"Using license from 'info.license' field: '{license_field}'")
            return license_field

    if other_license_classifiers:
        return other_license_classifiers[0]

    if license_field and license_field != "UNKNOWN" and len(license_field) < 100:
        print(
            f"Using license from 'info.license' field as last resort: '{license_field}'"
        )
        return license_field
    return None
def _escape_nix_double_quoted(text: str) -> str:
    """Escape text for use inside a Nix ``"..."`` string literal."""
    # Backslashes first, so the escapes added afterwards are not doubled.
    return text.replace("\\", "\\\\").replace('"', '\\"').replace("${", "\\${")


def _render_nix_list(attr_name: str, items: List[str]) -> str:
    """Render ``attr_name = [ ... ];`` with one item per line."""
    if not items:
        return f"{attr_name} = [];"
    item_lines = "".join(f" {item}\n" for item in items)
    return f"{attr_name} = [\n{item_lines} ];"


def generate_nix_expression(
    pname: str,
    version: str,
    sha256: str,
    package_format: str,
    native_build_inputs: List[str],
    propagated_build_inputs: List[str],
    nix_license_attr: str,
    homepage: Optional[str],
    summary: Optional[str],
    post_patch_script: str,  # New argument
) -> str:
    """Assemble the text of a ``buildPythonPackage`` Nix expression.

    Args:
        pname: Value for the ``pname`` attribute.
        version: Value for the ``version`` attribute.
        sha256: Hash placed in the ``fetchPypi`` call.
        package_format: Emitted as ``format = "...";`` unless it is empty or
            ``"setuptools"``.
        native_build_inputs: Attribute names listed in ``nativeBuildInputs``.
        propagated_build_inputs: Attribute names listed in
            ``propagatedBuildInputs``.
        nix_license_attr: Appended to ``lib.`` for ``meta.license``.
        homepage: ``meta.homepage``; None or empty yields ``""``.
        summary: ``meta.description``; when falsy, ``"Python package <pname>"``
            is used.
        post_patch_script: Ready-made ``postPatch`` attribute text, inserted
            verbatim after ``src`` when non-empty.

    Returns:
        The expression as a single newline-joined string (no trailing newline).
    """
    native_bi_lines = _render_nix_list("nativeBuildInputs", native_build_inputs)
    propagated_bi_lines = _render_nix_list(
        "propagatedBuildInputs", propagated_build_inputs
    )

    # Description and homepage come from package metadata; escape everything
    # that is special inside a Nix double-quoted string, not only quotes.
    escaped_summary = _escape_nix_double_quoted(
        summary if summary else f"Python package {pname}"
    )
    escaped_homepage = _escape_nix_double_quoted(homepage or "")

    expression_parts = [
        "{ pself }:",
        "",
        "with pself;",
        "buildPythonPackage rec {",
        f' pname = "{pname}";',
        f' version = "{version}";',
    ]
    if package_format and package_format != "setuptools":
        expression_parts.append(f' format = "{package_format}";')
    expression_parts.extend(
        [
            "",
            " src = fetchPypi {",
            " inherit pname version;",
            f' sha256 = "{sha256}";',
            " };",
        ]
    )
    if post_patch_script:  # Add postPatch if it exists and is not empty
        expression_parts.extend(["", post_patch_script])
    expression_parts.extend(
        [
            "",
            f" {native_bi_lines}",
            "",
            f" {propagated_bi_lines}",
            "",
            "",
            " meta = {",
            f' description = "{escaped_summary}";',
            f' homepage = "{escaped_homepage}";',
            f" license = lib.{nix_license_attr};",
            " };",
            "}",
        ]
    )
    return "\n".join(expression_parts)
def _run_nixfmt(nix_file_path: str) -> None:
    """Format the generated file with nixfmt on a best-effort basis."""
    print(f"Running nixfmt on {nix_file_path}...")
    try:
        fmt_proc = subprocess.run(
            ["nixfmt", nix_file_path],
            check=False,
            capture_output=True,
            text=True,
            timeout=30,
        )
    except FileNotFoundError:
        print("nixfmt command not found. Skipping formatting.")
        return
    except subprocess.TimeoutExpired:
        print("nixfmt timed out. Skipping formatting.")
        return
    except OSError as e:
        # e.g. nixfmt exists but is not executable; formatting is optional.
        print(f"Could not run nixfmt: {e}. Skipping formatting.")
        return

    if fmt_proc.returncode == 0:
        print("nixfmt completed successfully.")
    else:
        print(
            f"nixfmt failed or not found. RC: {fmt_proc.returncode}\nStderr: {fmt_proc.stderr.strip()}\nStdout: {fmt_proc.stdout.strip()}"
        )


def main():
    """Command-line entry point: build a Nix expression for one PyPI package.

    Steps: parse arguments, fetch PyPI metadata, locate and prefetch the
    sdist, inspect it for its build system, derive dependencies and license,
    render the expression, then either print it or write it to
    ``<output-dir>/<normalized-name>.nix`` (followed by an optional nixfmt
    run). Returns early, after the failing step has printed its error, when
    metadata, sdist information or the hash cannot be obtained.
    """
    parser = argparse.ArgumentParser(
        description="Generate a Nix expression for a Python package from PyPI."
    )
    parser.add_argument("pypi_package_name", help="Package name on PyPI.")
    parser.add_argument(
        "-v",
        "--version",
        help="Specific package version. Fetches latest if omitted.",
        default=None,
    )
    parser.add_argument(
        "-d",
        "--output-dir",
        help="Output directory for the Nix expression. Prints to stdout if omitted.",
        default=None,
    )
    args = parser.parse_args()

    if tomllib is None and not args.pypi_package_name.endswith(".py"):
        print(
            "Warning: 'tomllib' or 'toml' is not installed. Build system detection might be inaccurate."
        )

    # The name as typed is used for the PyPI lookup; the normalized form
    # becomes the Nix pname and the output file name.
    pypi_name_for_lookup = args.pypi_package_name
    nix_pname = normalize_dep_name(args.pypi_package_name)
    print(
        f"--- Generating Nix expression for: {pypi_name_for_lookup} (as {nix_pname} in Nix) ---"
    )

    pypi_data = fetch_pypi_data(pypi_name_for_lookup, args.version)
    if not pypi_data:
        return
    actual_version = pypi_data["info"]["version"]
    print(f"Using version: {actual_version}")

    sdist_info = get_sdist_info(pypi_data)
    if not sdist_info:
        return
    sdist_url, sdist_filename = sdist_info
    print(f"Found sdist URL: {sdist_url}\nAssociated sdist filename: {sdist_filename}")

    sha256 = get_sdist_sha256(sdist_url)
    if not sha256:
        return

    package_format, native_build_inputs, post_patch_script = (
        inspect_sdist_for_build_system(sdist_url, sdist_filename)
    )

    propagated_dependencies = extract_dependencies(pypi_data)
    print(
        f"Found runtime dependencies: {propagated_dependencies}"
        if propagated_dependencies
        else "No explicit runtime dependencies found."
    )

    raw_license_str = get_license_from_classifiers(pypi_data)
    nix_license_attr = map_license_to_nix(raw_license_str)
    print(
        f"Raw license string: '{raw_license_str}'\nMapped to Nix license: lib.{nix_license_attr}"
    )

    homepage = pypi_data.get("info", {}).get("home_page")
    summary = pypi_data.get("info", {}).get("summary")
    nix_code = generate_nix_expression(
        pname=nix_pname,
        version=actual_version,
        sha256=sha256,
        package_format=package_format,
        native_build_inputs=native_build_inputs,
        propagated_build_inputs=propagated_dependencies,
        nix_license_attr=nix_license_attr,
        homepage=homepage,
        summary=summary,
        post_patch_script=post_patch_script,
    )

    if args.output_dir:
        output_directory = args.output_dir
        output_file_path = os.path.join(output_directory, f"{nix_pname}.nix")
        try:
            # Directory creation shares the handler so that a failure here
            # also falls back to printing the expression.
            os.makedirs(output_directory, exist_ok=True)
            # Explicit UTF-8: the description may contain non-ASCII text.
            with open(output_file_path, "w", encoding="utf-8") as f:
                f.write(nix_code)
        except OSError as e:
            print(
                f"\nError writing to file {output_file_path}: {e}\n--- Generated Nix Expression (stdout fallback) ---\n{nix_code}"
            )
        else:
            print(f"\nSuccessfully wrote Nix expression to: {output_file_path}")
            _run_nixfmt(output_file_path)
    else:
        print("\n--- Generated Nix Expression ---\n" + nix_code)

    print("\n>>> IMPORTANT: Review the generated Nix expression carefully! <<<")
    effective_format_display = (
        package_format
        if package_format and package_format != "setuptools"
        else "setuptools (default)"
    )
    print(f" - Package format: '{effective_format_display}'")
    if post_patch_script:
        print(
            " - A 'postPatch' section was automatically added to relax build tool requirements."
        )
    print(f" - Detected nativeBuildInputs: {native_build_inputs}")
    print(f" - Detected propagatedBuildInputs: {propagated_dependencies}")
    print(f" - Detected license: lib.{nix_license_attr} (from '{raw_license_str}')")
    print(" - Verify all dependency names are correct for your Nix environment.")
    print(
        " - For packages with C extensions or other complex builds, you might need to add more to 'buildInputs' or 'nativeBuildInputs'."
    )
    print(" - Consider enabling 'doCheck = true;' and adding 'checkInputs'.")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment