Last active
June 8, 2025 17:22
-
-
Save crackcomm/6eb11ebaea3bedbb21b7a4089f4251df to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import subprocess | |
import re | |
import argparse | |
import tarfile | |
import zipfile | |
import io | |
import os | |
from typing import List, Dict, Optional, Tuple, Any | |
# TOML parser selection. The standard-library 'tomllib' module is tried first;
# if it cannot be imported, the third-party 'toml' package is bound to the same
# name. When neither import works, 'tomllib' ends up as None so callers can
# detect that pyproject.toml parsing is unavailable.
try:
    import tomllib
except ImportError:
    tomllib = None  # type: ignore

if tomllib is None:
    try:
        import toml as tomllib  # type: ignore
    except ImportError:
        print("ERROR: 'tomllib' (Python 3.11+) or 'toml' (third-party) not found.")
        print("Please install 'toml' if you are on Python < 3.11: pip install toml")
    else:
        print(
            "Using third-party 'toml' library. For Python 3.11+, 'tomllib' is built-in."
        )
# Known Python build-system requirements: normalized PyPI requirement name ->
# Nixpkgs attribute name. Every entry currently uses the same spelling on both
# sides, so the mapping is generated from a single tuple of names.
PYPROJECT_NATIVE_BUILD_INPUT_MAP = {
    build_tool: build_tool
    for build_tool in (
        "setuptools",
        "setuptools-scm",
        "wheel",
        "flit-core",
        "poetry-core",
        "hatchling",
        "pdm-pep517",
        "maturin",
        "scikit-build-core",
    )
}

# Build tools whose version pins in pyproject.toml [build-system].requires
# are relaxed automatically through a generated postPatch.
AUTO_RELAX_BUILD_TOOLS = [
    "setuptools",
    "wheel",
    "setuptools-scm",
]
def fetch_pypi_data(package_name: str, version: Optional[str] = None) -> Optional[Dict]:
    """Download a project's JSON metadata from pypi.org.

    Args:
        package_name: Project name, inserted into the URL as given.
        version: Release to look up; when falsy the version-less endpoint
            is queried instead.

    Returns:
        The decoded JSON document, or None when the request raised a
        ``requests.RequestException`` (the error is printed).
    """
    endpoint = f"https://pypi.org/pypi/{package_name}"
    if version:
        endpoint += f"/{version}"
    pypi_url = endpoint + "/json"

    print(f"Fetching package metadata: {pypi_url}")
    try:
        reply = requests.get(pypi_url, timeout=15)
        reply.raise_for_status()
        metadata = reply.json()
    except requests.RequestException as err:
        print(f"Error fetching PyPI data for '{package_name}': {err}")
        return None
    return metadata
def get_sdist_info(pypi_data: Dict) -> Optional[Tuple[str, str]]:
    """Pick the source distribution out of PyPI release metadata.

    Only the first entry of ``pypi_data["urls"]`` whose ``packagetype`` is
    ``"sdist"`` is considered.

    Returns:
        ``(url, filename)`` for that entry (the filename falls back to
        ``"Unknown_filename"``), or None when there is no such entry or it
        has no URL; an error is printed in that case.
    """
    for release_file in pypi_data.get("urls", []):
        if release_file.get("packagetype") != "sdist":
            continue
        sdist_url = release_file.get("url")
        if sdist_url:
            return sdist_url, release_file.get("filename", "Unknown_filename")
        # The first sdist entry has no usable URL; later entries are not tried.
        break

    print("Error: No sdist URL found in PyPI metadata.")
    return None
def get_sdist_sha256(sdist_url: str) -> Optional[str]:
    """Run ``nix-prefetch-url`` on the sdist URL and return the hash it prints.

    The command's stdout is accepted only if, after stripping, it is exactly
    52 characters made of digits and lowercase letters.

    Returns:
        The hash string, or None when the command fails, times out (300 s),
        is not installed, or prints something that fails the check above.
        A message is printed in every case.
    """
    print(f"Prefetching sdist from {sdist_url} to get its SHA256 hash...")
    try:
        completed = subprocess.run(
            ["nix-prefetch-url", sdist_url],
            capture_output=True,
            text=True,
            check=True,
            timeout=300,
        )
    except subprocess.CalledProcessError as err:
        print(
            f"Error running nix-prefetch-url: {err}\nCommand: {err.cmd}\nStderr: {err.stderr}"
        )
        return None
    except subprocess.TimeoutExpired:
        print(f"Error: nix-prefetch-url timed out for {sdist_url}")
        return None
    except FileNotFoundError:
        print(
            "Error: nix-prefetch-url command not found. Is Nix installed and in PATH?"
        )
        return None

    digest = completed.stdout.strip()
    has_expected_shape = (
        bool(digest)
        and len(digest) == 52
        and re.match(r"^[0-9a-z]+$", digest) is not None
    )
    if not has_expected_shape:
        print(
            f"Error: nix-prefetch-url returned an invalid SHA256 format: '{digest}'"
        )
        return None

    print(
        f"Successfully fetched base32 NAR SHA256: {digest} (for URL: {sdist_url})"
    )
    return digest
def normalize_dep_name(dep_string: str) -> str:
    """Reduce a requirement string to a normalized, lowercase project name.

    Everything after the first ``;`` (environment markers) is discarded, then
    the name is cut at the first version operator, extras bracket ``[``,
    parenthesised specifier ``(`` or direct-reference ``@`` (as in
    ``name @ https://...``). The remaining name is lowercased, ``_`` and ``.``
    become ``-``, other characters are dropped, and repeated or
    leading/trailing dashes are removed.

    Args:
        dep_string: A requirement such as ``"Foo_Bar[extra]>=1.0; python_version<'3.8'"``.

    Returns:
        The normalized name, or ``"UNPARSED-DEP-<prefix>"`` (built from the
        first 15 characters of the input) when no name characters remain.
    """
    name_with_specifiers = dep_string.split(";", 1)[0].strip()

    # '@' is included so that a direct URL reference does not get folded into
    # the package name. maxsplit is passed by keyword because the positional
    # form is deprecated in recent Python versions.
    delimiters_regex = r"\s*(?:[!=<>~]=?|\[|\(|@)"
    core_name = re.split(delimiters_regex, name_with_specifiers, maxsplit=1)[0].strip()

    normalized_name = core_name.lower().replace("_", "-").replace(".", "-")
    normalized_name = re.sub(r"[^a-z0-9-]+", "", normalized_name)
    normalized_name = re.sub(r"-+", "-", normalized_name).strip("-")
    if normalized_name:
        return normalized_name

    placeholder_suffix = "".join(c if c.isalnum() else "_" for c in dep_string[:15])
    return f"UNPARSED-DEP-{placeholder_suffix}"
# Build backends that do not need setuptools/wheel added next to them.
_STANDALONE_BUILD_BACKENDS = (
    "flit-core",
    "poetry-core",
    "hatchling",
    "pdm-pep517",
    "maturin",
    "scikit-build-core",
)


def _legacy_setuptools_build() -> Tuple[str, List[str], str]:
    """Return the fallback answer: legacy setuptools build, no postPatch."""
    return "setuptools", ["setuptools", "wheel"], ""


def _is_root_pyproject(member_name: str) -> bool:
    """Report whether an archive member is '<top-level dir>/pyproject.toml'."""
    if not member_name.endswith("pyproject.toml"):
        return False
    path_parts = os.path.normpath(member_name).split(os.sep)
    return len(path_parts) == 2 and path_parts[1] == "pyproject.toml"


def _read_pyproject_from_tar(sdist_content: bytes) -> Optional[str]:
    """Return the text of the root pyproject.toml of a tar archive, if any."""
    with tarfile.open(fileobj=io.BytesIO(sdist_content), mode="r:*") as tar:
        for member in tar.getmembers():
            if member.isfile() and _is_root_pyproject(member.name):
                extracted = tar.extractfile(member)
                if extracted is not None:
                    return extracted.read().decode("utf-8")
    return None


def _read_pyproject_from_zip(sdist_content: bytes) -> Optional[str]:
    """Return the text of the root pyproject.toml of a zip archive, if any."""
    with zipfile.ZipFile(io.BytesIO(sdist_content), "r") as zip_f:
        for member_name in zip_f.namelist():
            if _is_root_pyproject(member_name):
                return zip_f.read(member_name).decode("utf-8")
    return None


def _quote_for_nix_shell(text: str) -> str:
    """Quote text as one bash word that survives inside a Nix '' ... '' string.

    Bash double quotes are used so that no single-quote sequences have to be
    generated; inside a Nix indented string a pair of single quotes would end
    the string.
    """
    # Backslash-escape the characters that stay special inside bash "...".
    shell_escaped = re.sub(r'([\\"$`])', r"\\\1", text)
    # Nix indented-string escapes: '' is written as ''' and ${ as ''${ .
    nix_escaped = re.sub(
        r"''|\$\{",
        lambda match: "'''" if match.group(0) == "''" else "''${",
        shell_escaped,
    )
    return f'"{nix_escaped}"'


def _render_post_patch(patch_tuples: List[Tuple[str, str]]) -> str:
    """Render the postPatch attribute that relaxes the given requirements."""
    if not patch_tuples:
        return ""
    lines = [" postPatch = ''"]
    for original_req, relaxed_req in patch_tuples:
        lines.append(
            " substituteInPlace pyproject.toml --replace "
            f"{_quote_for_nix_shell(original_req)} {_quote_for_nix_shell(relaxed_req)}"
        )
    lines.append(" '';")
    return "\n".join(lines).strip()


def _plan_native_build_inputs(
    original_requires: List[str],
) -> Tuple[List[str], List[Tuple[str, str]]]:
    """Turn [build-system].requires into Nix attrs and pin-relaxing patches."""
    patch_tuples: List[Tuple[str, str]] = []  # (original_req, relaxed_req)
    requirement_names: List[str] = []

    if not original_requires:
        print(
            "pyproject.toml found, but no 'build-system.requires'. Defaulting to setuptools & wheel for nativeBuildInputs."
        )
        requirement_names = ["setuptools", "wheel"]
    else:
        for req_str in original_requires:
            norm_req_name_only = normalize_dep_name(req_str)
            if norm_req_name_only in AUTO_RELAX_BUILD_TOOLS:
                is_pinned = (
                    bool(re.search(r"[=<>~!]", req_str))
                    and req_str.strip() != norm_req_name_only
                )
                if is_pinned:
                    print(
                        f"INFO: Found pinned build-time requirement: '{req_str}'."
                    )
                    print(
                        f" Will generate postPatch to relax it to '{norm_req_name_only}'."
                    )
                    patch_tuples.append((req_str, norm_req_name_only))
            requirement_names.append(norm_req_name_only)

    mapped_attrs = {
        PYPROJECT_NATIVE_BUILD_INPUT_MAP[name]
        for name in requirement_names
        if name in PYPROJECT_NATIVE_BUILD_INPUT_MAP
    }
    if original_requires and not mapped_attrs:
        print(
            "Warning: 'build-system.requires' was present but no known tools mapped. Defaulting nativeBuildInputs."
        )
    # Without a standalone backend the build is assumed to go through
    # setuptools, so setuptools and wheel are always provided.
    if mapped_attrs.isdisjoint(_STANDALONE_BUILD_BACKENDS):
        mapped_attrs.update(("setuptools", "wheel"))
    return sorted(mapped_attrs), patch_tuples


def inspect_sdist_for_build_system(
    sdist_url: str, sdist_filename: str
) -> Tuple[str, List[str], str]:
    """Download an sdist and derive its build configuration from pyproject.toml.

    The archive is fetched from ``sdist_url``; its type is chosen from the
    ``sdist_filename`` suffix (.tar.gz/.tar.bz2/.tgz/.tbz2 or .zip). Only a
    pyproject.toml directly inside the archive's top-level directory is used.
    Pinned requirements on the tools listed in AUTO_RELAX_BUILD_TOOLS produce
    a postPatch that replaces the pinned requirement with the bare name.

    Returns:
        ``(package_format, native_build_inputs, post_patch_script)``. On any
        failure (no TOML parser, download error, unsupported or unreadable
        archive, missing or unparsable pyproject.toml) the result is
        ``("setuptools", ["setuptools", "wheel"], "")`` and a message is
        printed.
    """
    if tomllib is None:
        print(
            "Warning: tomllib/toml not available. Cannot parse pyproject.toml. Assuming legacy setuptools build."
        )
        return _legacy_setuptools_build()

    print(f"Downloading sdist for inspection: {sdist_filename} from {sdist_url}")
    try:
        response = requests.get(sdist_url, timeout=60)
        response.raise_for_status()
        sdist_content = response.content
    except requests.RequestException as e:
        print(f"Error downloading sdist for inspection: {e}")
        return _legacy_setuptools_build()

    if sdist_filename.endswith((".tar.gz", ".tar.bz2", ".tgz", ".tbz2")):
        read_pyproject = _read_pyproject_from_tar
    elif sdist_filename.endswith(".zip"):
        read_pyproject = _read_pyproject_from_zip
    else:
        print(
            f"Unsupported sdist format: {sdist_filename}. Assuming legacy setuptools."
        )
        return _legacy_setuptools_build()

    try:
        pyproject_toml_content = read_pyproject(sdist_content)
    except (
        tarfile.TarError,
        zipfile.BadZipFile,
        UnicodeDecodeError,
        EOFError,
        OSError,
    ) as e:
        # EOFError/OSError cover truncated or corrupt compressed streams.
        print(f"Error processing sdist archive: {e}")
        return _legacy_setuptools_build()

    if not pyproject_toml_content:
        print(
            "No pyproject.toml found at the sdist root. Assuming legacy setuptools build."
        )
        return _legacy_setuptools_build()

    print("Found pyproject.toml. Parsing for build system requirements.")
    try:
        toml_data = tomllib.loads(pyproject_toml_content)
        original_requires = toml_data.get("build-system", {}).get("requires", [])
        native_build_inputs_attrs, patch_tuples = _plan_native_build_inputs(
            original_requires
        )
        post_patch_script = _render_post_patch(patch_tuples)
    except Exception as e:
        # Deliberately broad: detection is best effort, and a malformed
        # pyproject.toml must not abort generation of the expression.
        print(f"Error parsing pyproject.toml: {e}")
        return _legacy_setuptools_build()

    if post_patch_script:
        print(
            f"INFO: Will add the following postPatch section to the Nix expression:\n{post_patch_script}"
        )
    print(
        f"Determined nativeBuildInputs for pyproject.toml: {native_build_inputs_attrs}"
    )
    return "pyproject", native_build_inputs_attrs, post_patch_script
def extract_dependencies(pypi_data: Dict) -> List[str]:
    """List the normalized names of a package's unconditional runtime requirements.

    Reads ``pypi_data["info"]["requires_dist"]``. Requirements whose
    environment marker (the part after ``;``) tests ``extra ==`` belong to an
    optional extra and are skipped, whatever spacing is used around the
    operator. Names that ``normalize_dep_name`` could not parse are dropped.

    Returns:
        A sorted list of unique normalized names; empty when the metadata has
        no requirements (missing or null ``info`` / ``requires_dist``).
    """
    requires_dist = (pypi_data.get("info") or {}).get("requires_dist") or []

    dependencies = set()
    for dep_string in requires_dist:
        # Only the marker is inspected, so a requirement that is itself named
        # "extra" is not mistaken for an optional dependency.
        marker = dep_string.partition(";")[2]
        if re.search(r"\bextra\s*==", marker):
            continue
        normalized = normalize_dep_name(dep_string)
        if normalized and not normalized.startswith("UNPARSED-DEP-"):
            dependencies.add(normalized)
    return sorted(dependencies)
# Ordered (needles, attribute) rules: the first rule with a needle contained
# in the lowercased license text wins. AGPL and LGPL rules come before the
# GPL rules because their spellings contain the GPL spellings as substrings
# (for example "lgplv3" contains "gplv3").
_LICENSE_RULES = (
    (("apache software license version 2.0", "apache-2.0"), "licenses.asl20"),
    (("mozilla public license 2.0", "mpl-2.0"), "licenses.mpl20"),
    (("bsd 3-clause", "bsd-3-clause"), "licenses.bsd3"),
    (("bsd 2-clause", "bsd-2-clause"), "licenses.bsd2"),
    (
        (
            "affero general public license v3 or later",
            "affero general public license v3.0 or later",
            "agpl-3.0-or-later",
            "agplv3+",
        ),
        "licenses.agpl3Plus",
    ),
    (("affero", "agpl"), "licenses.agpl3Only"),
    (
        (
            "gnu lesser general public license v3.0 or later",
            "lgpl-3.0-or-later",
            "lgplv3+",
        ),
        "licenses.lgpl3Plus",
    ),
    (
        ("gnu lesser general public license v3.0 only", "lgpl-3.0-only", "lgplv3"),
        "licenses.lgpl3Only",
    ),
    (
        (
            "gnu lesser general public license v2.1 or later",
            "lgpl-2.1-or-later",
            "lgpl21+",
        ),
        "licenses.lgpl21Plus",
    ),
    (
        ("gnu lesser general public license v2.1 only", "lgpl-2.1-only", "lgpl21"),
        "licenses.lgpl21Only",
    ),
    (
        ("gnu general public license v3.0 or later", "gpl-3.0-or-later", "gplv3+"),
        "licenses.gpl3Plus",
    ),
    (
        ("gnu general public license v3.0 only", "gpl-3.0-only", "gplv3"),
        "licenses.gpl3Only",
    ),
    (
        ("gnu general public license v2.0 or later", "gpl-2.0-or-later", "gplv2+"),
        "licenses.gpl2Plus",
    ),
    (
        ("gnu general public license v2.0 only", "gpl-2.0-only", "gplv2"),
        "licenses.gpl2Only",
    ),
    # Generic family fallbacks, tried only after every specific rule above.
    (("apache",), "licenses.asl20"),
    (("bsd",), "licenses.bsdOriginal"),
    (("lgpl",), "licenses.lgpl3Plus"),
    (("gpl",), "licenses.gpl3Plus"),
    (("mpl",), "licenses.mpl20"),
    (("unlicense",), "licenses.unlicense"),
    (("public domain",), "licenses.publicDomain"),
)


def map_license_to_nix(license_classifier: Optional[str]) -> str:
    """Guess the ``licenses.<name>`` attribute for a free-form license string.

    Matching is a case-insensitive heuristic: "mit" must appear as a whole
    word (so "limited" or "permitted" do not count), then the ordered
    substring rules in ``_LICENSE_RULES`` are tried.

    Args:
        license_classifier: Classifier tail, SPDX-like identifier or license
            field text; may be None or empty.

    Returns:
        A string such as ``"licenses.mit"``. ``"licenses.unfree"`` is returned
        for empty input and, with a printed warning, when nothing matches.
    """
    if not license_classifier:
        return "licenses.unfree"

    lc = license_classifier.lower()
    if re.search(r"\bmit\b", lc):
        return "licenses.mit"

    for needles, nix_attr in _LICENSE_RULES:
        if any(needle in lc for needle in needles):
            return nix_attr

    print(
        f"Warning: Could not confidently map license: '{license_classifier}'. Defaulting to unfree."
    )
    return "licenses.unfree"
def get_license_from_classifiers(pypi_data: Dict) -> Optional[str]:
    """Choose the most trustworthy license string from PyPI metadata.

    Sources are tried in this order:

    1. ``License :: OSI Approved :: ...`` classifiers (MIT, Apache, MPL 2.0
       and BSD are preferred when several are present, otherwise the first).
    2. ``info.license_expression``, when present and non-empty.
    3. ``info.license`` if it is short (< 30 chars), has no ``::`` and names
       a well-known license family.
    4. Any other ``License ::`` classifier.
    5. ``info.license`` as a last resort, unless it is ``"UNKNOWN"`` or 100+
       characters long.

    Missing or null ``info``, ``classifiers`` and ``license`` values are
    treated as empty.

    Returns:
        The chosen license text, or None when no source yields one.
    """
    info = pypi_data.get("info") or {}

    osi_approved_licenses = []
    other_license_classifiers = []
    for classifier in info.get("classifiers") or []:
        if classifier.startswith("License :: OSI Approved ::"):
            osi_approved_licenses.append(classifier.split("::")[-1].strip())
        elif "License ::" in classifier:
            other_license_classifiers.append(classifier.split("::")[-1].strip())

    if osi_approved_licenses:
        for preferred in (
            "MIT License",
            "Apache Software License",
            "Mozilla Public License 2.0",
            "BSD License",
        ):
            if preferred in osi_approved_licenses:
                return preferred
        return osi_approved_licenses[0]

    license_expression = (info.get("license_expression") or "").strip()
    if license_expression:
        print(
            f"Using license from 'info.license_expression' field: '{license_expression}'"
        )
        return license_expression

    # The JSON value can be null, so it is coerced to "" before stripping.
    license_field = (info.get("license") or "").strip()
    if license_field:
        normalized_license_field = license_field.lower()
        names_known_family = any(
            kw in normalized_license_field
            for kw in ["mit", "apache", "bsd", "gpl", "lgpl", "mpl", "unlicense"]
        )
        if (
            names_known_family
            and len(normalized_license_field) < 30
            and "::" not in license_field
        ):
            print(f"Using license from 'info.license' field: '{license_field}'")
            return license_field

    if other_license_classifiers:
        return other_license_classifiers[0]

    if license_field and license_field != "UNKNOWN" and len(license_field) < 100:
        print(
            f"Using license from 'info.license' field as last resort: '{license_field}'"
        )
        return license_field
    return None
def _escape_nix_string(text: str) -> str:
    """Escape text for use inside a double-quoted Nix string literal."""
    # Backslashes first, so the escapes added afterwards are not doubled.
    return text.replace("\\", "\\\\").replace('"', '\\"').replace("${", "\\${")


def _render_nix_list(attribute: str, items: List[str]) -> str:
    """Render ``attribute = [ ... ];`` with one item per line."""
    if not items:
        return f"{attribute} = [];"
    body = "".join(f" {item}\n" for item in items)
    return f"{attribute} = [\n{body} ];"


def generate_nix_expression(
    pname: str,
    version: str,
    sha256: str,
    package_format: str,
    native_build_inputs: List[str],
    propagated_build_inputs: List[str],
    nix_license_attr: str,
    homepage: Optional[str],
    summary: Optional[str],
    post_patch_script: str,
) -> str:
    """Assemble the text of a ``buildPythonPackage`` expression.

    Args:
        pname: Package name written to ``pname``.
        version: Version written to ``version``.
        sha256: Hash written into the ``fetchPypi`` call.
        package_format: Emitted as ``format = "...";`` unless it is empty or
            ``"setuptools"``.
        native_build_inputs: Attribute names for ``nativeBuildInputs``.
        propagated_build_inputs: Attribute names for ``propagatedBuildInputs``.
        nix_license_attr: Path below ``lib``, e.g. ``"licenses.mit"``.
        homepage: Project URL; an empty string is emitted when falsy.
        summary: One-line description; ``"Python package <pname>"`` is used
            when falsy.
        post_patch_script: Ready-made ``postPatch = ...;`` text inserted
            verbatim after ``src``; skipped when empty.

    Returns:
        The expression as a single string. Description and homepage are
        escaped so that quotes, backslashes and ``${`` in the metadata cannot
        break out of the Nix string literal.
    """
    native_bi_lines = _render_nix_list("nativeBuildInputs", native_build_inputs)
    propagated_bi_lines = _render_nix_list(
        "propagatedBuildInputs", propagated_build_inputs
    )
    description = _escape_nix_string(summary) if summary else f"Python package {pname}"
    homepage_text = _escape_nix_string(homepage or "")

    expression_parts = [
        "{ pself }:",
        "",
        "with pself;",
        "buildPythonPackage rec {",
        f' pname = "{pname}";',
        f' version = "{version}";',
    ]
    if package_format and package_format != "setuptools":
        expression_parts.append(f' format = "{package_format}";')

    expression_parts += [
        "",
        " src = fetchPypi {",
        " inherit pname version;",
        f' sha256 = "{sha256}";',
        " };",
    ]
    if post_patch_script:
        expression_parts += ["", post_patch_script]

    expression_parts += [
        "",
        f" {native_bi_lines}",
        "",
        f" {propagated_bi_lines}",
        "",
        "",
        " meta = {",
        f' description = "{description}";',
        f' homepage = "{homepage_text}";',
        f" license = lib.{nix_license_attr};",
        " };",
        "}",
    ]
    return "\n".join(expression_parts)
def main():
    """Command-line entry point: build a Nix expression for one PyPI package.

    Steps: parse arguments, fetch the PyPI metadata, locate the sdist,
    prefetch its hash, inspect the sdist's build system, collect runtime
    dependencies and license, then render the expression. With
    ``-d/--output-dir`` the result is written to ``<dir>/<name>.nix`` (and
    passed through ``nixfmt`` when available); otherwise it is printed. If
    the directory or file cannot be written, the expression is printed
    instead. The function returns early, without output, when metadata,
    sdist or hash cannot be obtained.
    """
    parser = argparse.ArgumentParser(
        description="Generate a Nix expression for a Python package from PyPI."
    )
    parser.add_argument("pypi_package_name", help="Package name on PyPI.")
    parser.add_argument(
        "-v",
        "--version",
        help="Specific package version. Fetches latest if omitted.",
        default=None,
    )
    parser.add_argument(
        "-d",
        "--output-dir",
        help="Output directory for the Nix expression. Prints to stdout if omitted.",
        default=None,
    )
    args = parser.parse_args()

    if tomllib is None and not args.pypi_package_name.endswith(".py"):
        print(
            "Warning: 'tomllib' or 'toml' is not installed. Build system detection might be inaccurate."
        )

    pypi_name_for_lookup = args.pypi_package_name
    nix_pname = normalize_dep_name(args.pypi_package_name)
    print(
        f"--- Generating Nix expression for: {pypi_name_for_lookup} (as {nix_pname} in Nix) ---"
    )

    pypi_data = fetch_pypi_data(pypi_name_for_lookup, args.version)
    if not pypi_data:
        return
    actual_version = pypi_data["info"]["version"]
    print(f"Using version: {actual_version}")

    sdist_info = get_sdist_info(pypi_data)
    if not sdist_info:
        return
    sdist_url, sdist_filename = sdist_info
    print(f"Found sdist URL: {sdist_url}\nAssociated sdist filename: {sdist_filename}")

    sha256 = get_sdist_sha256(sdist_url)
    if not sha256:
        return

    package_format, native_build_inputs, post_patch_script = (
        inspect_sdist_for_build_system(sdist_url, sdist_filename)
    )

    propagated_dependencies = extract_dependencies(pypi_data)
    print(
        f"Found runtime dependencies: {propagated_dependencies}"
        if propagated_dependencies
        else "No explicit runtime dependencies found."
    )

    raw_license_str = get_license_from_classifiers(pypi_data)
    nix_license_attr = map_license_to_nix(raw_license_str)
    print(
        f"Raw license string: '{raw_license_str}'\nMapped to Nix license: lib.{nix_license_attr}"
    )

    info = pypi_data.get("info", {})
    homepage = info.get("home_page")
    if not homepage:
        # The metadata may carry the project link only under 'project_urls'.
        project_urls = info.get("project_urls") or {}
        homepage = next(
            (
                url
                for label, url in project_urls.items()
                if label.lower() in ("homepage", "home", "home-page")
            ),
            None,
        )
    summary = info.get("summary")

    nix_code = generate_nix_expression(
        pname=nix_pname,
        version=actual_version,
        sha256=sha256,
        package_format=package_format,
        native_build_inputs=native_build_inputs,
        propagated_build_inputs=propagated_dependencies,
        nix_license_attr=nix_license_attr,
        homepage=homepage,
        summary=summary,
        post_patch_script=post_patch_script,
    )

    if args.output_dir:
        output_directory = args.output_dir
        output_file_path = os.path.join(output_directory, f"{nix_pname}.nix")
        try:
            # Directory creation sits inside the try so that a failure here
            # also falls back to printing the expression.
            os.makedirs(output_directory, exist_ok=True)
            with open(output_file_path, "w", encoding="utf-8") as f:
                f.write(nix_code)
            print(f"\nSuccessfully wrote Nix expression to: {output_file_path}")
            try:
                print(f"Running nixfmt on {output_file_path}...")
                fmt_proc = subprocess.run(
                    ["nixfmt", output_file_path],
                    check=False,
                    capture_output=True,
                    text=True,
                    timeout=30,
                )
                if fmt_proc.returncode == 0:
                    print("nixfmt completed successfully.")
                else:
                    print(
                        f"nixfmt failed or not found. RC: {fmt_proc.returncode}\nStderr: {fmt_proc.stderr.strip()}\nStdout: {fmt_proc.stdout.strip()}"
                    )
            except FileNotFoundError:
                print("nixfmt command not found. Skipping formatting.")
            except subprocess.TimeoutExpired:
                print("nixfmt timed out. Skipping formatting.")
        except OSError as e:
            print(
                f"\nError writing to file {output_file_path}: {e}\n--- Generated Nix Expression (stdout fallback) ---\n{nix_code}"
            )
    else:
        print("\n--- Generated Nix Expression ---\n" + nix_code)

    print("\n>>> IMPORTANT: Review the generated Nix expression carefully! <<<")
    effective_format_display = (
        package_format
        if package_format and package_format != "setuptools"
        else "setuptools (default)"
    )
    print(f" - Package format: '{effective_format_display}'")
    if post_patch_script:
        print(
            " - A 'postPatch' section was automatically added to relax build tool requirements."
        )
    print(f" - Detected nativeBuildInputs: {native_build_inputs}")
    print(f" - Detected propagatedBuildInputs: {propagated_dependencies}")
    print(f" - Detected license: lib.{nix_license_attr} (from '{raw_license_str}')")
    print(" - Verify all dependency names are correct for your Nix environment.")
    print(
        " - For packages with C extensions or other complex builds, you might need to add more to 'buildInputs' or 'nativeBuildInputs'."
    )
    print(" - Consider enabling 'doCheck = true;' and adding 'checkInputs'.")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment