Skip to content

Instantly share code, notes, and snippets.

@HarmonicHemispheres
Created September 24, 2024 23:39
Show Gist options
  • Save HarmonicHemispheres/4ddb0de77fa9b14b14fe0f462ad894fc to your computer and use it in GitHub Desktop.
Save HarmonicHemispheres/4ddb0de77fa9b14b14fe0f462ad894fc to your computer and use it in GitHub Desktop.
Generates a Software Bill of Materials (SBOM) in Markdown format by parsing `pyproject.toml` and `poetry.lock` files of a Python project.
"""
Software Bill of Materials (SBOM) Generator
This script generates a Software Bill of Materials (SBOM) in Markdown format by parsing
`pyproject.toml` and `poetry.lock` files of a Python project. It fetches metadata from PyPI
for each dependency and compiles a comprehensive SBOM detailing components, versions,
licenses, and more.
## Prerequisites
pip install tomli requests packaging tabulate
## Usage
python generate_sbom.py
"""
import tomli
import requests
import os
import sys
from datetime import datetime
from packaging import version
from tabulate import tabulate
# Constants
# Input files; these are bare file names, so they are looked up in the
# current working directory when opened.
PYPROJECT_FILE = 'pyproject.toml'
POETRY_LOCK_FILE = 'poetry.lock'
# URL template for the PyPI JSON API; the `{}` placeholder is filled in by
# fetch_pypi_metadata() via str.format().
PYPI_API_URL = 'https://pypi.org/pypi/{}/json'
# Function to parse TOML files
def parse_toml(file_path):
    """Load a TOML file and return its contents as a dict.

    This is a command-line helper: any failure is reported on stderr and
    terminates the process instead of raising to the caller.

    Args:
        file_path: Path of the TOML file to read.

    Returns:
        The parsed document as returned by ``tomli.load``.

    Raises:
        SystemExit: With exit code 1 when the file is missing, cannot be
            read, or is not valid TOML.
    """
    try:
        # tomli requires a binary file handle.
        with open(file_path, 'rb') as f:
            return tomli.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.", file=sys.stderr)
        sys.exit(1)
    except OSError as e:
        # The path exists but cannot be read (directory, permissions, ...).
        print(f"Error reading {file_path}: {e}", file=sys.stderr)
        sys.exit(1)
    except tomli.TOMLDecodeError as e:
        print(f"Error parsing {file_path}: {e}", file=sys.stderr)
        sys.exit(1)
# Function to get dependencies from poetry.lock
def get_dependencies_from_lock(lock_data):
    """Index the packages of a parsed ``poetry.lock`` document by name.

    Args:
        lock_data: Parsed lock file; its ``package`` entry is expected to be
            a list of per-package tables.

    Returns:
        A dict mapping each package name to a dict with the keys
        ``version``, ``dependencies``, ``category``, ``optional`` and
        ``python-versions``. Entries lacking a name or a version are left
        out; if a name occurs more than once the last entry wins.
    """
    def _describe(entry):
        # Copy only the fields the SBOM cares about, with the same defaults
        # for absent keys.
        return {
            'version': entry.get('version'),
            'dependencies': entry.get('dependencies', {}),
            'category': entry.get('category'),
            'optional': entry.get('optional', False),
            'python-versions': entry.get('python-versions'),
        }

    return {
        entry.get('name'): _describe(entry)
        for entry in lock_data.get('package', [])
        if entry.get('name') and entry.get('version')
    }
# Function to fetch package metadata from PyPI
def fetch_pypi_metadata(package_name, package_version):
    """Fetch metadata for one exact release of a package from PyPI.

    The release-specific JSON endpoint is queried so that the license,
    summary and requirements describe ``package_version`` itself rather than
    whatever release happens to be the latest on PyPI.

    Args:
        package_name: Name of the distribution on PyPI.
        package_version: Exact version string to look up.

    Returns:
        A dict with the keys ``name``, ``version``, ``summary``,
        ``home_page``, ``project_url``, ``license``, ``requires_python``,
        ``dependencies`` (requirement strings with environment markers
        stripped), ``release_date`` (upload timestamp of the first listed
        file, or ``None``) and ``hash_sha256`` (digest of the first listed
        file, or ``None``). An empty dict is returned, after printing a
        warning to stderr, when the lookup fails for any reason.
    """
    # PYPI_API_URL has one placeholder between ".../pypi/" and "/json";
    # filling it with "<name>/<version>" yields the release-specific
    # endpoint ".../pypi/<name>/<version>/json".
    url = PYPI_API_URL.format(f"{package_name}/{package_version}")
    try:
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            print(
                f"Warning: Failed to fetch data for {package_name}=={package_version}",
                file=sys.stderr,
            )
            return {}
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error does not derive from RequestException.
        print(
            f"Warning: Exception occurred while fetching {package_name}: {e}",
            file=sys.stderr,
        )
        return {}

    info = data.get('info') or {}
    # The release endpoint lists this version's files under "urls"; fall
    # back to the "releases" mapping in case the response still carries it.
    files = data.get('urls') or (data.get('releases') or {}).get(package_version) or []
    first_file = files[0] if files else {}

    # Only the first listed file is inspected, so the hash identifies that
    # one artifact, not every wheel/sdist of the release.
    hash_sha256 = (first_file.get('digests') or {}).get('sha256')
    release_date = first_file.get('upload_time_iso_8601') or first_file.get('upload_time')

    # Keep just the requirement part of each entry (text before ';').
    dependencies = [
        requirement
        for requirement in (
            raw.split(';')[0].strip() for raw in info.get('requires_dist') or []
        )
        if requirement
    ]

    return {
        'name': info.get('name'),
        'version': package_version,
        'summary': info.get('summary'),
        'home_page': info.get('home_page'),
        'project_url': info.get('project_url'),
        'license': info.get('license'),
        'requires_python': info.get('requires_python'),
        'dependencies': dependencies,
        'release_date': release_date,
        'hash_sha256': hash_sha256,
    }
# Function to generate SBOM in Markdown
def _md_cell(value, default="N/A"):
    """Render *value* as text that fits on a single Markdown table row."""
    if value is None:
        return default
    # A raw line break or '|' inside a cell would split the table row, so
    # collapse all whitespace runs and escape the column separator.
    text = " ".join(str(value).split()).replace("|", "\\|")
    return text or default


def _pypi_purl(name, version):
    """Build the Package URL (purl) for a PyPI component."""
    # The purl spec requires PyPI names lowercased with '_' turned into '-'.
    normalized = str(name or "").lower().replace("_", "-")
    purl = f"pkg:pypi/{normalized}"
    return f"{purl}@{version}" if version else purl


def _write_table(out, headers, rows):
    """Write *rows* to *out* as a GitHub-flavoured Markdown table."""
    # disable_numparse keeps version strings verbatim; otherwise a column in
    # which every value looks numeric is reformatted (e.g. "1.10" -> "1.1").
    out.write(tabulate(rows, headers=headers, tablefmt="github", disable_numparse=True))
    out.write("\n\n")


def generate_sbom_markdown(sbom_data, project_metadata):
    """Write the SBOM report as a Markdown file in the working directory.

    The file is named ``SBOM_<YYYYmmdd_HHMMSS>.md`` and a confirmation line
    is printed once it has been written.

    Args:
        sbom_data: List of component dicts as produced by
            ``fetch_pypi_metadata`` (keys read here: ``name``, ``version``,
            ``license``, ``hash_sha256``, ``dependencies``, ``home_page``,
            ``release_date``). Missing keys are rendered as "N/A".
        project_metadata: Parsed ``pyproject.toml``. The project name is
            taken from ``[tool.poetry]`` and, failing that, ``[project]``.

    Returns:
        None.
    """
    # Take one timestamp so the file name and the header always agree.
    now = datetime.now()
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    filename = f"SBOM_{timestamp}.md"

    poetry_name = project_metadata.get('tool', {}).get('poetry', {}).get('name')
    pep621_name = project_metadata.get('project', {}).get('name')
    project_name = poetry_name or pep621_name or 'Unknown'

    # Pre-compute the per-component cells that several tables share.
    names = [_md_cell(comp.get('name'), "") for comp in sbom_data]
    licenses = [_md_cell(comp.get('license')) for comp in sbom_data]
    home_pages = [_md_cell(comp.get('home_page')) for comp in sbom_data]
    release_dates = [_md_cell(comp.get('release_date')) for comp in sbom_data]

    with open(filename, 'w', encoding='utf-8') as f:
        f.write("# Software Bill of Materials (SBOM)\n\n")
        f.write(f"**Project:** {project_name}\n\n")
        f.write(f"**Generated on:** {now.isoformat()}\n\n")

        # 1. Component Information
        f.write("## 1. Component Information\n\n")
        _write_table(
            f,
            ["Component Name", "Version", "Supplier/Vendor", "Component Type"],
            [
                [name, _md_cell(comp.get('version'), ""), "PyPI", "Open-Source"]
                for name, comp in zip(names, sbom_data)
            ],
        )

        # 2. Identifiers and Metadata
        f.write("## 2. Identifiers and Metadata\n\n")
        _write_table(
            f,
            ["Component Name", "Unique Identifier (PURL)", "License", "SHA-256 Hash"],
            [
                [
                    name,
                    _pypi_purl(comp.get('name'), comp.get('version')),
                    license_text,
                    _md_cell(comp.get('hash_sha256')),
                ]
                for name, license_text, comp in zip(names, licenses, sbom_data)
            ],
        )

        # 3. Dependencies
        f.write("## 3. Dependencies\n\n")
        for comp in sbom_data:
            f.write(f"### {comp.get('name')}=={comp.get('version')}\n\n")
            dependencies = comp.get('dependencies') or []
            if dependencies:
                f.write("**Direct Dependencies:**\n\n")
                f.writelines(f"- {dep}\n" for dep in dependencies)
            else:
                f.write("No direct dependencies.\n")
            f.write("\n")

        # 4. Vulnerability Information
        f.write("## 4. Vulnerability Information\n\n")
        f.write("*Note: Vulnerability data is not fetched in this script. Integrate with a vulnerability database like [OSS Index](https://ossindex.sonatype.org/) for comprehensive information.*\n\n")

        # 5. Operational Details
        f.write("## 5. Operational Details\n\n")
        _write_table(
            f,
            ["Component Name", "Source Repository", "Release Date"],
            [list(row) for row in zip(names, home_pages, release_dates)],
        )

        # 6. Compliance Information
        f.write("## 6. Compliance Information\n\n")
        _write_table(
            f,
            ["Component Name", "License"],
            [list(row) for row in zip(names, licenses)],
        )

        # 7. Format and Standards
        f.write("## 7. Format and Standards\n\n")
        f.write("*This SBOM is generated in Markdown format. For interoperability, consider exporting in SPDX or CycloneDX formats.*\n\n")

        # 8. Security and Provenance
        f.write("## 8. Security and Provenance\n\n")
        _write_table(
            f,
            ["Component Name", "Home Page", "Provenance"],
            [
                [
                    name,
                    home_page,
                    "Verified via PyPI" if comp.get('hash_sha256') else "N/A",
                ]
                for name, home_page, comp in zip(names, home_pages, sbom_data)
            ],
        )

        # 9. Asset Classification
        f.write("## 9. Asset Classification\n\n")
        f.write("*This section requires manual classification based on the project's context.*\n\n")

        # 10. Lifecycle Information
        f.write("## 10. Lifecycle Information\n\n")
        _write_table(
            f,
            ["Component Name", "Release Date", "Maintenance Status"],
            [
                # "Active" is a simplified assumption; no maintenance data is fetched.
                [name, release_date, "Active"]
                for name, release_date in zip(names, release_dates)
            ],
        )

        # 11. Additional Context
        f.write("## 11. Additional Context\n\n")
        f.write("*Add any supplementary notes or explanations about specific components here.*\n\n")

        # 12. Automated Tools Integration
        f.write("## 12. Automated Tools Integration\n\n")
        f.write("*This SBOM was generated using a custom Python script. For automated workflows, integrate this script into your CI/CD pipeline.*\n\n")

        # Static footer
        f.write("---\n")
        f.write("### **Standards and Formats**\n\n")
        f.write("- **SPDX (Software Package Data Exchange):** A widely adopted standard for SBOMs.\n")
        f.write("- **CycloneDX:** Focused on security and supply chain management.\n")
        f.write("- **SWID Tags:** XML-based tags for software asset management.\n\n")
        f.write("### **Importance of SBOMs**\n\n")
        f.write("- **Enhance Security:** Identify and address vulnerabilities.\n")
        f.write("- **Ensure Compliance:** Manage licensing requirements.\n")
        f.write("- **Improve Transparency:** Visibility into the software supply chain.\n")
        f.write("- **Facilitate Maintenance:** Streamline updates and maintenance.\n")

    print(f"SBOM generated and saved to {filename}")
# Main function
def main():
    """Generate the SBOM for the Poetry project in the working directory.

    Reads ``pyproject.toml`` and ``poetry.lock``, looks up every locked
    package on PyPI and writes the Markdown report.

    Raises:
        SystemExit: With exit code 1 when either input file is missing or
            cannot be parsed.
    """
    # Parse pyproject.toml
    project_data = parse_toml(PYPROJECT_FILE)

    # Parse poetry.lock; the explicit existence check keeps the hint about
    # running `poetry lock`.
    if not os.path.exists(POETRY_LOCK_FILE):
        print(
            f"Error: {POETRY_LOCK_FILE} not found. Ensure you have run `poetry lock`.",
            file=sys.stderr,
        )
        sys.exit(1)
    # Reuse parse_toml so a malformed lock file is reported the same way as
    # a malformed pyproject.toml instead of surfacing a traceback.
    lock_data = parse_toml(POETRY_LOCK_FILE)
    dependencies = get_dependencies_from_lock(lock_data)

    # Fetch metadata for each dependency
    sbom_data = []
    for pkg_name, pkg_info in dependencies.items():
        pkg_version = pkg_info.get('version')
        metadata = fetch_pypi_metadata(pkg_name, pkg_version)
        # A bill of materials must list every locked component: when the
        # PyPI lookup fails, keep the package with what the lock file knows.
        sbom_data.append(metadata or {'name': pkg_name, 'version': pkg_version})

    # Generate SBOM Markdown
    generate_sbom_markdown(sbom_data, project_data)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment