Created
September 24, 2024 23:39
-
-
Save HarmonicHemispheres/4ddb0de77fa9b14b14fe0f462ad894fc to your computer and use it in GitHub Desktop.
Generates a Software Bill of Materials (SBOM) in Markdown format by parsing `pyproject.toml` and `poetry.lock` files of a Python project.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Software Bill of Materials (SBOM) Generator

This script generates a Software Bill of Materials (SBOM) in Markdown format by parsing
the `pyproject.toml` and `poetry.lock` files of a Python project. It fetches metadata from PyPI
for each dependency and compiles a comprehensive SBOM detailing components, versions,
licenses, and more.

## Prerequisites

    pip install tomli requests packaging tabulate

## Usage

    python generate_sbom.py
""" | |
import tomli | |
import requests | |
import os | |
import sys | |
from datetime import datetime | |
from packaging import version | |
from tabulate import tabulate | |
# Constants | |
# Input file names. They are relative paths, so they are resolved against the
# directory the script is run from.
PYPROJECT_FILE = 'pyproject.toml'
POETRY_LOCK_FILE = 'poetry.lock'
# URL template for the PyPI JSON API; the `{}` placeholder is filled in by
# fetch_pypi_metadata.
PYPI_API_URL = 'https://pypi.org/pypi/{}/json'
# Function to parse TOML files | |
def parse_toml(file_path):
    """Load a TOML file and return its contents as a dictionary.

    This is a command-line helper: instead of raising, it reports the problem
    on stderr and terminates the process with exit status 1.

    Args:
        file_path: Path of the TOML file to read.

    Returns:
        The parsed document as returned by ``tomli.load``.

    Raises:
        SystemExit: With code 1 if the file is missing, cannot be read, or
            does not contain valid TOML.
    """
    try:
        # tomli only accepts file objects opened in binary mode.
        with open(file_path, 'rb') as f:
            return tomli.load(f)
    except FileNotFoundError:
        print(f"Error: {file_path} not found.", file=sys.stderr)
    except OSError as e:
        # Permission problems, a directory passed instead of a file, etc.
        print(f"Error reading {file_path}: {e}", file=sys.stderr)
    except tomli.TOMLDecodeError as e:
        print(f"Error parsing {file_path}: {e}", file=sys.stderr)
    # Only reached when one of the handlers above ran.
    sys.exit(1)
# Function to get dependencies from poetry.lock | |
def get_dependencies_from_lock(lock_data):
    """Collect the locked packages from parsed ``poetry.lock`` data.

    Args:
        lock_data: Mapping produced by parsing ``poetry.lock``; its
            ``package`` entry is read as a list of package tables.

    Returns:
        A dict keyed by package name. Each value holds the locked ``version``
        plus the entry's ``dependencies``, ``category``, ``optional`` and
        ``python-versions`` fields. Entries lacking a name or a version are
        left out; if a name occurs more than once, the last entry wins.
    """
    # Only entries that carry both a name and a version are usable.
    usable_entries = (
        entry
        for entry in lock_data.get('package', [])
        if entry.get('name') and entry.get('version')
    )
    return {
        entry.get('name'): {
            'version': entry.get('version'),
            'dependencies': entry.get('dependencies', {}),
            'category': entry.get('category'),
            'optional': entry.get('optional', False),
            'python-versions': entry.get('python-versions'),
        }
        for entry in usable_entries
    }
# Function to fetch package metadata from PyPI | |
def fetch_pypi_metadata(package_name, package_version):
    """Fetch metadata for one pinned release from the PyPI JSON API.

    The release-specific endpoint (``/pypi/<name>/<version>/json``) is queried
    so that the summary, license and dependency list describe the locked
    version rather than whichever release is currently the latest.

    Args:
        package_name: Project name to look up on PyPI.
        package_version: Exact version string of the release.

    Returns:
        A dict with the keys ``name``, ``version``, ``summary``, ``home_page``,
        ``project_url``, ``license``, ``requires_python``, ``dependencies``
        (requirement strings with environment markers stripped),
        ``release_date`` (upload time of the first release file, if any) and
        ``hash_sha256`` (digest of the first release file, if any).
        An empty dict is returned, after printing a warning, when the request
        fails, PyPI answers with a non-200 status, or the body is not a JSON
        object.
    """
    # PYPI_API_URL has a single placeholder; filling it with "<name>/<version>"
    # yields the release-specific endpoint.
    url = PYPI_API_URL.format(f"{package_name}/{package_version}")
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException as e:
        print(f"Warning: Exception occurred while fetching {package_name}: {e}")
        return {}

    if response.status_code != 200:
        print(f"Warning: Failed to fetch data for {package_name}=={package_version}")
        return {}

    try:
        data = response.json()
    except ValueError as e:
        # Depending on the requests version, a malformed body surfaces as a
        # plain ValueError or as a subclass of it; catch both here.
        print(f"Warning: Invalid JSON received for {package_name}=={package_version}: {e}")
        return {}
    if not isinstance(data, dict):
        print(f"Warning: Failed to fetch data for {package_name}=={package_version}")
        return {}

    info = data.get('info') or {}

    # The release-specific endpoint lists the files under "urls"; fall back to
    # the "releases" mapping in case the response uses the project-level shape.
    release_files = (
        data.get('urls')
        or (data.get('releases') or {}).get(package_version)
        or []
    )
    first_file = release_files[0] if release_files else {}
    hash_sha256 = (first_file.get('digests') or {}).get('sha256')
    release_date = first_file.get('upload_time_iso_8601') or first_file.get('upload_time')

    # Keep only the requirement part; everything after ';' is an environment marker.
    stripped = (req.split(';')[0].strip() for req in info.get('requires_dist') or [])
    dependencies = [dep for dep in stripped if dep]

    return {
        'name': info.get('name') or package_name,
        'version': package_version,
        'summary': info.get('summary'),
        'home_page': info.get('home_page'),
        'project_url': info.get('project_url'),
        'license': info.get('license'),
        'requires_python': info.get('requires_python'),
        'dependencies': dependencies,
        'release_date': release_date,
        'hash_sha256': hash_sha256,
    }
# Function to generate SBOM in Markdown | |
def _md_cell(value):
    """Return *value* as text that cannot break a Markdown table row."""
    if value is None:
        return ""
    # A table row must stay on one line and a literal pipe would start a new
    # column, so collapse all whitespace runs and escape pipes.
    return " ".join(str(value).split()).replace("|", "\\|")


def _write_table(f, headers, rows):
    """Write *rows* to *f* as a GitHub-style Markdown table plus a blank line."""
    cleaned = [[_md_cell(cell) for cell in row] for row in rows]
    # disable_numparse stops tabulate from reading version strings as numbers,
    # which would otherwise render e.g. "1.10" as "1.1".
    f.write(tabulate(cleaned, headers=headers, tablefmt="github", disable_numparse=True))
    f.write("\n\n")


def _pypi_purl(name, version):
    """Build the package URL (PURL) of a PyPI component."""
    # The purl spec requires PyPI names lowercased with "_" replaced by "-".
    normalized = str(name or "").lower().replace("_", "-")
    return f"pkg:pypi/{normalized}@{version}"


def generate_sbom_markdown(sbom_data, project_metadata):
    """Write the SBOM report as a Markdown file in the current directory.

    The file is named ``SBOM_<YYYYMMDD_HHMMSS>.md`` after the generation time,
    and its path is printed once the file has been written.

    Args:
        sbom_data: List of component dicts. The keys read are ``name``,
            ``version``, ``license``, ``hash_sha256``, ``dependencies``,
            ``home_page`` and ``release_date``; missing values are shown
            as "N/A" (or an empty cell for name/version).
        project_metadata: Parsed ``pyproject.toml``. The project name is taken
            from ``tool.poetry.name``, then ``project.name``, and falls back
            to "Unknown".

    Returns:
        None.
    """
    # Take the time once so the file name and the report body always agree.
    generated_at = datetime.now()
    timestamp = generated_at.strftime("%Y%m%d_%H%M%S")
    filename = f"SBOM_{timestamp}.md"

    poetry_section = (project_metadata.get('tool') or {}).get('poetry') or {}
    project_section = project_metadata.get('project') or {}
    project_name = poetry_section.get('name') or project_section.get('name') or 'Unknown'

    with open(filename, 'w', encoding='utf-8') as f:
        f.write("# Software Bill of Materials (SBOM)\n\n")
        f.write(f"**Project:** {project_name}\n\n")
        f.write(f"**Generated on:** {generated_at.isoformat()}\n\n")

        # 1. Component Information
        f.write("## 1. Component Information\n\n")
        _write_table(
            f,
            ["Component Name", "Version", "Supplier/Vendor", "Component Type"],
            [
                [comp.get('name'), comp.get('version'), "PyPI", "Open-Source"]
                for comp in sbom_data
            ],
        )

        # 2. Identifiers and Metadata
        f.write("## 2. Identifiers and Metadata\n\n")
        _write_table(
            f,
            ["Component Name", "Unique Identifier (PURL)", "License", "SHA-256 Hash"],
            [
                [
                    comp.get('name'),
                    _pypi_purl(comp.get('name'), comp.get('version')),
                    comp.get('license') or "N/A",
                    comp.get('hash_sha256') or "N/A",
                ]
                for comp in sbom_data
            ],
        )

        # 3. Dependencies
        f.write("## 3. Dependencies\n\n")
        for comp in sbom_data:
            f.write(f"### {comp.get('name')}=={comp.get('version')}\n\n")
            dependencies = comp.get('dependencies') or []
            if dependencies:
                f.write("**Direct Dependencies:**\n\n")
                f.writelines(f"- {dep}\n" for dep in dependencies)
            else:
                f.write("No direct dependencies.\n")
            f.write("\n")

        # 4. Vulnerability Information
        f.write("## 4. Vulnerability Information\n\n")
        f.write("*Note: Vulnerability data is not fetched in this script. Integrate with a vulnerability database like [OSS Index](https://ossindex.sonatype.org/) for comprehensive information.*\n\n")

        # 5. Operational Details
        f.write("## 5. Operational Details\n\n")
        _write_table(
            f,
            ["Component Name", "Source Repository", "Release Date"],
            [
                [
                    comp.get('name'),
                    comp.get('home_page') or "N/A",
                    comp.get('release_date') or "N/A",
                ]
                for comp in sbom_data
            ],
        )

        # 6. Compliance Information
        f.write("## 6. Compliance Information\n\n")
        _write_table(
            f,
            ["Component Name", "License"],
            [[comp.get('name'), comp.get('license') or "N/A"] for comp in sbom_data],
        )

        # 7. Format and Standards
        f.write("## 7. Format and Standards\n\n")
        f.write("*This SBOM is generated in Markdown format. For interoperability, consider exporting in SPDX or CycloneDX formats.*\n\n")

        # 8. Security and Provenance
        f.write("## 8. Security and Provenance\n\n")
        _write_table(
            f,
            ["Component Name", "Home Page", "Provenance"],
            [
                [
                    comp.get('name'),
                    comp.get('home_page') or "N/A",
                    "Verified via PyPI" if comp.get('hash_sha256') else "N/A",
                ]
                for comp in sbom_data
            ],
        )

        # 9. Asset Classification
        f.write("## 9. Asset Classification\n\n")
        f.write("*This section requires manual classification based on the project's context.*\n\n")

        # 10. Lifecycle Information
        f.write("## 10. Lifecycle Information\n\n")
        _write_table(
            f,
            ["Component Name", "Release Date", "Maintenance Status"],
            [
                [
                    comp.get('name'),
                    comp.get('release_date') or "N/A",
                    "Active",  # Simplified assumption; not derived from any data
                ]
                for comp in sbom_data
            ],
        )

        # 11. Additional Context
        f.write("## 11. Additional Context\n\n")
        f.write("*Add any supplementary notes or explanations about specific components here.*\n\n")

        # 12. Automated Tools Integration
        f.write("## 12. Automated Tools Integration\n\n")
        f.write("*This SBOM was generated using a custom Python script. For automated workflows, integrate this script into your CI/CD pipeline.*\n\n")

        # Static footer with background information.
        f.write("---\n")
        f.write("### **Standards and Formats**\n\n")
        f.write("- **SPDX (Software Package Data Exchange):** A widely adopted standard for SBOMs.\n")
        f.write("- **CycloneDX:** Focused on security and supply chain management.\n")
        f.write("- **SWID Tags:** XML-based tags for software asset management.\n\n")
        f.write("### **Importance of SBOMs**\n\n")
        f.write("- **Enhance Security:** Identify and address vulnerabilities.\n")
        f.write("- **Ensure Compliance:** Manage licensing requirements.\n")
        f.write("- **Improve Transparency:** Visibility into the software supply chain.\n")
        f.write("- **Facilitate Maintenance:** Streamline updates and maintenance.\n")

    print(f"SBOM generated and saved to {filename}")
# Main function | |
def main():
    """Build the SBOM for the Poetry project in the current directory.

    Reads ``pyproject.toml`` and ``poetry.lock``, looks up every locked
    package on PyPI and writes the Markdown report. The process exits with
    status 1 if either input file is missing or cannot be parsed.
    """
    # Parse pyproject.toml
    project_data = parse_toml(PYPROJECT_FILE)

    # The lock file is checked separately so the message can point the user
    # at `poetry lock`.
    if not os.path.exists(POETRY_LOCK_FILE):
        print(
            f"Error: {POETRY_LOCK_FILE} not found. Ensure you have run `poetry lock`.",
            file=sys.stderr,
        )
        sys.exit(1)
    # Reuse parse_toml so a malformed lock file is reported cleanly instead of
    # ending in a traceback.
    lock_data = parse_toml(POETRY_LOCK_FILE)
    dependencies = get_dependencies_from_lock(lock_data)

    # Fetch metadata for each dependency
    sbom_data = []
    for pkg_name, pkg_info in dependencies.items():
        pkg_version = pkg_info.get('version')
        metadata = fetch_pypi_metadata(pkg_name, pkg_version)
        if not metadata:
            # An SBOM must list every component, so when PyPI has no answer
            # the entry is built from what the lock file itself records.
            metadata = {
                'name': pkg_name,
                'version': pkg_version,
                'requires_python': pkg_info.get('python-versions'),
                'dependencies': list(pkg_info.get('dependencies') or {}),
            }
        sbom_data.append(metadata)

    # Generate SBOM Markdown
    generate_sbom_markdown(sbom_data, project_data)
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment