Skip to content

Instantly share code, notes, and snippets.

@rmhrisk
Created March 18, 2024 22:45

Revisions

  1. rmhrisk created this gist Mar 18, 2024.
    114 changes: 114 additions & 0 deletions webpki-ca-countries.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,114 @@
    import pandas as pd
    import requests
    from cryptography import x509
    from cryptography.hazmat.backends import default_backend
    from io import StringIO
    from cryptography.hazmat.primitives import hashes
    import matplotlib.pyplot as plt

    def download_csv(url, timeout=120):
        """Fetch *url* over HTTP and return its body as an in-memory text buffer.

        Args:
            url: Address of the CSV resource to request with HTTP GET.
            timeout: Seconds to wait on the connection / between received bytes
                before giving up. Without a timeout ``requests`` may block
                forever on a stalled server.

        Returns:
            An ``io.StringIO`` wrapping the decoded response text, suitable for
            passing straight to ``pandas.read_csv``.

        Raises:
            requests.HTTPError: The server answered with an error status.
            requests.Timeout: The server did not respond within *timeout*.
        """
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return StringIO(response.text)

    def compute_fingerprint(pem_data):
        """Return the SHA-256 fingerprint of a PEM certificate as uppercase hex.

        Args:
            pem_data: Text of a PEM-encoded X.509 certificate.

        Returns:
            The fingerprint as an uppercase hexadecimal string, or ``None`` when
            the input cannot be encoded or parsed (the error is printed).
        """
        try:
            pem_bytes = pem_data.encode()
            certificate = x509.load_pem_x509_certificate(pem_bytes, default_backend())
            digest = certificate.fingerprint(hashes.SHA256())
            return digest.hex().upper()
        except Exception as error:
            # Best effort: one unreadable certificate must not abort the run.
            print(f"Error computing fingerprint: {error}")
        return None

    def extract_country_from_certificate(pem_data):
        """Return the country value(s) found in a PEM certificate's issuer name.

        Args:
            pem_data: Text of a PEM-encoded X.509 certificate.

        Returns:
            The distinct issuer ``countryName`` values joined with ``","`` in the
            order they appear in the certificate, or ``""`` when the certificate
            has no such attribute or cannot be parsed (the error is printed).
        """
        try:
            cert = x509.load_pem_x509_certificate(pem_data.encode(), default_backend())
            country_attrs = cert.issuer.get_attributes_for_oid(x509.NameOID.COUNTRY_NAME)
            # dict.fromkeys drops duplicates but keeps first-seen order; joining
            # a set would give an ordering that can vary from run to run.
            return ",".join(dict.fromkeys(attr.value for attr in country_attrs))
        except Exception as e:
            # Best effort: one unreadable certificate must not abort the run.
            print(f"Error extracting country: {e}")
            return ""

    def generate_pie_chart_with_legend(ca_countries):
        """Display a donut chart of how often each country label occurs.

        Args:
            ca_countries: Sequence of country labels; repeated labels are
                tallied and each distinct label becomes one wedge.

        Returns:
            None. The figure is shown with ``plt.show()``. When *ca_countries*
            is empty a short notice is printed and nothing is drawn.
        """
        if len(ca_countries) == 0:
            print("No CA country data to plot.")
            return

        # One row per distinct label, most frequent first.
        country_counts = (
            pd.Series(ca_countries)
            .value_counts()
            .rename_axis('Country')
            .reset_index(name='Counts')
        )
        total = country_counts['Counts'].sum()

        # A wide figure leaves room for the legend to the right of the chart.
        fig, ax = plt.subplots(figsize=(15, 7))

        # NOTE(review): the autopct texts are white and drawn at the default
        # pctdistance, which appears to fall inside the white centre disc added
        # below -- confirm whether they are meant to be visible.
        wedges, _, _ = ax.pie(
            country_counts['Counts'],
            startangle=140,
            autopct='%1.1f%%',
            textprops=dict(color="w"),
        )

        # A white disc over the centre turns the pie into a donut.
        ax.add_artist(plt.Circle((0, 0), 0.70, color='white'))

        # The legend shows each label's share of the total, not its raw count.
        legend_labels = [
            f"{country}: {count / total * 100:.2f}%"
            for country, count in zip(country_counts['Country'], country_counts['Counts'])
        ]
        ax.legend(wedges, legend_labels, title="Country", loc="center left", bbox_to_anchor=(1.1, 0.5))

        # Shrink the axes area so the legend is not cut off.
        fig.subplots_adjust(left=0.1, bottom=0.1, right=0.75)

        ax.set_title('Country Distribution of Certificate Authorities')
        plt.show()

    # Root programs reported in the table, in column order.
    _ROOT_PROGRAMS = ("Apple", "Google Chrome", "Microsoft", "Mozilla")


    def _included_programs(status):
        """Return the programs whose "<name>: Included" marker occurs in *status*."""
        if not isinstance(status, str):
            # A blank CSV cell is read by pandas as NaN (a float), not a string.
            return set()
        return {program for program in _ROOT_PROGRAMS if f"{program}: Included" in status}


    def _render_markdown_table(trusted_roots, ca_countries):
        """Build the markdown table text, one row per CA owner, plus a total line."""
        lines = [
            " | ".join(("CA Owner", "Countries", *_ROOT_PROGRAMS)),
            " | ".join(["---"] * (2 + len(_ROOT_PROGRAMS))),
        ]
        for ca_owner, stores in trusted_roots.items():
            marks = ["✓" if program in stores else "" for program in _ROOT_PROGRAMS]
            # str() keeps the join from failing on a non-string owner cell.
            lines.append(" | ".join([str(ca_owner), ca_countries[ca_owner], *marks]))
        return "\n".join(lines) + "\n" + f"\nTotal CAs: {len(trusted_roots)}\n"


    def generate_trusted_ca_markdown_table_from_url(ca_url, roots_url):
        """Print a markdown table of trusted CA owners and chart their countries.

        Downloads two CSV reports, keeps the rows of the first whose
        'Certificate Record Type' is 'Root Certificate', and looks up each
        root's country through a SHA-256 fingerprint -> country map computed
        from the 'PEM' column of the second. CA owners with at least one root
        marked "Included" by Apple, Google Chrome, Microsoft or Mozilla are
        printed as a markdown table, then the per-owner countries are passed to
        ``generate_pie_chart_with_legend``.

        Args:
            ca_url: URL of the CSV with the 'Certificate Record Type',
                'CA Owner', 'SHA-256 Fingerprint' and 'Status of Root Cert'
                columns.
            roots_url: URL of the CSV with a 'PEM' column of root certificates.

        Returns:
            None. The table is printed and the chart is displayed.
        """
        ca_data = pd.read_csv(download_csv(ca_url))
        ca_data = ca_data[ca_data['Certificate Record Type'] == 'Root Certificate']

        roots_data = pd.read_csv(download_csv(roots_url))
        fingerprints = roots_data['PEM'].apply(compute_fingerprint)
        countries = roots_data['PEM'].apply(extract_country_from_certificate)
        fingerprint_to_country = dict(zip(fingerprints, countries))

        trusted_roots = {}
        ca_countries = {}

        for _, row in ca_data.iterrows():
            # Only roots included by at least one program count.
            programs = _included_programs(row['Status of Root Cert'])
            if not programs:
                continue

            ca_owner = row['CA Owner']
            fingerprint = row.get('SHA-256 Fingerprint', '')
            # A missing fingerprint or an empty country both become "Unknown".
            country = fingerprint_to_country.get(fingerprint) or "Unknown"

            trusted_roots.setdefault(ca_owner, set()).update(programs)
            # Keep the first known country for an owner, but let a later root
            # replace an "Unknown" left by an earlier one.
            if ca_countries.get(ca_owner, "Unknown") == "Unknown":
                ca_countries[ca_owner] = country

        print(_render_markdown_table(trusted_roots, ca_countries))

        # One country label per CA owner feeds the distribution chart.
        generate_pie_chart_with_legend(list(ca_countries.values()))

    # URLs for the datasets
    ca_url = 'https://ccadb.my.salesforce-sites.com/ccadb/AllCertificateRecordsCSVFormatv2'
    roots_url = 'https://ccadb.my.salesforce-sites.com/mozilla/IncludedRootsDistrustTLSSSLPEMCSV?TrustBitsInclude=Websites'

    # Generate the markdown table and plot the pie chart with legend.
    # Guarded so that importing this module does not start network downloads
    # or open a plot window; running it as a script behaves as before.
    if __name__ == "__main__":
        generate_trusted_ca_markdown_table_from_url(ca_url, roots_url)