rmhrisk · March 18, 2024 22:45 · Mar 18, 2024
diff --git a/webpki-ca-countries.py b/webpki-ca-countries.py
@@ -0,0 +1,114 @@
+import pandas as pd
+import requests
+from cryptography import x509
+from cryptography.hazmat.backends import default_backend
+from io import StringIO
+from cryptography.hazmat.primitives import hashes
+import matplotlib.pyplot as plt
+
+def download_csv(url, timeout=120):
+    """Download a CSV document and return it as an in-memory text buffer.
+
+    Args:
+        url: Address of the CSV resource, fetched with an HTTP GET.
+        timeout: Seconds to wait for the server to connect or send data
+            before giving up. Without a timeout ``requests`` waits
+            indefinitely on a stalled connection.
+
+    Returns:
+        A ``StringIO`` wrapping the decoded response body.
+
+    Raises:
+        requests.HTTPError: If the server answers with an error status.
+        requests.Timeout: If the server stays silent for ``timeout`` seconds.
+    """
+    response = requests.get(url, timeout=timeout)
+    response.raise_for_status()
+    return StringIO(response.text)
+
+def compute_fingerprint(pem_data):
+    """Return the SHA-256 fingerprint of a PEM certificate as uppercase hex.
+
+    Any failure while parsing or hashing is printed to stdout and signalled
+    to the caller by returning ``None`` instead of raising.
+    """
+    try:
+        certificate = x509.load_pem_x509_certificate(
+            pem_data.encode(), default_backend()
+        )
+        digest = certificate.fingerprint(hashes.SHA256())
+    except Exception as e:
+        print(f"Error computing fingerprint: {e}")
+        return None
+    else:
+        return digest.hex().upper()
+
+def extract_country_from_certificate(pem_data):
+    """Return the issuer country value(s) of a PEM-encoded certificate.
+
+    Args:
+        pem_data: Certificate text in PEM format.
+
+    Returns:
+        The distinct values of the issuer's country-name attribute, sorted
+        and joined with commas. An empty string is returned when the issuer
+        carries no country or the certificate cannot be parsed; in the latter
+        case the error is printed rather than raised.
+    """
+    try:
+        cert = x509.load_pem_x509_certificate(pem_data.encode(), default_backend())
+        issuer_countries = {
+            attribute.value
+            for attribute in cert.issuer.get_attributes_for_oid(x509.NameOID.COUNTRY_NAME)
+        }
+        # Sets have no stable iteration order, so sort before joining to get
+        # the same string on every run when several countries are present.
+        return ",".join(sorted(issuer_countries))
+    except Exception as e:
+        print(f"Error extracting country: {e}")
+        return ""
+
+def generate_pie_chart_with_legend(ca_countries):
+    """Display a donut chart of how often each country label occurs.
+
+    Args:
+        ca_countries: Sequence of country labels, one entry per CA; equal
+            labels are tallied together.
+
+    Returns:
+        None. The figure is shown with ``plt.show()``.
+    """
+    # Tally the labels into a two-column frame: 'Country' and 'Counts'.
+    country_counts = (
+        pd.Series(ca_countries)
+        .value_counts()
+        .rename_axis('Country')
+        .reset_index(name='Counts')
+    )
+
+    # The legend advertises percentages, so convert the raw counts to shares
+    # of the total instead of printing the counts with a '%' sign.
+    percentages = country_counts['Counts'] / country_counts['Counts'].sum() * 100
+
+    # A wide figure leaves room for the legend to the right of the chart.
+    _, ax = plt.subplots(figsize=(15, 7))
+
+    # pctdistance places the white percentage labels on the coloured ring;
+    # at the default of 0.6 they would fall inside the white centre circle
+    # (radius 0.70) drawn below and be unreadable.
+    wedges, _, _ = ax.pie(
+        country_counts['Counts'],
+        startangle=140,
+        autopct='%1.1f%%',
+        pctdistance=0.85,
+        textprops=dict(color="w"),
+    )
+
+    # A white circle over the centre turns the pie into a donut.
+    ax.add_artist(plt.Circle((0, 0), 0.70, color='white'))
+
+    # Legend on the right-hand side listing each country with its share.
+    legend_labels = [
+        f"{country}: {perc:.2f}%"
+        for country, perc in zip(country_counts['Country'], percentages)
+    ]
+    ax.legend(wedges, legend_labels, title="Country", loc="center left", bbox_to_anchor=(1.1, 0.5))
+
+    # Shrink the plotting area so the legend is not cut off.
+    plt.subplots_adjust(left=0.1, bottom=0.1, right=0.75)
+
+    ax.set_title('Country Distribution of Certificate Authorities')
+    plt.show()
+
+def generate_trusted_ca_markdown_table_from_url(ca_url, roots_url):
+    """Print a markdown table of trusted CA owners and chart their countries.
+
+    Root-certificate records are read from ``ca_url`` and matched by SHA-256
+    fingerprint against the PEM certificates listed at ``roots_url`` to find
+    each root's issuer country. An owner is listed when at least one of its
+    roots has a "<program>: Included" marker for Apple, Google Chrome,
+    Microsoft or Mozilla in its status text.
+
+    Args:
+        ca_url: URL of a CSV with the columns 'Certificate Record Type',
+            'CA Owner', 'SHA-256 Fingerprint' and 'Status of Root Cert'.
+        roots_url: URL of a CSV with a 'PEM' column of root certificates.
+
+    Returns:
+        None. The table is printed to stdout and the chart is displayed.
+    """
+    programs = ["Apple", "Google Chrome", "Microsoft", "Mozilla"]
+
+    ca_data = pd.read_csv(download_csv(ca_url))
+    ca_data = ca_data[ca_data['Certificate Record Type'] == 'Root Certificate']
+
+    # Map each root's computed fingerprint to its issuer country string.
+    # Certificates whose fingerprint could not be computed are left out,
+    # since no record could ever be matched to them.
+    roots_data = pd.read_csv(download_csv(roots_url))
+    fingerprint_to_country = {}
+    for pem in roots_data['PEM']:
+        fingerprint = compute_fingerprint(pem)
+        if fingerprint is not None:
+            fingerprint_to_country[fingerprint] = extract_country_from_certificate(pem)
+
+    trusted_roots = {}    # owner -> set of programs that include any of its roots
+    owner_countries = {}  # owner -> set of countries seen across its roots
+
+    for _, row in ca_data.iterrows():
+        status = row['Status of Root Cert']
+        # An empty CSV cell is read as NaN (a float), which does not support
+        # the substring test below; such a root is included nowhere.
+        if not isinstance(status, str):
+            continue
+
+        included = {program for program in programs if f"{program}: Included" in status}
+        # Only include CAs that are trusted by at least one program
+        if not included:
+            continue
+
+        # str() keeps a non-string owner cell from breaking the join below.
+        ca_owner = str(row['CA Owner'])
+        trusted_roots.setdefault(ca_owner, set()).update(included)
+
+        # Collect the countries of every root of this owner instead of
+        # keeping only the last row's value, so a root without a match does
+        # not replace a country that another root already established.
+        country = fingerprint_to_country.get(row.get('SHA-256 Fingerprint', ''), "")
+        owner_countries.setdefault(ca_owner, set()).update(
+            code for code in country.split(",") if code
+        )
+
+    # Use "Unknown" for CAs without a country
+    ca_countries = {
+        owner: ",".join(sorted(codes)) or "Unknown"
+        for owner, codes in owner_countries.items()
+    }
+
+    # Generating markdown table
+    lines = [
+        "CA Owner | Countries | Apple | Google Chrome | Microsoft | Mozilla",
+        "--- | --- | --- | --- | --- | ---",
+    ]
+    for ca_owner, stores in trusted_roots.items():
+        # A literal '|' in an owner name would otherwise split the cell.
+        cells = [ca_owner.replace("|", "\\|"), ca_countries.get(ca_owner, "Unknown")]
+        cells += ["✓" if program in stores else "" for program in programs]
+        lines.append(" | ".join(cells))
+    markdown_table = "\n".join(lines) + "\n"
+    markdown_table += f"\nTotal CAs: {len(trusted_roots)}\n"
+    print(markdown_table)
+
+    # One country label per CA owner feeds the chart.
+    generate_pie_chart_with_legend(list(ca_countries.values()))
+
+# CSV endpoints read by the script: the certificate-record listing and the
+# PEM listing of roots queried with TrustBitsInclude=Websites.
+ca_url = 'https://ccadb.my.salesforce-sites.com/ccadb/AllCertificateRecordsCSVFormatv2'
+roots_url = 'https://ccadb.my.salesforce-sites.com/mozilla/IncludedRootsDistrustTLSSSLPEMCSV?TrustBitsInclude=Websites'
+
+# Print the markdown table, then display the country chart.
+generate_trusted_ca_markdown_table_from_url(ca_url=ca_url, roots_url=roots_url)