Last active
April 9, 2025 20:17
-
-
Save Godefroy/ff9083a7871fb55e54a83593c0f442d9 to your computer and use it in GitHub Desktop.
Detect email providers from a CSV with an "email" column
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Requirements:
# - dnspython (`pip install dnspython`)
# CSV:
# - filename: emails.csv
# - column: "email"
import csv | |
import dns.resolver | |
from collections import Counter | |
# Provider label -> substrings searched for in a domain's lowercased MX host
# names. Insertion order matters: get_mx_provider() returns the first label
# whose substring is found.
known_providers = {
    "google": ["google", "gmail"],
    "microsoft": ["outlook", "hotmail", "office365"],
    "yahoo": ["yahoo"],
    "protonmail": ["protonmail"],
    "ovh": ["ovh"],
    "orange": ["orange"],
    "laposte": ["laposte"],
    "free": ["free"],
    "infomaniak": ["infomaniak"],
    "yopmail": ["yopmail"],
    "cloudflare": ["cloudflare"],
}
def get_domain(email):
    """Return the text after the last '@' in *email*, cleaned of padding.

    Surrounding whitespace is removed first, then any leading/trailing
    single quotes, then any leading/trailing double quotes. When *email*
    contains no '@', the whole string is treated as the domain.
    """
    _, _, tail = email.rpartition('@')
    cleaned = tail.strip()
    # Order is significant: single quotes are peeled before double quotes.
    for quote_char in ("'", '"'):
        cleaned = cleaned.strip(quote_char)
    return cleaned
def get_mx_provider(domain):
    """Classify *domain* by the mail provider named in its MX records.

    The domain's MX records are resolved and examined from the most
    preferred (lowest preference value) to the least preferred. The first
    record whose lowercased host name contains one of the substrings listed
    in ``known_providers`` decides the result.

    Args:
        domain: Domain name to look up (the part of an address after '@').

    Returns:
        A key of ``known_providers``, or ``"others"`` when the domain is
        empty, the lookup fails, or no MX host matches a known pattern.
    """
    if not domain:
        # Nothing to resolve; skip the pointless network round trip.
        return "others"

    try:
        answers = dns.resolver.resolve(domain, 'MX')
    except Exception:
        # Deliberate best-effort: any resolution failure (unknown domain,
        # no MX answer, timeout, malformed name, ...) counts as "others".
        # Only the DNS call is guarded so that mistakes in the matching
        # code below are not silently hidden.
        return "others"

    # Answer order is not meaningful; sort so the primary exchanger is
    # checked first and the result is deterministic.
    for record in sorted(answers, key=lambda r: r.preference):
        mx = record.exchange.to_text().lower()
        for provider, patterns in known_providers.items():
            if any(p in mx for p in patterns):
                return provider
    return "others"
def read_emails_from_csv(file_path, column_name="email"):
    """Read the non-empty values of one column from a CSV file.

    The file is decoded as UTF-8 and a leading byte-order mark, if present,
    is discarded; otherwise the first header would be read as
    ``"\\ufeffemail"`` and no row would ever match *column_name*.

    Args:
        file_path: Path of the CSV file; its first row must be the header.
        column_name: Header of the column to extract.

    Returns:
        A list of the column's values with surrounding whitespace removed,
        in file order. Rows where the column is absent, empty, or
        whitespace-only are skipped.

    Raises:
        OSError: If the file cannot be opened.
    """
    emails = []
    # 'utf-8-sig' reads plain UTF-8 as well as BOM-prefixed UTF-8.
    with open(file_path, newline='', encoding='utf-8-sig') as csvfile:
        for row in csv.DictReader(csvfile):
            # Short rows yield None for missing cells; treat as empty.
            value = (row.get(column_name) or "").strip()
            if value:
                emails.append(value)
    return emails
def main():
    """Print how the addresses in ``emails.csv`` are split across providers.

    Reads the "email" column of ``emails.csv`` from the current directory,
    classifies every address by its domain's MX provider, and prints one
    line per provider with its count and percentage of all addresses.
    """
    file_path = "emails.csv"
    emails = read_emails_from_csv(file_path)

    # Many addresses share a domain; resolve each distinct domain only once
    # instead of issuing one DNS query per address.
    provider_by_domain = {}
    providers = []
    for email in emails:
        domain = get_domain(email)
        if domain not in provider_by_domain:
            provider_by_domain[domain] = get_mx_provider(domain)
        providers.append(provider_by_domain[domain])

    total = len(providers)
    counter = Counter(providers)
    print("Proportions des providers :")
    # With no addresses the counter is empty, so the division never runs.
    for provider, count in counter.items():
        pourcentage = (count / total) * 100
        print(f"{provider}: {count} ({pourcentage:.2f}%)")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment