Skip to content

Instantly share code, notes, and snippets.

@jmb
Created July 14, 2018 21:58
Show Gist options
  • Save jmb/424e8e113f2a546349ff60d07f4eab3a to your computer and use it in GitHub Desktop.
Save jmb/424e8e113f2a546349ff60d07f4eab3a to your computer and use it in GitHub Desktop.
Script to extract all email addresses from all emails in all folders of an IMAP account.
"""Create a connection to an IMAP server and find ALL email addresses
Original script by abought: https://gist.github.com/abought/15a1e08705b121c1b7bd
References:
http://www.voidynullness.net/blog/2013/07/25/gmail-email-with-python-via-imap/
and
https://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/
"""
__author__ = 'jmb'
import email
import imaplib
import getpass
import sys
import re
import os
# EDIT these as required:
FILENAME = 'out.txt'  # output file; the script opens it in append mode
DEFAULT_MAIL_SERVER = 'imap.server'  # host passed to imaplib.IMAP4_SSL
# No user parameters below this line
# ADDR_PATTERN = re.compile('<(.*?)>')  # Finds email as <user@example.com>
# Find ALL email addresses in all fields.
# The pattern must NOT be anchored with ^...$: header values usually look
# like 'Name <user@example.com>, other@example.org', and an anchored pattern
# only matches when the whole header is one bare address.
ADDR_PATTERN = re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)")
def connect(user, pwd, server=DEFAULT_MAIL_SERVER):
    """Connect to [the specified] mail server. Return an open connection.

    Opens an IMAP4-over-SSL connection to ``server`` and logs in with
    ``user`` / ``pwd``.  If the server rejects the login, a message is
    printed and the process exits with status 1.
    """
    conn = imaplib.IMAP4_SSL(server)
    try:
        conn.login(user, pwd)
    except imaplib.IMAP4.error:
        # A single-argument print(...) produces the same output on
        # Python 2 and Python 3.
        print("Failed to login")
        sys.exit(1)
    return conn
def print_folders(conn):
    """Print the mailbox folders reported by the server, one per line.

    ``conn.list()`` returns a ``(status, lines)`` pair; only the lines are
    printed, each indented by two spaces.  Nothing but a short notice is
    printed when the status is not 'OK'.
    """
    rv, entries = conn.list()
    if rv != 'OK':
        print("Could not list folders")
        return
    for entry in entries:
        if entry is None:
            continue
        # On Python 3 the lines arrive as bytes; decode them for display.
        # (On Python 2 bytes is str, so the line is printed unchanged.)
        if isinstance(entry, bytes) and not isinstance(entry, str):
            entry = entry.decode("utf-8", "replace")
        print("  %s" % (entry,))
def get_folder(conn, folder_name):
    """Fetch a specific folder (or label) from server.

    Closes the currently selected folder, if any, then selects
    ``folder_name``.  When the select fails, a notice and the server's
    folder list are printed.  The same ``conn`` object is returned in
    either case; callers can inspect ``conn.state`` to see whether a
    folder is now selected.
    """
    if conn.state == "SELECTED":
        # Explicitly close any previously opened folders; may not be necessary
        conn.close()
    rv, _ = conn.select(folder_name)
    if rv != 'OK':
        print("Could not open specified folder. Known labels:")
        print_folders(conn)
    return conn
def get_email_ids(conn, query='ALL'):
    """Get the UIDs of all emails in the selected folder matching ``query``.

    Returns a list of UID tokens (empty when nothing matches or the search
    fails).  Raises ``imaplib.IMAP4.error`` when no folder is selected.
    """
    if conn.state != "SELECTED":
        raise imaplib.IMAP4.error("Cannot search without selecting a folder")
    rv, data = conn.uid('search', None, query)
    if rv != 'OK':
        print("Could not fetch email ids")  # for some reason...
        return []
    # The reply may carry no payload at all (empty list or a None entry);
    # treat that as "no messages" instead of crashing on None.split().
    if not data or not data[0]:
        return []
    return data[0].split()
def fetch_message(conn, msg_uid):
    """
    Fetch a specific message uid (not sequential id!) from the given folder;
    return the parsed message.

    On any failure (non-'OK' status, or a reply that carries no message
    literal) an error is printed and an empty ``email.message.Message`` is
    returned, so callers always get an object with the same header-lookup
    methods (``get`` / ``get_all``) as a real message.
    """
    from email.message import Message  # local: only needed for the fallback

    # TODO: Could we fetch just the envelope of the response to save bandwidth?
    rv, data = conn.uid('fetch', msg_uid, "(RFC822)")
    # A usable reply has a (header, literal) tuple in data[0]; anything else
    # (e.g. [None]) means there is no message body to parse.
    if rv != 'OK' or not data or not isinstance(data[0], tuple):
        print("ERROR fetching message # %s" % (msg_uid,))
        return Message()
    raw = data[0][1]
    if isinstance(raw, str):
        # Python 2: the literal is a native str.
        return email.message_from_string(raw)
    # Python 3: the literal is bytes.
    return email.message_from_bytes(raw)
def get_recipients(msg_parsed):
    """Given a parsed message, extract and return the address list.

    Collects every address found in the From, To, Cc and Bcc headers, in
    that order, and returns them as a list of strings (duplicates kept).
    Headers are parsed with ``email.utils.getaddresses`` so that forms such
    as ``Name <user@example.com>`` and comma-separated lists are handled.
    ``msg_parsed`` may be an ``email.message.Message`` or a plain mapping
    of header name to value (for example an empty dict).
    """
    from email.utils import getaddresses  # local: not imported at file level

    recipients = []
    addr_fields = ['From', 'To', 'Cc', 'Bcc']
    for f in addr_fields:
        if hasattr(msg_parsed, 'get_all'):
            # Message objects can hold the same header more than once.
            values = msg_parsed.get_all(f, [])
        else:
            single = msg_parsed.get(f)
            values = [single] if single else []
        recipients.extend(addr for _name, addr in getaddresses(values) if addr)
    return recipients
def _mailbox_token(list_line):
    """Return the mailbox-name token of one LIST response line, or None.

    The token is returned exactly as the server sent it (still quoted if
    the server quoted it) so it can be passed straight to ``select``.
    """
    if isinstance(list_line, bytes) and not isinstance(list_line, str):
        list_line = list_line.decode("utf-8", "replace")  # Python 3 bytes
    if not isinstance(list_line, str):
        return None
    # Shape: (flags) "delimiter"|NIL name  -- flags may hold several items
    # and the name may contain spaces, so a plain split() is not enough.
    found = re.match(
        r'\((?P<flags>[^)]*)\) (?P<delim>"(?:[^"\\]|\\.)*"|NIL) (?P<name>.+)$',
        list_line)
    return found.group("name") if found else None


if __name__ == "__main__":
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    username = read_line("Full email address: ")
    password = getpass.getpass()
    # Connect
    mail_conn = connect(username, password)
    try:
        # Open output file; the with-block closes it even if a folder fails.
        with open(FILENAME, "a") as out_file:
            # Go through each folder
            for f in mail_conn.list()[1]:
                folder = _mailbox_token(f)
                if not folder or folder.strip('"') == ".":
                    continue
                mail_conn = get_folder(mail_conn, folder)
                if mail_conn.state != "SELECTED":
                    continue  # Skip folder if selection failed
                msg_uid_list = get_email_ids(mail_conn)
                print("Scanning folder:  %s  with  %d  messages"
                      % (folder, len(msg_uid_list)))
                # Collect the distinct addresses seen in this folder
                unique_addresses = set()
                for msg_uid in msg_uid_list:
                    msg = fetch_message(mail_conn, msg_uid)
                    unique_addresses.update(get_recipients(msg))
                print("Writing %d email addresses to file %s"
                      % (len(unique_addresses), out_file.name))
                out_file.write(
                    "".join(address + "\n"
                            for address in sorted(unique_addresses)))
                out_file.flush()
                os.fsync(out_file.fileno())
        print("\nWritten to file: " + FILENAME)
    finally:
        try:
            # close() is only attempted while a folder is selected
            if mail_conn.state == "SELECTED":
                mail_conn.close()
        finally:
            mail_conn.logout()
@8bitsia
Copy link

8bitsia commented May 8, 2025

Hi, thank you for the script! I've been searching for a while and this is exactly what I'm looking for.
I tried to convert this to Python 3 but I ran into a problem.
I can connect to my IMAP server just fine, but then I get an error on
folder = f.split()[2].strip('"')
it says:
TypeError: a bytes-like object is required, not 'str'

EDIT: REMOVED

@8bitsia
Copy link

8bitsia commented May 8, 2025

Woohoo! Got it working.
For anyone else who might be looking for this:

"""Create a connection to an IMAP server and find ALL email addresses
Original script by abought: https://gist.github.com/abought/15a1e08705b121c1b7bd
References:
http://www.voidynullness.net/blog/2013/07/25/gmail-email-with-python-via-imap/
and
https://yuji.wordpress.com/2011/06/22/python-imaplib-imap-example-with-gmail/
"""
__author__ = 'jmb'

import email
import imaplib
import getpass
import sys
import re
import os


# EDIT these as required:
FILENAME = 'out.txt'  # output file; the script opens it in append mode
DEFAULT_MAIL_SERVER = 'imap.server'  # host passed to imaplib.IMAP4_SSL

# No user parameters below this line
# ADDR_PATTERN = re.compile('<(.*?)>')  # Finds email as <user@example.com>
# Find ALL email addresses in all fields.
# The pattern must NOT be anchored with ^...$: header values usually look
# like 'Name <user@example.com>, other@example.org', and an anchored pattern
# only matches when the whole header is one bare address.
ADDR_PATTERN = re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)")


def connect(user, pwd, server=DEFAULT_MAIL_SERVER):
    """Open an SSL IMAP session on ``server`` and authenticate.

    Returns the logged-in connection.  When the server refuses the
    credentials, prints a notice and terminates the process with exit
    status 1.
    """
    session = imaplib.IMAP4_SSL(server)
    try:
        session.login(user, pwd)
    except imaplib.IMAP4.error:
        print("Failed to login")
        sys.exit(1)
    else:
        return session


def print_folders(conn):
    """Print the mailbox folders reported by the server, one per line.

    ``conn.list()`` returns a ``(status, lines)`` pair; only the lines are
    printed, each indented by two spaces.  Nothing but a short notice is
    printed when the status is not 'OK'.
    """
    rv, entries = conn.list()
    if rv != 'OK':
        print("Could not list folders")
        return
    for entry in entries:
        if entry is None:
            continue
        # Lines arrive as bytes; decode them so the output is readable
        # rather than a b'...' repr.
        if isinstance(entry, bytes):
            entry = entry.decode("utf-8", "replace")
        print(" ", entry)


def get_folder(conn, folder_name):
    """Select ``folder_name`` (a folder or label) on the connection.

    A folder that is currently selected is closed first.  If the server
    does not answer 'OK' to the select, a notice is printed followed by
    the folder listing.  The connection object is returned either way.
    """
    folder_is_open = conn.state == "SELECTED"
    if folder_is_open:
        # Closing the previous folder explicitly; may not be required.
        conn.close()

    status, _ = conn.select(folder_name)
    if status == 'OK':
        return conn

    print("Could not open specified folder. Known labels:")
    print_folders(conn)
    return conn


def get_email_ids(conn, query='ALL'):
    """Get the UIDs of all emails in the selected folder matching ``query``.

    Returns a list of UID tokens (empty when nothing matches or the search
    fails).  Raises ``imaplib.IMAP4.error`` when no folder is selected.
    """
    if conn.state != "SELECTED":
        raise imaplib.IMAP4.error("Cannot search without selecting a folder")

    rv, data = conn.uid('search', None, query)
    if rv != 'OK':
        print("Could not fetch email ids")  # for some reason...
        return []

    # The reply may carry no payload at all (empty list or a None entry);
    # treat that as "no messages" instead of crashing on None.split().
    if not data or not data[0]:
        return []

    return data[0].split()


def fetch_message(conn, msg_uid):
    """
    Fetch a specific message uid (not sequential id!) from the given folder;
    return the parsed message.

    On any failure (non-'OK' status, or a reply that carries no message
    literal) an error is printed and an empty ``email.message.Message`` is
    returned.  Returning a Message rather than a plain dict matters because
    callers use ``get_all``, which a dict does not provide.
    """
    from email.message import Message  # local: only needed for the fallback

    # TODO: Could we fetch just the envelope of the response to save bandwidth?
    rv, data = conn.uid('fetch', msg_uid, "(RFC822)")
    # A usable reply has a (header, literal) tuple in data[0]; anything else
    # (e.g. [None]) means there is no message body to parse.
    if rv != 'OK' or not data or not isinstance(data[0], tuple):
        print("ERROR fetching message #", msg_uid)
        return Message()

    return email.message_from_bytes(data[0][1])

from email.utils import getaddresses

def get_recipients(msg_parsed):
    """Given a parsed message, extract and return the address list.

    Collects every address found in the From, To, Cc and Bcc headers, in
    that order, lower-cased, and returns them as a list (duplicates kept).
    ``msg_parsed`` may be an ``email.message.Message`` or a plain mapping
    of header name to value; the latter covers the empty dict that
    ``fetch_message`` returns when a fetch fails, which has no ``get_all``.
    """
    addr_fields = ['From', 'To', 'Cc', 'Bcc']
    recipients = []

    for f in addr_fields:
        if hasattr(msg_parsed, 'get_all'):
            # Message objects can hold the same header more than once.
            field_data = msg_parsed.get_all(f, [])
        else:
            single = msg_parsed.get(f)
            field_data = [single] if single else []
        recipients.extend(
            addr.lower() for _name, addr in getaddresses(field_data) if addr
        )

    return recipients


#def get_recipients(msg_parsed):
#    """Given a parsed message, extract and return recipient list"""
#    recipients = []
#    addr_fields = ['From', 'To', 'Cc', 'Bcc']
#
#    for f in addr_fields:
#        rfield = msg_parsed.get(f, "")  # Empty string if field not present
#        rlist = re.findall(ADDR_PATTERN, rfield)
#        recipients.extend(rlist)
#
#    return recipients

# One LIST response line: (flags) "delimiter"|NIL mailbox-name
_LIST_LINE = re.compile(
    r'\((?P<flags>[^)]*)\) (?P<delim>"(?:[^"\\]|\\.)*"|NIL) (?P<name>.+)$'
)


def parse_folder_name(line):
    """Extract the mailbox name from one line of a LIST response.

    Accepts the line as bytes (decoded as UTF-8) or str.  Any hierarchy
    delimiter is accepted -- "." , "/" or NIL -- not only ".".  The name is
    returned exactly as the server sent it, i.e. still wrapped in double
    quotes when the server quoted it.  Returns None, after printing a
    warning, when the line does not have the expected shape.
    """
    if isinstance(line, bytes):
        text = line.decode("utf-8", "replace")
    elif isinstance(line, str):
        text = line
    else:
        text = None

    found = _LIST_LINE.match(text) if text is not None else None
    if found is None:
        print(f"Warning: Could not parse folder name from: {line}")
        return None
    return found.group("name")  # Already quoted if needed



if __name__ == "__main__":
    username = input("Full email address: ")
    password = getpass.getpass()

    # Connect
    mail_conn = connect(username, password)

    try:
        # Open output file; the with-block closes it even if a folder fails.
        with open(FILENAME, "a") as out_file:
            # Go through each folder
            for f in mail_conn.list()[1]:
                folder = parse_folder_name(f)
                if not folder or folder == ".":
                    continue

                mail_conn = get_folder(mail_conn, folder)
                if mail_conn.state != "SELECTED":
                    continue  # Skip folder if selection failed

                msg_uid_list = get_email_ids(mail_conn)
                print("Scanning folder: ", folder, " with ", len(msg_uid_list), " messages")

                # Collect the distinct addresses seen in this folder
                unique_addresses = set()
                for msg_uid in msg_uid_list:
                    msg = fetch_message(mail_conn, msg_uid)
                    unique_addresses.update(get_recipients(msg))

                print("Writing", len(unique_addresses), "email addresses to file", out_file.name)
                # Sorted so the lines written for a folder are deterministic.
                out_file.write("".join(address + "\n" for address in sorted(unique_addresses)))
                out_file.flush()
                os.fsync(out_file.fileno())

        print("\nWritten to file: " + FILENAME)
    finally:
        try:
            # close() is only attempted while a folder is selected, so a
            # failed select on the last folder does not end in a traceback.
            if mail_conn.state == "SELECTED":
                mail_conn.close()
        finally:
            mail_conn.logout()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment