Skip to content

Instantly share code, notes, and snippets.

@mjseeley
Forked from baali/dlAttachments.py
Last active October 17, 2020 12:32
Show Gist options
  • Save mjseeley/5e182c0c29dde014cfac to your computer and use it in GitHub Desktop.
Save mjseeley/5e182c0c29dde014cfac to your computer and use it in GitHub Desktop.
Forked to add downloading of files with the same name and different contents. | WARNING: [messy code]
# Download ALL attachments from GMail
# 1. Run this script from a console, not from an IDE; getpass.getpass() will fail otherwise.
# https://docs.python.org/2/library/getpass.html
# 2. Make sure you have IMAP enabled in your GMail settings.
# https://support.google.com/mail/troubleshooter/1668960?hl=en
# 3. If you are using 2-Step Verification, you may need an App Password.
# https://support.google.com/accounts/answer/185833
# 4. Reference information for GMail IMAP extension can be found here.
# https://developers.google.com/gmail/imap_extensions
import email
import hashlib
import getpass
import imaplib
import os
from collections import defaultdict, Counter
import platform
# Number of distinct-content attachments seen so far under each file name;
# drives the "(N)" suffix that is added when a name repeats.
fileNameCounter = Counter()
# MD5 digests of the payloads already seen under each file name, so that an
# identical attachment is not stored a second time.
fileNameHashes = defaultdict(set)
# Message ids returned by the IMAP search during this run.
NewMsgIDs = set()
# Message ids that have already been handled: loaded from the resume file and
# extended as messages are processed.
ProcessedMsgIDs = set()
def recover(resumeFile):
    """Load already-processed message ids from the resume file.

    The resume file is a flat, comma-separated list of message ids.  Every
    id found in it is added to the module-level ``ProcessedMsgIDs`` set.  If
    the file does not exist, an empty one is created instead.

    Args:
        resumeFile: Path of the resume file.
    """
    if not os.path.exists(resumeFile):
        print('No Recovery file found.')
        # Create an empty resume file up front.
        open(resumeFile, 'a').close()
        return

    print('Recovery file found resuming...')
    with open(resumeFile) as f:
        tokens = f.read().split(',')
    # Ids are stored as "<id>,", so splitting always leaves a trailing empty
    # token; drop it (and any stray whitespace) rather than recording ''.
    ProcessedMsgIDs.update(token.strip() for token in tokens if token.strip())
def GenerateMailMessages(userName, password, resumeFile):
    """Yield each not-yet-processed GMail message that has an attachment.

    Logs in to ``imap.gmail.com``, searches "[Gmail]/All Mail" with the
    ``X-GM-RAW "has:attachment"`` criterion and yields one parsed
    ``email`` message per hit whose id is not in ``ProcessedMsgIDs``.

    A message is recorded as processed (in ``ProcessedMsgIDs`` and in
    ``resumeFile``) only after the consumer asks for the next message, i.e.
    after it has finished with the one that was yielded.

    Args:
        userName: GMail account name.
        password: Account password (or app password).
        resumeFile: Path of the file that processed ids are appended to.

    Yields:
        Parsed message objects, one per unprocessed message.

    Raises:
        RuntimeError: If login, mailbox selection, search or fetch does not
            return ``'OK'``.
    """
    # Python 3 hands back raw messages as bytes; Python 2 has no
    # message_from_bytes and hands back str.
    parseMessage = getattr(email, 'message_from_bytes', email.message_from_string)

    imapSession = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        typ, accountDetails = imapSession.login(userName, password)
        print(typ)
        print(accountDetails)
        if typ != 'OK':
            print('Not able to sign in!')
            raise RuntimeError('Not able to sign in!')

        # The mailbox name contains a space, so pass it already quoted:
        # Python 3's imaplib does not quote arguments, and Python 2's leaves
        # an already-quoted argument untouched.
        typ, _ = imapSession.select('"[Gmail]/All Mail"')
        if typ != 'OK':
            raise RuntimeError('Not able to select [Gmail]/All Mail.')

        typ, data = imapSession.search(None, '(X-GM-RAW "has:attachment")')
        # typ, data = imapSession.search(None, 'ALL')
        if typ != 'OK':
            print('Error searching Inbox.')
            raise RuntimeError('Error searching Inbox.')

        # Iterating over all emails
        for msgId in data[0].split():
            # Ids read back from the resume file are text, so compare and
            # store ids as text even when imaplib returns bytes.
            msgKey = msgId if isinstance(msgId, str) else msgId.decode('ascii')
            NewMsgIDs.add(msgKey)
            if msgKey in ProcessedMsgIDs:
                # Skip before fetching: no need to download a whole message
                # that was already handled in an earlier run.
                continue

            typ, messageParts = imapSession.fetch(msgId, '(RFC822)')
            if typ != 'OK':
                print('Error fetching mail.')
                raise RuntimeError('Error fetching mail.')

            yield parseMessage(messageParts[0][1])

            ProcessedMsgIDs.add(msgKey)
            with open(resumeFile, 'a') as resume:
                resume.write('{id},'.format(id=msgKey))

        imapSession.close()
    finally:
        # Always end the session, including when an error occurred or the
        # consumer stopped iterating early.
        imapSession.logout()
def _safe_attachment_name(rawName):
    """Reduce an attachment name taken from a mail header to a bare file name."""
    if not rawName:
        return ''
    # Header values may be folded over several lines; glue them back together.
    name = ''.join(rawName.splitlines()).replace('\0', '')
    # The name is untrusted input: treat both separator styles as path
    # separators and keep only the last component, so a name such as
    # '../../x' or '/etc/x' cannot end up outside the target directory.
    name = os.path.basename(name.replace('\\', '/'))
    if name in ('.', '..'):
        return ''
    return name


def SaveAttachmentsFromMailMessage(message, directory):
    """Write the attachments of ``message`` into ``directory``.

    Walks every part of the message and stores each part that has a
    ``Content-Disposition`` header, a usable file name and a non-empty
    decoded payload.  Uses the module-level ``fileNameHashes`` and
    ``fileNameCounter`` to handle repeated names:

    * same name, same content (MD5) as seen before -> skipped;
    * same name, new content -> stored as ``name(N).ext``.

    A file that already exists at the destination is never overwritten.

    Args:
        message: Parsed ``email`` message object.
        directory: Existing directory the attachments are written to.
    """
    for part in message.walk():
        # Multipart containers carry no payload of their own.
        if part.get_content_maintype() == 'multipart':
            continue
        # Parts without a Content-Disposition header are not saved.
        if part.get('Content-Disposition') is None:
            continue

        fileName = _safe_attachment_name(part.get_filename())
        if not fileName:
            continue

        payload = part.get_payload(decode=True)
        if not payload:
            print('Attachment {file} was returned as type: {ftype} skipping...'.format(
                file=fileName, ftype=type(payload)))
            continue

        x_hash = hashlib.md5(payload).hexdigest()
        if x_hash in fileNameHashes[fileName]:
            print('\tSkipping duplicate file: {file}'.format(file=fileName))
            continue

        fileNameCounter[fileName] += 1
        occurrence = fileNameCounter[fileName]
        if occurrence > 1:
            # Same name but different content: disambiguate with a suffix.
            fileStr, fileExtension = os.path.splitext(fileName)
            new_fileName = '{file}({suffix}){ext}'.format(
                suffix=occurrence, file=fileStr, ext=fileExtension)
            print('\tRenaming and storing: {file} to {new_file}'.format(
                file=fileName, new_file=new_fileName))
        else:
            new_fileName = fileName
            print('\tStoring: {file}'.format(file=fileName))
        fileNameHashes[fileName].add(x_hash)

        file_path = os.path.join(directory, new_fileName)
        if os.path.exists(file_path):
            print('\tExists in destination: {file}'.format(file=new_fileName))
            continue

        try:
            with open(file_path, 'wb') as fp:
                fp.write(payload)
        except (IOError, OSError):
            # Best effort: report the failure and carry on with the next part.
            print('Could not store: {file} it has a shitty file name or path under {op_sys}.'.format(
                file=file_path,
                op_sys=platform.system()))
if __name__ == '__main__':
    # raw_input only exists on Python 2; on Python 3 the same function is
    # called input.  Prefer raw_input when present because Python 2's input
    # evaluates what the user types.
    try:
        readLine = raw_input
    except NameError:
        readLine = input

    resumeFile = 'resume.txt'
    userName = readLine('Enter your GMail username: ')
    password = getpass.getpass('Enter your password: ')

    # Pick up where an interrupted earlier run stopped.
    recover(resumeFile)

    if not os.path.isdir('attachments'):
        os.mkdir('attachments')

    for msg in GenerateMailMessages(userName, password, resumeFile):
        SaveAttachmentsFromMailMessage(msg, 'attachments')

    # Every message was handled, so the resume information is obsolete.
    os.remove(resumeFile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment