Created
May 17, 2024 07:41
-
-
Save vadirajks/0f26c2095813b1253dbda71ec0709aa7 to your computer and use it in GitHub Desktop.
gmail_attachment_download
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#python3 /custom-scripts/gmail_attachment_download.py --download_dir /opt/gmail_attachment_download/ --target_subject_pattern ".*spending.*" --start_date "16-May-2024" --end_date "18-May-2024"
#python3 /custom-scripts/gmail_attachment_download.py --download_dir /opt/gmail_attachment_download/ --target_subject_pattern ".*spending.*"
#python3 /custom-scripts/gmail_attachment_download.py -h | |
import imaplib | |
import email | |
import os | |
import argparse | |
import re | |
from datetime import datetime, timedelta | |
# Function to download attachments | |
def download_attachments(download_dir, target_subject_pattern, start_date, end_date):
    """Download attachments from Gmail messages whose subject matches a regex.

    Logs in to Gmail over IMAP (SSL), searches the inbox for messages in the
    given date window and, for every message whose decoded subject matches
    ``target_subject_pattern``, saves each named attachment into
    ``download_dir`` as ``<filename>_<YYYY-MM-DDTHHMMSS>.csv``. The timestamp
    comes from the message's ``Date`` header, and the saved file's access and
    modification times are set to that same instant.

    Args:
        download_dir: Directory to write attachments to; created if missing.
        target_subject_pattern: Regular expression applied with ``search`` to
            the decoded subject (a missing subject is treated as ``''``).
        start_date: Lower bound passed to the IMAP ``SINCE`` criterion
            (format ``DD-Mon-YYYY``, e.g. ``16-May-2024``).
        end_date: Upper bound passed to the IMAP ``BEFORE`` criterion.

    Raises:
        re.error: If ``target_subject_pattern`` is not a valid regex.
        imaplib.IMAP4.error: If the server rejects the login or a command.
        OSError: If the download directory or a file cannot be written.
    """
    # Imported here so the function does not depend on ``import email``
    # having loaded these submodules as a side effect.
    from email.header import decode_header, make_header
    from email.utils import parsedate_to_datetime

    def decode_mime_words(value):
        """Return *value* with RFC 2047 encoded words decoded ('' for None)."""
        if value is None:
            return ''
        try:
            return str(make_header(decode_header(value)))
        except (LookupError, UnicodeError, ValueError):
            # Unknown charset or undecodable bytes: fall back to the raw text.
            return str(value)

    print("Connecting to Gmail server...")
    # IMAP configuration
    imap_host = 'imap.gmail.com'
    imap_port = 993
    # You need to provide your own credentials here
    username = ''
    app_password = ''

    # Fail on a bad pattern or an unwritable directory before touching the
    # network.
    subject_regex = re.compile(target_subject_pattern)
    os.makedirs(download_dir, exist_ok=True)

    # Connect to the server
    mail = imaplib.IMAP4_SSL(imap_host, imap_port)
    try:
        mail.login(username, app_password)
        # Select the mailbox
        mail.select('inbox')
        print("Searching for emails between", start_date, "and", end_date)
        # Search for emails between start_date and end_date
        result, data = mail.search(None, '(SINCE "{}") (BEFORE "{}")'.format(start_date, end_date))
        message_ids = data[0].split() if result == 'OK' and data and data[0] else []
        for num in message_ids:
            # Fetch the email; skip it if the server returned no message body
            # (e.g. it was expunged between SEARCH and FETCH).
            result, email_data = mail.fetch(num, '(RFC822)')
            if result != 'OK' or not email_data or not isinstance(email_data[0], tuple):
                continue
            msg = email.message_from_bytes(email_data[0][1])

            # Subjects may be absent or RFC 2047 encoded; normalise first.
            subject = decode_mime_words(msg.get('Subject'))
            if not subject_regex.search(subject):
                continue
            print(f"Found email with subject matching '{target_subject_pattern}'.")

            # A missing or malformed Date header must not abort the run:
            # name the file after the download time and leave its mtime alone.
            try:
                timestamp = parsedate_to_datetime(msg['Date'])
            except (TypeError, ValueError):
                timestamp = None
            timestamp_str = (timestamp or datetime.now()).strftime('%Y-%m-%dT%H%M%S')

            for part in msg.walk():
                if part.get_content_maintype() == 'multipart':
                    continue
                if not part.get('Content-Disposition'):
                    continue
                # The filename comes from the (untrusted) email: keep only its
                # final path component so it cannot escape download_dir.
                filename = decode_mime_words(part.get_filename())
                safe_name = os.path.basename(filename.replace('\\', '/'))
                if safe_name in ('', '.', '..'):
                    continue
                payload = part.get_payload(decode=True)
                if payload is None:
                    # e.g. an attached message/rfc822 part has no byte payload.
                    continue
                # Include original timestamp in filename
                filepath = os.path.join(download_dir, f"{safe_name}_{timestamp_str}.csv")
                print("Downloading attachment to:", filepath)
                with open(filepath, 'wb') as f:
                    f.write(payload)
                print(f"Attachment '{safe_name}' downloaded successfully to {filepath}")
                if timestamp is not None:
                    # Set access/modification time of the file to the email date
                    os.utime(filepath, (timestamp.timestamp(), timestamp.timestamp()))
    finally:
        # Logout and close connection even if something above failed
        mail.logout()
    print("Script finished.")
def main():
    """Parse command-line arguments and start the attachment download.

    Options:
        --download_dir: required; directory to download attachments to.
        --target_subject_pattern: required; regular expression matched
            against the email subject.
        --start_date / --end_date: optional search window in
            ``DD-MMM-YYYY`` form; default to yesterday and tomorrow.

    Exits with status 1 (after printing help) when a required option is an
    empty string, and with status 2 when the subject pattern is not a valid
    regular expression.
    """
    parser = argparse.ArgumentParser(description="Download attachments from Gmail.")
    parser.add_argument("--download_dir", required=True, help="Directory to download attachments")
    parser.add_argument("--target_subject_pattern", required=True, help="Regular expression pattern to match the target email subject")
    parser.add_argument("--start_date", default=(datetime.today() - timedelta(days=1)).strftime('%d-%b-%Y'),
                        help="Start date for email search (default: yesterday) in format 'DD-MMM-YYYY'")
    parser.add_argument("--end_date", default=(datetime.today() + timedelta(days=1)).strftime('%d-%b-%Y'),
                        help="End date for email search (default: tomorrow) in format 'DD-MMM-YYYY'")
    args = parser.parse_args()

    # required=True rejects missing options but still accepts empty strings.
    if not args.download_dir or not args.target_subject_pattern:
        parser.print_help()
        parser.exit(1)

    # Reject an invalid regex up front, with a usage message, instead of
    # failing with a traceback after the IMAP login has already happened.
    try:
        re.compile(args.target_subject_pattern)
    except re.error as exc:
        parser.error(f"invalid --target_subject_pattern regular expression: {exc}")

    download_attachments(args.download_dir, args.target_subject_pattern, args.start_date, args.end_date)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#python3 /custom-scripts/gmail_attachment_download.py --download_dir /opt/gmail_attachment_download/ --target_subject "spending" --start_date "16-May-2024" --end_date "18-May-2024" | |
#python3 /custom-scripts/gmail_attachment_download.py --download_dir /opt/gmail_attachment_download/ --target_subject "spending" | |
#python3 /custom-scripts/gmail_attachment_download.py -h | |
import imaplib | |
import email | |
import os | |
import argparse | |
from datetime import datetime, timedelta | |
# Function to download attachments | |
def download_attachments(download_dir, target_subject, start_date, end_date):
    """Download attachments from Gmail messages with an exact subject.

    Logs in to Gmail over IMAP (SSL), searches the inbox for messages in the
    given date window and, for every message whose decoded subject equals
    ``target_subject``, saves each named attachment into ``download_dir`` as
    ``<filename>_<YYYY-MM-DDTHHMMSS>.csv``. The timestamp comes from the
    message's ``Date`` header, and the saved file's access and modification
    times are set to that same instant.

    Args:
        download_dir: Directory to write attachments to; created if missing.
        target_subject: Subject text a message must equal exactly.
        start_date: Lower bound passed to the IMAP ``SINCE`` criterion
            (format ``DD-Mon-YYYY``, e.g. ``16-May-2024``).
        end_date: Upper bound passed to the IMAP ``BEFORE`` criterion.

    Raises:
        imaplib.IMAP4.error: If the server rejects the login or a command.
        OSError: If the download directory or a file cannot be written.
    """
    # Imported here so the function does not depend on ``import email``
    # having loaded these submodules as a side effect.
    from email.header import decode_header, make_header
    from email.utils import parsedate_to_datetime

    def decode_mime_words(value):
        """Return *value* with RFC 2047 encoded words decoded ('' for None)."""
        if value is None:
            return ''
        try:
            return str(make_header(decode_header(value)))
        except (LookupError, UnicodeError, ValueError):
            # Unknown charset or undecodable bytes: fall back to the raw text.
            return str(value)

    print("Connecting to Gmail server...")
    # IMAP configuration
    imap_host = 'imap.gmail.com'
    imap_port = 993
    # You need to provide your own credentials here
    username = ''
    app_password = ''

    # Fail on an unwritable directory before touching the network.
    os.makedirs(download_dir, exist_ok=True)

    # Connect to the server
    mail = imaplib.IMAP4_SSL(imap_host, imap_port)
    try:
        mail.login(username, app_password)
        # Select the mailbox
        mail.select('inbox')
        print("Searching for emails between", start_date, "and", end_date)
        # Search for emails between start_date and end_date
        result, data = mail.search(None, '(SINCE "{}") (BEFORE "{}")'.format(start_date, end_date))
        message_ids = data[0].split() if result == 'OK' and data and data[0] else []
        for num in message_ids:
            # Fetch the email; skip it if the server returned no message body
            # (e.g. it was expunged between SEARCH and FETCH).
            result, email_data = mail.fetch(num, '(RFC822)')
            if result != 'OK' or not email_data or not isinstance(email_data[0], tuple):
                continue
            msg = email.message_from_bytes(email_data[0][1])

            # Subjects may be RFC 2047 encoded; compare the decoded text so a
            # non-ASCII subject can still match.
            subject = decode_mime_words(msg.get('Subject'))
            if subject != target_subject:
                continue
            print(f"Found email with subject '{target_subject}'.")

            # A missing or malformed Date header must not abort the run:
            # name the file after the download time and leave its mtime alone.
            try:
                timestamp = parsedate_to_datetime(msg['Date'])
            except (TypeError, ValueError):
                timestamp = None
            timestamp_str = (timestamp or datetime.now()).strftime('%Y-%m-%dT%H%M%S')

            for part in msg.walk():
                if part.get_content_maintype() == 'multipart':
                    continue
                if not part.get('Content-Disposition'):
                    continue
                # The filename comes from the (untrusted) email: keep only its
                # final path component so it cannot escape download_dir.
                filename = decode_mime_words(part.get_filename())
                safe_name = os.path.basename(filename.replace('\\', '/'))
                if safe_name in ('', '.', '..'):
                    continue
                payload = part.get_payload(decode=True)
                if payload is None:
                    # e.g. an attached message/rfc822 part has no byte payload.
                    continue
                # Include original timestamp in filename
                filepath = os.path.join(download_dir, f"{safe_name}_{timestamp_str}.csv")
                print("Downloading attachment to:", filepath)
                with open(filepath, 'wb') as f:
                    f.write(payload)
                print(f"Attachment '{safe_name}' downloaded successfully to {filepath}")
                if timestamp is not None:
                    # Set access/modification time of the file to the email date
                    os.utime(filepath, (timestamp.timestamp(), timestamp.timestamp()))
    finally:
        # Logout and close connection even if something above failed
        mail.logout()
    print("Script finished.")
def main():
    """Command-line entry point for the exact-subject attachment downloader.

    Reads --download_dir and --target_subject (both required) plus the
    optional --start_date / --end_date window, which defaults to yesterday
    through tomorrow, then passes the values to download_attachments().
    Prints the help text and exits with status 1 if a required value is an
    empty string.
    """
    date_format = '%d-%b-%Y'
    one_day = timedelta(days=1)
    now = datetime.today()
    default_start = (now - one_day).strftime(date_format)
    default_end = (now + one_day).strftime(date_format)

    cli = argparse.ArgumentParser(description="Download attachments from Gmail.")
    cli.add_argument(
        "--download_dir",
        required=True,
        help="Directory to download attachments",
    )
    cli.add_argument(
        "--target_subject",
        required=True,
        help="Target email subject",
    )
    cli.add_argument(
        "--start_date",
        default=default_start,
        help="Start date for email search (default: yesterday) in format 'DD-MMM-YYYY'",
    )
    cli.add_argument(
        "--end_date",
        default=default_end,
        help="End date for email search (default: tomorrow) in format 'DD-MMM-YYYY'",
    )
    options = cli.parse_args()

    # argparse enforces presence; this additionally rejects empty strings.
    if not (options.download_dir and options.target_subject):
        cli.print_help()
        cli.exit(1)

    download_attachments(
        options.download_dir,
        options.target_subject,
        options.start_date,
        options.end_date,
    )


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment