Skip to content

Instantly share code, notes, and snippets.

@felipeandres254
Created July 1, 2017 02:59
Show Gist options
  • Save felipeandres254/728d64b7b46534116222a8423d9eafb0 to your computer and use it in GitHub Desktop.
Save felipeandres254/728d64b7b46534116222a8423d9eafb0 to your computer and use it in GitHub Desktop.
Split a Gmail MBOX file into individual mail files
#!/usr/bin/env python3
import os, re, sys, time, datetime
from dateutil.parser import parse
from dateutil.tz import tzutc
def seg2hms( value ):
    """Render a duration given in seconds as text.

    The result is the standard string form of ``datetime.timedelta``,
    e.g. ``"1:01:01"`` for 3661 seconds or ``"1 day, 1:00:00"`` for 90000.
    """
    duration = datetime.timedelta(seconds=value)
    return str(duration)
def datasize( value ):
    """Format a byte count as a human-readable size with a binary prefix.

    The value is divided by 1024 until it drops below 1024 or the largest
    known prefix ("T") is reached, then rendered with two decimals, e.g.
    ``"512.00 B"``, ``"1.00 KB"``, ``"1.50 MB"``.

    value -- size in bytes (int or float).
    Returns the formatted string.
    """
    idx, prefix = 0, ["", "K", "M", "G", "T"]
    # ">=" so that exactly 1024 becomes "1.00 KB" rather than "1024.00 B";
    # the index bound keeps huge values in TB instead of raising IndexError.
    while value >= 1024 and idx < len(prefix) - 1:
        idx, value = idx+1, value/1024
    return "{:.2f} {}B".format(value, prefix[idx])
def print_info( mbox, msize, current, processed, total, start ):
    """Redraw the progress panel at the top-left corner of the terminal.

    mbox      -- mailbox name shown in the header row.
    msize     -- integer shown in parentheses after the mailbox name.
    current   -- size of the message being written, formatted by datasize().
    processed -- amount handled so far, same unit as ``total``.
    total     -- overall amount to handle; 0 yields 0%.
    start     -- start time in whole seconds since the epoch.
    """
    width = 60
    blank = " " * width

    elapsed = int(time.time()) - start
    if total != 0:
        percent = 100*processed//total
    else:
        percent = 0
    # Linear extrapolation from the throughput observed so far.
    if processed != 0:
        remaining = int(elapsed*(total-processed)/processed)
    else:
        remaining = 0

    rows = (
        " Mailbox {} ({:d})".format(mbox, msize),
        " Current email {}".format(datasize(current)),
        " Ellapsed / ETA {} / {}".format(seg2hms(elapsed), seg2hms(remaining)),
        " Processed ({}) {:d}% of {}".format(datasize(processed), percent, datasize(total)),
    )

    # ESC[1;1H moves the cursor home so each call overwrites the previous panel.
    print("\033[1;1H" + blank)
    for row in rows:
        # Pad every row to the panel width to erase leftovers of longer text.
        print(row.ljust(width, " "))
    print((blank + "\n")*2)
def split_mbox( mbox ):
    """Split ``<mbox>.mbox`` into one file per message inside directory ``<mbox>``.

    Every line starting with ``"From "`` begins a new message; messages are
    written to files named with a zero-padded five-digit counter
    (``00001``, ``00002``, ...). Any lines found before the first
    ``"From "`` line are appended to ``00000``. Progress is reported
    through print_info() after each line, and the screen is cleared at the end.

    mbox -- path of the mailbox without the ``.mbox`` extension; also used
            as the output directory name (created if missing).

    Raises OSError if the mailbox cannot be read or an output file cannot
    be written.
    """
    idx, CURRENT, PROCESSED = 0, 0, 0
    source = mbox + ".mbox"
    TOTAL = os.path.getsize(source)
    START = int(time.time())
    if not os.path.isdir( mbox ):
        os.mkdir(mbox)

    out = None
    try:
        # Binary mode: mail may contain bytes that are not valid in the
        # default text encoding, and len(line) must count bytes so that
        # PROCESSED is comparable with TOTAL (a byte size).
        with open(source, "rb") as f:
            for line in f:
                if line.startswith(b"From "):
                    idx, CURRENT = idx+1, 0
                    # One handle per message, closed before the next opens,
                    # instead of re-opening the file for every single line.
                    if out is not None:
                        out.close()
                    out = open(os.path.join(mbox, "{:05d}".format(idx)), "wb")
                elif out is None:
                    # Content preceding the first "From " line goes to 00000.
                    out = open(os.path.join(mbox, "{:05d}".format(idx)), "ab")
                out.write(line)
                CURRENT += len(line)
                PROCESSED += len(line)
                print_info( mbox, idx, CURRENT, PROCESSED, TOTAL, START )
    finally:
        if out is not None:
            out.close()
    os.system("clear")
def main( argv ):
    """Command-line entry point: validate arguments and split the mailbox.

    argv -- argument list in ``sys.argv`` form; ``argv[1]`` is the mailbox
            path without its ``.mbox`` extension.

    Returns a process exit status: 0 on success, 1 when ``<name>.mbox``
    does not exist, 2 when the argument count is wrong.
    """
    if len(argv) != 2:
        program = argv[0] if argv else "split_mbox"
        print("Usage: {} <mailbox name without .mbox>".format(program), file=sys.stderr)
        return 2
    if not os.path.exists(argv[1] + ".mbox"):
        print("'{}' Not found! :/".format(argv[1]))
        return 1
    split_mbox( argv[1] )
    return 0

# Only act when executed as a script; importing this module must not
# terminate the importing process.
if __name__ == "__main__":
    sys.exit(main(sys.argv))
requests==2.12.2
python-dateutil==2.6.0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment