#!/usr/bin/python
# Standard libraries
import os
import re
import uuid
import email
import imaplib
import datetime
import threading
# Non-standard libraries (Must be installed)
import imapclient
# Script settings
# Override imaplib's maximum accepted line length (1M bytes) so that large
# server responses are not rejected by a smaller built-in limit.
imaplib._MAXLINE = 1000000
# Base directory for downloaded mails
basedir = "E-mails"
# Regex matching runs of non-word characters and underscores; used to strip
# everything but alphanumerics from a string.  Written as a raw string so that
# ``\W`` is not treated as an (invalid) string escape sequence.
alphanumrx = re.compile(r'[\W_]+')
# IMAP server and account to dump
server = "imap-mail.outlook.com"
port = 993
user = ""
password = ""
# Utility functions
def split_seq(seq, num_pieces):
    """Yield ``num_pieces`` consecutive slices that together cover ``seq``.

    The slices are as evenly sized as possible: the earlier ones receive one
    extra item when ``len(seq)`` is not a multiple of ``num_pieces``.  When
    ``seq`` has fewer items than ``num_pieces`` the trailing slices are empty.
    """
    lower = 0
    for offset in range(num_pieces):
        # A stride-``num_pieces`` slice starting at ``offset`` has exactly as
        # many items as this chunk must hold; only its length is used.
        upper = lower + len(seq[offset::num_pieces])
        chunk = seq[lower:upper]
        lower = upper
        yield chunk
# Class definition
class IMAPDumper:
    """Download the messages of an IMAP account to local files.

    Each instance owns one ``imapclient.IMAPClient`` connection.  A message is
    stored as ``<basedir>/<username>/<mailbox>/<uid>.txt`` and each of its
    attachments as ``<uid>_<random uuid>.<ext>`` in the same directory.
    """

    def __init__(self, server, port, username, password):
        """Remember the connection details and connect immediately."""
        # Connection info
        self.server = server
        self.port = port
        self.username = username
        self.password = password
        self.mailbox = None  # Current mailbox, used when reconnecting
        self.mboxdir = None  # Download directory of the current mailbox
        self.connect()

    def connect(self):
        """Open an SSL connection, log in and re-select the current mailbox."""
        self.conn = imapclient.IMAPClient(self.server, self.port, ssl=True)
        self.conn.login(self.username, self.password)
        if self.mailbox is not None:
            self.conn.select_folder(self.mailbox, readonly=True)

    def reconnect(self, error):
        """Report ``error`` and establish a fresh connection."""
        print("Connection failed: %s" % error)
        print("Reconnecting")
        self.connect()

    def enumerateMailboxes(self):
        """Return the names of the folders reported by the server."""
        print("Getting mailbox list")
        # Each usable entry is a sequence whose third item is the folder name;
        # plain-string entries are skipped because their meaning is unknown.
        return [
            entry[2]
            for entry in self.conn.list_folders()
            if not isinstance(entry, str)
        ]

    def enumerateMails(self, mailbox):
        """Select ``mailbox`` and return the ids of all messages in it.

        The search is retried after a reconnect whenever the IMAP layer raises
        an error.  The download directory for the mailbox is created as a side
        effect.
        """
        # Get UIDs for mails in mailbox
        print("Enumerating mails in %s" % mailbox)
        self.setMailbox(mailbox)
        while True:
            try:
                maillist = self.conn.search(['ALL'])
                break
            # imaplib exposes its exception class as ``IMAP4.error``; the
            # module itself has no ``error`` attribute.
            except (imaplib.IMAP4.error, imapclient.IMAPClient.Error) as ex:
                self.reconnect(ex)
        # Create a directory to download the mails into if it doesn't already exist
        os.makedirs(self.mboxdir, exist_ok=True)
        print(" Added %d to queue" % len(maillist))
        return maillist

    def setMailbox(self, mailbox):
        """Select ``mailbox`` read-only and derive its download directory."""
        self.conn.select_folder(mailbox, readonly=True)
        self.mailbox = mailbox
        self.mboxdir = os.path.join(basedir, self.username, self.mailbox)

    def fetchMail(self, mailid):
        """Download message ``mailid`` and its attachments unless already saved.

        A message counts as already downloaded when its ``.txt`` file exists
        and is not empty.  Any error is propagated to the caller; automatic
        reconnecting is not attempted here.
        """
        filename = os.path.join(self.mboxdir, str(mailid) + ".txt")
        # Check if mail is already downloaded
        if os.path.exists(filename) and os.path.getsize(filename) > 0:
            return
        # Download and save e-mail
        response = self.conn.fetch(mailid, ['BODY.PEEK[]'], None)[mailid]
        # Depending on the IMAPClient version the response is keyed by bytes
        # or by str; accept both.
        mail = response[b'BODY[]'] if b'BODY[]' in response else response['BODY[]']
        raw = mail if isinstance(mail, bytes) else mail.encode("utf-8")
        # Binary mode keeps the message exactly as the server sent it
        with open(filename, "wb") as f:
            f.write(raw)
        # Save attachments to separate files
        self.saveAttachments(mailid, mail)

    def saveAttachments(self, uid, mailtext):
        """Write every attachment of the message ``mailtext`` to its own file.

        ``mailtext`` may be the raw message as ``str`` or ``bytes``.  Parts
        whose payload cannot be decoded are skipped without creating a file.
        """
        if isinstance(mailtext, bytes):
            mail = email.message_from_bytes(mailtext)
        else:
            mail = email.message_from_string(mailtext)
        for part in mail.walk():
            # Determine if part is an attachment
            if part.get_content_maintype() == 'multipart' or part.get('Content-Disposition') is None:
                continue
            # Decode before opening the file so a failure leaves nothing behind
            try:
                payload = part.get_payload(decode=True)
            except AssertionError:
                continue
            if payload is None:
                # Nothing decodable to write for this part
                continue
            # Generate a random filename, keeping the original extension
            # (.bin when the part carries no filename)
            filename = part.get_filename()
            if filename is None:
                filename = str(uuid.uuid4()) + ".bin"
            else:
                ext = alphanumrx.sub('', filename.split(".")[-1])
                filename = str(uuid.uuid4()) + "." + ext
            # Write attachment to file
            filepath = os.path.join(self.mboxdir, str(uid) + "_" + filename)
            with open(filepath, 'wb') as f:
                f.write(payload)
# Worker class for multithreading
class Worker(threading.Thread):
    """Thread that downloads a list of mails over its own IMAP connection."""

    def __init__(self, server, port, user, password, mailbox, maillist):
        """Store the connection details, the mailbox and the mail ids to fetch."""
        super().__init__()
        # Keep every argument on the instance so that run() does not depend on
        # module-level variables that may change while the thread is alive.
        self.server = server
        self.port = port
        self.user = user
        self.password = password
        self.mailbox = mailbox
        self.maillist = maillist

    def run(self):
        """Connect, select the mailbox and fetch each mail in ``maillist``."""
        self.dumper = IMAPDumper(self.server, self.port, self.user, self.password)
        self.dumper.setMailbox(self.mailbox)
        for mailid in self.maillist:
            print("Fetching %d" % mailid)
            self.dumper.fetchMail(mailid)
# Run script
print("Starting...")
dumper = IMAPDumper(server, port, user, password)
mailboxes = dumper.enumerateMailboxes()
# Upper bound on the number of download threads started per mailbox
num_workers = 25
for mailbox in mailboxes:
    maillist = dumper.enumerateMails(mailbox)
    ### THREADED APPROACH
    print("Starting worker threads")
    # One worker per non-empty chunk: every worker opens its own connection,
    # so starting one for an empty chunk would only cost a pointless login.
    threads = [
        Worker(server, port, user, password, mailbox, chunk)
        for chunk in split_seq(maillist, num_workers)
        if chunk
    ]
    for thread in threads:
        thread.start()
    # Finish the whole mailbox before moving on to the next one
    for thread in threads:
        thread.join()
    ### UNTHREADED APPROACH
    #mailcounter = 0
    #for mailid in maillist:
    #    mailcounter += 1
    #    print("[%d/%d] Fetching %d\r" % (mailcounter, len(maillist), mailid), end="")
    #    dumper.fetchMail(mailid)