#!/usr/bin/env python """An IMAP filter. An IMAP message box is scanned and all non-scored messages are scored and (where necessary) filtered. The original filter design owed much to isbg by Roger Binns (http://www.rogerbinns.com/isbg). Usage: sb_imapfilter [options] note: option values with spaces in them must be enclosed in double quotes options: -p dbname : pickled training database filename -d dbname : dbm training database filename -t : train contents of spam folder and ham folder -c : classify inbox -h : display this message -v : verbose mode -P : security option to prompt for imap password, rather than look in options["imap", "password"] -e y/n : expunge/purge messages on exit (y) or not (n) -i debuglvl : a somewhat mysterious imaplib debugging level (4 is a good level, and suitable for bug reports) -l minutes : period of time between filtering operations -b : Launch a web browser showing the user interface. (If not specified, and neither the -c or -t options are used, then this will default to the value in your configuration file). -o section:option:value : set [section, option] in the options database to value Examples: Classify inbox, with dbm database sb_imapfilter -c -d bayes.db Train Spam and Ham, then classify inbox, with dbm database sb_imapfilter -t -c -d bayes.db Train Spam and Ham only, with pickled database sb_imapfilter -t -p bayes.db Warnings: o This is alpha software! The filter is currently being developed and tested. We do *not* recommend using it on a production system unless you are confident that you can get your mail back if you lose it. On the other hand, we do recommend that you test it for us and let us know if anything does go wrong. o By default, the filter does *not* delete, modify or move any of your mail. Due to quirks in how imap works, new versions of your mail are modified and placed in new folders, but the originals are still available. These are flagged with the /Deleted flag so that you know that they can be removed. Your mailer may not show these messages by default, but there should be an option to do so. *However*, if your mailer automatically purges/expunges (i.e. permanently deletes) mail flagged as such, *or* if you set the imap_expunge option to True, then this mail will be irretrievably lost. """ todo = """ To Do: o IMAPMessage and IMAPFolder currently carry out very simple checks of responses received from IMAP commands, but if the response is not "OK", then the filter terminates. Handling of these errors could be much nicer. o IMAP over SSL is relatively untested. o Develop a test script, like spambayes/test/test_pop3proxy.py that runs through some tests (perhaps with a *real* imap server, rather than a dummy one). This would make it easier to carry out the tests against each server whenever a change is made. o IMAP supports authentication via other methods than the plain-text password method that we are using at the moment. Neither of the servers I have access to offer any alternative method, however. If someone's does, then it would be nice to offer this. o Usernames should be able to be literals as well as quoted strings. This might help if the username/password has special characters like accented characters. o Suggestions? """ # This module is part of the spambayes project, which is Copyright 2002-3 # The Python Software Foundation and is covered by the Python Software # Foundation license. __author__ = "Tony Meyer , Tim Stone" __credits__ = "All the Spambayes folk." from __future__ import generators try: True, False except NameError: # Maintain compatibility with Python 2.2 True, False = 1, 0 import socket import os import re import time import sys import getopt import types import traceback import email import email.Parser from getpass import getpass from email.Header import Header from email.Utils import parsedate try: import cStringIO as StringIO except ImportError: import StringIO from spambayes.Options import options, get_pathname_option from spambayes import tokenizer, storage, message, Dibbler from spambayes.UserInterface import UserInterfaceServer from spambayes.ImapUI import IMAPUserInterface from spambayes.Version import get_version_string from imaplib import IMAP4 from imaplib import Time2Internaldate try: if options["imap", "use_ssl"]: from imaplib import IMAP4_SSL as BaseIMAP else: from imaplib import IMAP4 as BaseIMAP except ImportError: from imaplib import IMAP4 as BaseIMAP # global IMAPlib object global imap imap = None # A flag can have any character in the ascii range 32-126 # except for (){ %*"\ FLAG_CHARS = "" for i in range(32, 127): if not chr(i) in ['(', ')', '{', ' ', '%', '*', '"', '\\']: FLAG_CHARS += chr(i) FLAG = r"\\?[" + re.escape(FLAG_CHARS) + r"]+" # The empty flag set "()" doesn't match, so that extract returns # data["FLAGS"] == None FLAGS_RE = re.compile(r"(FLAGS) (\((" + FLAG + r" )*(" + FLAG + r")\))") INTERNALDATE_RE = re.compile(r"(INTERNALDATE) (\"\d{1,2}\-[A-Za-z]{3,3}\-" + r"\d{2,4} \d{2,2}\:\d{2,2}\:\d{2,2} " + r"[\+\-]\d{4,4}\")") RFC822_RE = re.compile(r"(RFC822) (\{[\d]+\})") BODY_PEEK_RE = re.compile(r"(BODY\[\]) (\{[\d]+\})") RFC822_HEADER_RE = re.compile(r"(RFC822.HEADER) (\{[\d]+\})") UID_RE = re.compile(r"(UID) ([\d]+)") FETCH_RESPONSE_RE = re.compile(r"([0-9]+) \(([" + \ re.escape(FLAG_CHARS) + r"\"\{\}\(\)\\ ]*)\)?") LITERAL_RE = re.compile(r"^\{[\d]+\}$") def _extract_fetch_data(response): '''Extract data from the response given to an IMAP FETCH command.''' # Response might be a tuple containing literal data # At the moment, we only handle one literal per response. This # may need to be improved if our code ever asks for something # more complicated (like RFC822.Header and RFC822.Body) if type(response) == types.TupleType: literal = response[1] response = response[0] else: literal = None # the first item will always be the message number mo = FETCH_RESPONSE_RE.match(response) data = {} if mo is None: print """IMAP server gave strange fetch response. Please report this as a bug.""" print response else: data["message_number"] = mo.group(1) response = mo.group(2) # We support the following FETCH items: # FLAGS # INTERNALDATE # RFC822 # UID # RFC822.HEADER # BODY.PEEK # All others are ignored. for r in [FLAGS_RE, INTERNALDATE_RE, RFC822_RE, UID_RE, RFC822_HEADER_RE, BODY_PEEK_RE]: mo = r.search(response) if mo is not None: if LITERAL_RE.match(mo.group(2)): data[mo.group(1)] = literal else: data[mo.group(1)] = mo.group(2) return data class IMAPSession(BaseIMAP): '''A class extending the IMAP4 class, with a few optimizations''' def __init__(self, server, port, debug=0, do_expunge=False): try: BaseIMAP.__init__(self, server, port) except (BaseIMAP.error, socket.gaierror, socket.error): print "Cannot connect to server %s on port %s" % (server, port) sys.exit(-1) self.debug = debug # For efficiency, we remember which folder we are currently # in, and only send a select command to the IMAP server if # we want to *change* folders. This function is used by # both IMAPMessage and IMAPFolder. self.current_folder = None self.do_expunge = do_expunge self.logged_in = False # We override the base read so that we only read a certain amount # of data at a time. OS X and Python has problems with getting # large amounts of memory at a time, so maybe this will be a way we # can work around that (I don't know, and don't have a mac to test, # but we need to try something). self._read = self.read self.read = self.safe_read def login(self, username, pwd): try: BaseIMAP.login(self, username, pwd) # superclass login except BaseIMAP.error, e: if str(e) == "permission denied": print "There was an error logging in to the IMAP server." print "The userid and/or password may be incorrect." sys.exit() else: raise self.logged_in = True def logout(self): # sign off if self.do_expunge: # we may never have logged in, in which case we do nothing if self.logged_in: # expunge messages from the spam and unsure folders for fol in ["spam_folder", "unsure_folder",]: self.select(options["imap", fol]) self.expunge() # expunge messages from the ham and spam training folders for fol_list in ["ham_train_folders", "spam_train_folders",]: for fol in options["imap", fol_list]: self.select(fol) self.expunge() BaseIMAP.logout(self) # superclass logout def SelectFolder(self, folder): '''A method to point ensuing imap operations at a target folder''' if self.current_folder != folder: if self.current_folder != None: if self.do_expunge: # It is faster to do close() than a single # expunge when we log out (because expunge returns # a list of all the deleted messages which we don't do # anything with) imap.close() # We *always* use SELECT and not EXAMINE, because this # speeds things up considerably. if folder == "": # This is Python bug #845560 - if the empty string is # passed, we get a traceback, not just an 'invalid folder' # error, so print out a warning and exit. print "Tried to select an invalid folder" sys.exit(-1) response = self.select(folder, None) if response[0] != "OK": print "Invalid response to select %s:\n%s" % (folder, response) sys.exit(-1) self.current_folder = folder return response def folder_list(self): '''Return a alphabetical list of all folders available on the server''' response = self.list() if response[0] != "OK": return [] all_folders = response[1] folders = [] for fol in all_folders: # Sigh. Some servers may give us back the folder name as a # literal, so we need to crunch this out. if isinstance(fol, ()): r = re.compile(r"{\d+}") m = r.search(fol[0]) if not m: # Something is wrong here! Skip this folder continue fol = '%s"%s"' % (fol[0][:m.start()], fol[1]) r = re.compile(r"\(([\w\\ ]*)\) ") m = r.search(fol) if not m: # Something is not good with this folder, so skip it. continue name_attributes = fol[:m.end()-1] # IMAP is a truly odd protocol. The delimiter is # only the delimiter for this particular folder - each # folder *may* have a different delimiter self.folder_delimiter = fol[m.end()+1:m.end()+2] # a bit of a hack, but we really need to know if this is # the case if self.folder_delimiter == ',': print "WARNING: Your imap server uses a comma as the " \ "folder delimiter. This may cause unpredictable " \ "errors." folders.append(fol[m.end()+4:].strip('"')) folders.sort() return folders def FindMessage(self, id): '''A (potentially very expensive) method to find a message with a given spambayes id (header), and return a message object (no substance).''' # If efficiency becomes a concern, what we could do is store a # dict of key-to-folder, and look in that folder first. (It might # have moved independantly of us, so we would still have to search # if we didn't find it). For the moment, we do an search through # all folders, alphabetically. for folder_name in self.folder_list(): fol = IMAPFolder(folder_name) for msg in fol: if msg.id == id: return msg return None # Maximum amount of data that will be read at any one time. MAXIMUM_SAFE_READ = 4096 def safe_read(self, size): """Read data from remote, but in manageable sizes.""" data = [] while size > 0: if size < self.MAXIMUM_SAFE_READ: to_collect = size else: to_collect = self.MAXIMUM_SAFE_READ data.append(self._read(to_collect)) size -= self.MAXIMUM_SAFE_READ return "".join(data) class IMAPMessage(message.SBHeaderMessage): def __init__(self): message.Message.__init__(self) self.folder = None self.previous_folder = None self.rfc822_command = "(BODY.PEEK[])" self.rfc822_key = "BODY[]" self.got_substance = False self.invalid = False def setFolder(self, folder): self.folder = folder def _check(self, response, command): if response[0] != "OK": print "Invalid response to %s:\n%s" % (command, response) sys.exit(-1) def extractTime(self): # When we create a new copy of a message, we need to specify # a timestamp for the message. If the message has a valid date # header we use that. Otherwise, we use the current time. message_date = self["Date"] if message_date is not None: parsed_date = parsedate(message_date) if parsed_date is not None: try: return Time2Internaldate(time.mktime(parsed_date)) except ValueError: # Invalid dates can cause mktime() to raise a # ValueError, for example: # >>> time.mktime(parsedate("Mon, 06 May 0102 10:51:16 -0100")) # Traceback (most recent call last): # File "", line 1, in ? # ValueError: year out of range # (Why this person is getting mail from almost two # thousand years ago is another question ). # In any case, we just pass and use the current date. pass except OverflowError: pass return Time2Internaldate(time.time()) def get_substance(self): '''Retrieve the RFC822 message from the IMAP server and set as the substance of this message.''' if self.got_substance: return if not self.uid or not self.id: print "Cannot get substance of message without an id and an UID" return imap.SelectFolder(self.folder.name) try: response = imap.uid("FETCH", self.uid, self.rfc822_command) except IMAP4.error: self.rfc822_command = "RFC822" self.rfc822_key = "RFC822" response = imap.uid("FETCH", self.uid, self.rfc822_command) if response[0] != "OK": self.rfc822_command = "RFC822" self.rfc822_key = "RFC822" response = imap.uid("FETCH", self.uid, self.rfc822_command) self._check(response, "uid fetch") data = _extract_fetch_data(response[1][0]) # Annoyingly, we can't just pass over the RFC822 message to an # existing message object (like self) and have it parse it. So # we go through the hoops of creating a new message, and then # copying over all its internals. try: new_msg = email.Parser.Parser().parsestr(data[self.rfc822_key]) # We use a general 'except' because the email package doesn't # always return email.Errors (it can return a TypeError, for # example) if the email is invalid. In any case, we want # to keep going, and not crash, because we might leave the # user's mailbox in a bad state if we do. Better to soldier # on. except: # Yikes! Barry set this to return at this point, which # would work ok for training (IIRC, that's all he's # using it for), but for filtering, what happens is that # the message ends up blank, but ok, so the original is # flagged to be deleted, and a new (almost certainly # unsure) message, *with only the spambayes headers* is # created. The nice solution is still to do what sb_server # does and have a X-Spambayes-Exception header with the # exception data and then the original message. self.invalid = True # This is nicked from sb_server - thanks Richie! stream = StringIO.StringIO() traceback.print_exc(None, stream) details = stream.getvalue() # Build the header. This will strip leading whitespace from # the lines, so we add a leading dot to maintain indentation. detailLines = details.strip().split('\n') dottedDetails = '\n.'.join(detailLines) headerName = 'X-Spambayes-Exception' header = Header(dottedDetails, header_name=headerName) # Insert the header, converting email.Header's '\n' line # breaks to IMAP4's '\r\n'. # (Also insert the id header, otherwise we'll keep doing # this message over and over again). headers, body = re.split(r'\n\r?\n', data["RFC822"], 1) header = re.sub(r'\r?\n', '\r\n', str(header)) headers += "\n%s: %s\r\n%s: %s\r\n\r\n" % \ (headerName, header, options["Headers", "mailid_header_name"], self.id) self.invalid_content = headers + body # Print the exception and a traceback. print >>sys.stderr, details else: self._headers = new_msg._headers self._unixfrom = new_msg._unixfrom self._payload = new_msg._payload self._charset = new_msg._charset self.preamble = new_msg.preamble self.epilogue = new_msg.epilogue self._default_type = new_msg._default_type if not self.has_key(options["Headers", "mailid_header_name"]): self[options["Headers", "mailid_header_name"]] = self.id self.got_substance = True if options["globals", "verbose"]: sys.stdout.write(chr(8) + "*") def MoveTo(self, dest): '''Note that message should move to another folder. No move is carried out until Save() is called, for efficiency.''' if self.previous_folder is None: self.previous_folder = self.folder self.folder = dest def as_string(self, unixfrom=False): # Basically the same as the parent class's except that we handle # the case where the data was unparsable, so we haven't done any # filtering, and we are not actually a proper email.Message object. if self.invalid: return self._force_CRLF(self.invalid_content) else: return message.SBHeaderMessage.as_string(self, unixfrom) def Save(self): '''Save message to imap server.''' # we can't actually update the message with IMAP # so what we do is create a new message and delete the old one if self.folder is None: raise RuntimeError, """Can't save a message that doesn't have a folder.""" if not self.id: raise RuntimeError, """Can't save a message that doesn't have an id.""" response = imap.uid("FETCH", self.uid, "(FLAGS INTERNALDATE)") self._check(response, 'fetch (flags internaldate)') data = _extract_fetch_data(response[1][0]) if data.has_key("INTERNALDATE"): msg_time = data["INTERNALDATE"] else: msg_time = self.extractTime() if data.has_key("FLAGS"): flags = data["FLAGS"] # The \Recent flag can be fetched, but cannot be stored # We must remove it from the list if it is there. flags = re.sub(r"\\Recent ?| ?\\Recent", "", flags) else: flags = None for flgs, tme in [(flags, msg_time), (None, msg_time), (flags, Time2Internaldate(time.time())), (None, Time2Internaldate(time.time()))]: response = imap.append(self.folder.name, flgs, tme, self.as_string()) if response[0] == "OK": break self._check(response, 'append') if self.previous_folder is None: imap.SelectFolder(self.folder.name) else: imap.SelectFolder(self.previous_folder.name) self.previous_folder = None response = imap.uid("STORE", self.uid, "+FLAGS.SILENT", "(\\Deleted \\Seen)") self._check(response, 'store') # Not all IMAP servers immediately offer the new message # (stupidly), but we need to find it. Generally a 'no-op' will # allow the server time to handle it, so do that. # See [ 941596 ] sb_imapfilter.py not adding headers / moving messages imap.noop() # We need to update the uid, as it will have changed. # Although we don't use the UID to keep track of messages, we do # have to use it for IMAP operations. imap.SelectFolder(self.folder.name) response = imap.uid("SEARCH", "(UNDELETED HEADER %s \"%s\")" % \ (options["Headers", "mailid_header_name"], self.id.replace('\\',r'\\').replace('"',r'\"'))) self._check(response, 'search') new_id = response[1][0] # See [ 870799 ] imap trying to fetch invalid message UID # It seems that although the save gave a "NO" response to the # first save, the message was still saved (without the flags, # probably). This isn't really good behaviour on the server's # part, but, as usual, we try and deal with it. So, if we get # more than one undeleted message with the same SpamBayes id, # delete all of them apart from the last one, and use that. multiple_ids = new_id.split() for id_to_remove in multiple_ids[:-1]: response = imap.uid("STORE", id_to_remove, "+FLAGS.SILENT", "(\\Deleted \\Seen)") self._check(response, 'store') if multiple_ids: new_id = multiple_ids[-1] else: # Let's hope it doesn't, but, just in case, if the search # turns up empty, we make the assumption that the new # message is the last one with a recent flag response = imap.uid("SEARCH", "RECENT") new_id = response[1][0] if new_id.find(' ') > -1: ids = new_id.split(' ') new_id = ids[-1] # Ok, now we're in trouble if we still haven't found it. # We make a huge assumption that the new message is the one # with the highest UID (they are sequential, so this will be # ok as long as another message hasn't also arrived) if new_id == "": response = imap.uid("SEARCH", "ALL") new_id = response[1][0] if new_id.find(' ') > -1: ids = new_id.split(' ') new_id = ids[-1] self.uid = new_id # This performs a similar function to email.message_from_string() def imapmessage_from_string(s, _class=IMAPMessage, strict=False): return email.message_from_string(s, _class, strict) class IMAPFolder(object): def __init__(self, folder_name): self.name = folder_name # Unique names for cached messages - see _generate_id below. self.lastBaseMessageName = '' self.uniquifier = 2 def __cmp__(self, obj): '''Two folders are equal if their names are equal''' if obj is None: return False return cmp(self.name, obj.name) def _check(self, response, command): if response[0] != "OK": print "Invalid response to %s:\n%s" % (command, response) sys.exit(-1) def __iter__(self): '''IMAPFolder is iterable''' for key in self.keys(): try: yield self[key] except KeyError: pass def recent_uids(self): '''Returns uids for all the messages in the folder that are flagged as recent, but not flagged as deleted.''' imap.SelectFolder(self.name, True) response = imap.uid("SEARCH", "RECENT UNDELETED") self._check(response, "SEARCH RECENT UNDELETED") return response[1][0].split(' ') def keys(self): '''Returns *uids* for all the messages in the folder not marked as deleted.''' imap.SelectFolder(self.name) response = imap.uid("SEARCH", "UNDELETED") self._check(response, "SEARCH UNDELETED") if response[1][0]: return response[1][0].split(' ') else: return [] def __getitem__(self, key): '''Return message (no substance) matching the given *uid*.''' # We don't retrieve the substances of the message here - you need # to call msg.get_substance() to do that. imap.SelectFolder(self.name) # Using RFC822.HEADER.LINES would be better here, but it seems # that not all servers accept it, even though it is in the RFC response = imap.uid("FETCH", key, "RFC822.HEADER") self._check(response, "uid fetch header") data = _extract_fetch_data(response[1][0]) msg = IMAPMessage() msg.setFolder(self) msg.uid = key r = re.compile(re.escape(options["Headers", "mailid_header_name"]) + \ "\:\s*(\d+(\-\d)?)") mo = r.search(data["RFC822.HEADER"]) if mo is None: msg.setId(self._generate_id()) # Unfortunately, we now have to re-save this message, so that # our id is stored on the IMAP server. Before anyone suggests # it, we can't store it as a flag, because user-defined flags # aren't supported by all IMAP servers. # This will need to be done once per message. msg.get_substance() msg.Save() else: msg.setId(mo.group(1)) if options["globals", "verbose"]: sys.stdout.write(".") return msg # Lifted straight from pop3proxy.py (under the name getNewMessageName) def _generate_id(self): # The message id is the time it arrived, with a uniquifier # appended if two arrive within one clock tick of each other. messageName = "%10.10d" % long(time.time()) if messageName == self.lastBaseMessageName: messageName = "%s-%d" % (messageName, self.uniquifier) self.uniquifier += 1 else: self.lastBaseMessageName = messageName self.uniquifier = 2 return messageName def Train(self, classifier, isSpam): '''Train folder as spam/ham''' num_trained = 0 for msg in self: if msg.GetTrained() == (not isSpam): msg.get_substance() msg.delSBHeaders() classifier.unlearn(msg.asTokens(), not isSpam) # Once the message has been untrained, it's training memory # should reflect that on the off chance that for some reason # the training breaks, which happens on occasion (the # tokenizer is not yet perfect) msg.RememberTrained(None) if msg.GetTrained() is None: msg.get_substance() saved_headers = msg.currentSBHeaders() msg.delSBHeaders() classifier.learn(msg.asTokens(), isSpam) num_trained += 1 msg.RememberTrained(isSpam) if isSpam: move_opt_name = "move_trained_spam_to_folder" else: move_opt_name = "move_trained_ham_to_folder" if options["imap", move_opt_name] != "": # We need to restore the SpamBayes headers. for header, value in saved_headers.items(): msg[header] = value msg.MoveTo(IMAPFolder(options["imap", move_opt_name])) msg.Save() return num_trained def Filter(self, classifier, spamfolder, unsurefolder): count = {} count["ham"] = 0 count["spam"] = 0 count["unsure"] = 0 for msg in self: if msg.GetClassification() is None: msg.get_substance() (prob, clues) = classifier.spamprob(msg.asTokens(), evidence=True) # add headers and remember classification msg.delSBHeaders() msg.addSBHeaders(prob, clues) cls = msg.GetClassification() if cls == options["Headers", "header_ham_string"]: # we leave ham alone count["ham"] += 1 elif cls == options["Headers", "header_spam_string"]: msg.MoveTo(spamfolder) count["spam"] += 1 else: msg.MoveTo(unsurefolder) count["unsure"] += 1 msg.Save() return count class IMAPFilter(object): def __init__(self, classifier): self.spam_folder = IMAPFolder(options["imap", "spam_folder"]) self.unsure_folder = IMAPFolder(options["imap", "unsure_folder"]) self.classifier = classifier def Train(self): if options["globals", "verbose"]: t = time.time() total_ham_trained = 0 total_spam_trained = 0 if options["imap", "ham_train_folders"] != "": ham_training_folders = options["imap", "ham_train_folders"] for fol in ham_training_folders: # Select the folder to make sure it exists imap.SelectFolder(fol) if options['globals', 'verbose']: print " Training ham folder %s" % (fol) folder = IMAPFolder(fol) num_ham_trained = folder.Train(self.classifier, False) total_ham_trained += num_ham_trained if options['globals', 'verbose']: print " %s trained." % (num_ham_trained) if options["imap", "spam_train_folders"] != "": spam_training_folders = options["imap", "spam_train_folders"] for fol in spam_training_folders: # Select the folder to make sure it exists imap.SelectFolder(fol) if options['globals', 'verbose']: print " Training spam folder %s" % (fol) folder = IMAPFolder(fol) num_spam_trained = folder.Train(self.classifier, True) total_spam_trained += num_spam_trained if options['globals', 'verbose']: print " %s trained." % (num_spam_trained) if total_ham_trained or total_spam_trained: self.classifier.store() if options["globals", "verbose"]: print "Training took %.4f seconds, %s messages were trained" \ % (time.time() - t, total_ham_trained + total_spam_trained) def Filter(self): if options["globals", "verbose"]: t = time.time() count = {} count["ham"] = 0 count["spam"] = 0 count["unsure"] = 0 # Select the spam folder and unsure folder to make sure they exist imap.SelectFolder(self.spam_folder.name) imap.SelectFolder(self.unsure_folder.name) for filter_folder in options["imap", "filter_folders"]: # Select the folder to make sure it exists imap.SelectFolder(filter_folder) folder = IMAPFolder(filter_folder) subcount = folder.Filter(self.classifier, self.spam_folder, self.unsure_folder) for key in count.keys(): count[key] += subcount.get(key, 0) if options["globals", "verbose"]: if count is not None: print "\nClassified %s ham, %s spam, and %s unsure." % \ (count["ham"], count["spam"], count["unsure"]) print "Classifying took %.4f seconds." % (time.time() - t,) def run(): global imap try: opts, args = getopt.getopt(sys.argv[1:], 'hbPtcvl:e:i:d:p:o:') except getopt.error, msg: print >>sys.stderr, str(msg) + '\n\n' + __doc__ sys.exit() doTrain = False doClassify = False doExpunge = options["imap", "expunge"] imapDebug = 0 sleepTime = 0 promptForPass = False launchUI = False server = "" username = "" for opt, arg in opts: if opt == '-h': print >>sys.stderr, __doc__ sys.exit() elif opt == "-b": launchUI = True elif opt == '-t': doTrain = True elif opt == '-P': promptForPass = True elif opt == '-c': doClassify = True elif opt == '-v': options["globals", "verbose"] = True elif opt == '-e': if arg == 'y': doExpunge = True else: doExpunge = False elif opt == '-i': imapDebug = int(arg) elif opt == '-l': sleepTime = int(arg) * 60 elif opt == '-o': options.set_from_cmdline(arg, sys.stderr) bdbname, useDBM = storage.database_type(opts) # Let the user know what they are using... print get_version_string("IMAP Filter") print "and engine %s.\n" % (get_version_string(),) if (launchUI and (doClassify or doTrain)): print """-b option is exclusive with -c and -t options. The user interface will be launched, but no classification or training will be performed. """ if options["globals", "verbose"]: print "Loading database %s..." % (bdbname), classifier = storage.open_storage(bdbname, useDBM) if options["globals", "verbose"]: print "Done." if options["imap", "server"]: # The options class is ahead of us here: # it knows that imap:server will eventually be able to have # multiple values, but for the moment, we just use the first one server = options["imap", "server"] if len(server) > 0: server = server[0] username = options["imap", "username"] if len(username) > 0: username = username[0] if not promptForPass: pwd = options["imap", "password"] if len(pwd) > 0: pwd = pwd[0] else: pwd = None if not launchUI: print "You need to specify both a server and a username." sys.exit() if promptForPass: pwd = getpass() if server.find(':') > -1: server, port = server.split(':', 1) port = int(port) else: if options["imap", "use_ssl"]: port = 993 else: port = 143 imap_filter = IMAPFilter(classifier) # Web interface if not (doClassify or doTrain): if server != "": imap = IMAPSession(server, port, imapDebug, doExpunge) httpServer = UserInterfaceServer(options["html_ui", "port"]) httpServer.register(IMAPUserInterface(classifier, imap, pwd, IMAPSession)) Dibbler.run(launchBrowser=launchUI or options["html_ui", "launch_browser"]) else: while True: imap = IMAPSession(server, port, imapDebug, doExpunge) imap.login(username, pwd) if doTrain: if options["globals", "verbose"]: print "Training" imap_filter.Train() if doClassify: if options["globals", "verbose"]: print "Classifying" imap_filter.Filter() imap.logout() if sleepTime: time.sleep(sleepTime) else: break if __name__ == '__main__': run()