#!/usr/local/bin/python2.3
## A hammie front-end to make the simple stuff simple.
##
##
## The intent is to call this from procmail and its ilk like so:
##
## :0 fw
## | sb_filter.py
##
## Then, you can set up your MUA to pipe ham and spam to it, one at a
## time, by calling it with either the -g or -s options, respectively.
##
## Author: Neale Pickett <neale@woozle.org>
##
"""Usage: %(program)s [options] [filenames]
Options can be one or more of:
-h
show usage and exit
-x
show some usage examples and exit
-d DBFILE
use database in DBFILE
-p PICKLEFILE
use pickle (instead of database) in PICKLEFILE
-n
create a new database
* -f
filter (default if no processing options are given)
* -g
[EXPERIMENTAL] (re)train as a good (ham) message
* -s
[EXPERIMENTAL] (re)train as a bad (spam) message
* -t
[EXPERIMENTAL] filter and train based on the result -- you must
make sure to untrain all mistakes later. Not recommended.
* -G
[EXPERIMENTAL] untrain ham (only use if you've already trained
this message)
* -S
[EXPERIMENTAL] untrain spam (only use if you've already trained
this message)
-o section:option:value
set [section, option] in the options database to value
All options marked with '*' operate on stdin, and write the resultant
message to stdout.
If no filenames are given on the command line, standard input will be
processed as a single message. If one or more filenames are given on the
command line, each will be processed according to the following rules:
* If the filename is '-', standard input will be processed as a single
message (may only be usefully given once).
* If the filename starts with '+' it will be processed as an MH folder.
* If the filename is a directory and it contains a subdirectory named
'cur', it will be processed as a Maildir.
* If the filename is a directory and it contains a subdirectory named
'Mail', it will be processed as an MH Mailbox.
* If the filename is a directory and not a Maildir nor an MH Mailbox, it
will be processed as a Mailbox directory consisting of just .txt and
.lorien files.
* Otherwise, the filename is treated as a Unix-style mailbox (messages
begin on a line starting with 'From ').
Output is always to standard output as a Unix-style mailbox.
"""
import os
import sys
import getopt
from spambayes import hammie, Options, mboxutils, storage
from spambayes.Version import get_version_string
# Probe for the True/False names; where they are not defined the lookup
# raises NameError and integer stand-ins are bound instead.
try:
    True, False
except NameError:
    # Maintain compatibility with Python 2.2
    True, False = 1, 0
# See Options.py for explanations of these properties
# NOTE(review): no option properties follow this comment in the visible
# source -- it looks stale; confirm before relying on it.
# Script name as invoked; usage() and examples() substitute it for
# %(program)s via globals().
program = sys.argv[0]
# Template printed by examples(); %(program)s is filled in from the module
# globals (see `program`).  The "\n" sequences inside the macro lines are
# ordinary string escapes because this is not a raw string.
example_doc = """_Examples_
filter a message on disk:
%(program)s < message
(re)train a message as ham:
%(program)s -g < message
(re)train a message as spam:
%(program)s -s < message
procmail recipe to filter and train in one step:
:0 fw
| %(program)s -t
mutt configuration: This binds the 'H' key to retrain the message as
ham, and prompt for a folder to move it to. The 'S' key retrains as
spam, and moves to a 'spam' folder. See contrib/muttrc in the spambayes
distribution for other neat mutt tricks.
macro index S "|sb_filter.py -s | procmail\n"
macro pager S "|sb_filter.py -s | procmail\n"
macro index H "|sb_filter.py -g | procmail\n"
macro pager H "|sb_filter.py -g | procmail\n"
color index red black "~h 'X-Spambayes-Disposition: spam' ~F"
"""
def examples():
    """Write the usage examples to standard output and exit with status 0.

    The example template's %(program)s placeholders are filled in from the
    module globals before printing.
    """
    rendered = example_doc % globals()
    sys.stdout.write("%s\n" % (rendered,))
    sys.exit(0)
def usage(code, msg=''):
    """Write the version banner and usage text to stderr, then exit.

    code -- exit status passed to sys.exit().
    msg  -- optional extra message, shown (followed by a blank line)
            between the version banner and the usage text; skipped when
            empty.
    """
    emit = sys.stderr.write
    # Version banner: this script first, then the engine version.
    emit("%s\n" % (get_version_string("sb_filter"),))
    emit(" with engine %s\n" % get_version_string())
    emit("\n")
    if msg:
        emit("%s\n" % (msg,))
        emit("\n")
    # The module docstring doubles as the usage text; its %(program)s
    # placeholder is filled in from the module globals.
    emit("%s\n" % (__doc__ % globals(),))
    sys.exit(code)
class HammieFilter(object):
    """Front-end object that opens a hammie database on demand.

    The database name and type come from storage.database_type(); the
    underlying hammie object is only opened (via hammie.open) when one of
    the filter/train methods needs it, and is re-opened when a different
    open mode is requested.  The modes used here are 'r' (filter), 'c'
    (training/untraining) and 'n' (newdb); their exact meaning is defined
    by hammie.open -- presumably dbm-style flags, confirm there.
    """

    # Class-level defaults: close() is also installed as __del__, which can
    # run on an instance whose __init__ raised before assigning these.
    # Without the defaults that path fails with AttributeError.
    h = None
    mode = None

    def __init__(self):
        """Load option files and work out which database to use."""
        options = Options.options
        # This is a bit of a hack to counter the default for
        # persistent_storage_file changing from ~/.hammiedb to hammie.db
        # This will work unless a user:
        #   * had hammie.db as their value for persistent_storage_file, and
        #   * their config file was loaded by Options.py.
        if options["Storage", "persistent_storage_file"] == \
           options.default("Storage", "persistent_storage_file"):
            options["Storage", "persistent_storage_file"] = \
                                    "~/.hammiedb"
        options.merge_files(['/etc/hammierc',
                             os.path.expanduser('~/.hammierc')])
        self.dbname, self.usedb = storage.database_type([])
        self.h = None
        self.mode = None

    def open(self, mode):
        """Ensure a hammie is open in `mode`.

        Nothing happens when one is already open in that mode.  When one
        is open in a different mode it is stored first (unless it was
        opened with 'r') and then replaced by a fresh hammie.open() call.
        """
        if self.h is not None:
            if self.mode == mode:
                return
            # Switching modes: persist pending changes of a writable db.
            if self.mode != 'r':
                self.h.store()
        self.mode = mode
        self.h = hammie.open(self.dbname, self.usedb, self.mode)

    def close(self):
        """Store the open hammie (unless opened with 'r') and drop it.

        Safe to call repeatedly, and on an instance that never opened a
        database.
        """
        if self.h is not None:
            if self.mode != 'r':
                self.h.store()
        self.h = None

    # Make sure pending changes are stored when the object is collected.
    __del__ = close

    def newdb(self):
        """Open the database in 'n' mode, close it, and report on stderr."""
        self.open('n')
        self.close()
        sys.stderr.write("Created new database in %s\n" % (self.dbname,))

    def filter(self, msg):
        """Return h.filter(msg) using a database opened with mode 'r'."""
        self.open('r')
        return self.h.filter(msg)

    def filter_train(self, msg):
        """Return h.filter(msg, train=True) using a mode-'c' database.

        Unlike the train/untrain methods this does not call store()
        itself; changes are stored by a later close() (including the one
        triggered through __del__) or by a mode switch in open().
        """
        self.open('c')
        return self.h.filter(msg, train=True)

    def train_ham(self, msg):
        """Train `msg` as ham and store the database.

        The second argument passed to h.train_ham is True -- presumably
        the "retrain" flag, matching the "(re)train" wording of the usage
        text; confirm against hammie.
        """
        self.open('c')
        self.h.train_ham(msg, True)
        self.h.store()

    def train_spam(self, msg):
        """Train `msg` as spam and store the database.

        The second argument passed to h.train_spam is True -- presumably
        the "retrain" flag; confirm against hammie.
        """
        self.open('c')
        self.h.train_spam(msg, True)
        self.h.store()

    def untrain_ham(self, msg):
        """Untrain `msg` as ham and store the database."""
        self.open('c')
        self.h.untrain_ham(msg)
        self.h.store()

    def untrain_spam(self, msg):
        """Untrain `msg` as spam and store the database."""
        self.open('c')
        self.h.untrain_spam(msg)
        self.h.store()
def main():
    """Parse the command line and run the chosen actions on every message.

    Processing options (-f, -g, -s, -t, -G, -S) are collected in the order
    given; when none is supplied the default is to filter.  -n creates a
    new database and exits.  -h / -x print help / examples and exit.  An
    unrecognised option or missing option argument prints the usage text
    with the getopt error message and exits with status 2.

    Each message from each mailbox named on the command line (standard
    input, "-", when none is named) is passed to every selected action and
    then written to standard output.  The actions' return values are not
    used; the message object itself is serialised afterwards.
    """
    h = HammieFilter()
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hxd:p:nfgstGSo:',
                                   ['help', 'examples', 'option='])
    except getopt.GetoptError:
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the syntax is accepted by old and new
        # interpreters alike.  usage() exits, so nothing below runs.
        usage(2, str(sys.exc_info()[1]))

    # Processing options mapped to the bound method that implements them.
    action_for = {
        '-f': h.filter,
        '-g': h.train_ham,
        '-s': h.train_spam,
        '-t': h.filter_train,
        '-G': h.untrain_ham,
        '-S': h.untrain_spam,
    }
    actions = []
    create_newdb = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-x', '--examples'):
            examples()
        elif opt in ('-o', '--option'):
            Options.options.set_from_cmdline(arg, sys.stderr)
        elif opt == '-n':
            create_newdb = True
        elif opt in action_for:
            actions.append(action_for[opt])
        # -d / -p are not handled here; the whole option list is handed to
        # storage.database_type() below.

    h.dbname, h.usedb = storage.database_type(opts)

    if create_newdb:
        h.newdb()
        sys.exit(0)

    if not actions:
        actions = [h.filter]

    if not args:
        args = ["-"]
    # Only when stdin is the sole input does the "From " line depend on
    # whether the incoming message had one; otherwise it is always written.
    stdin_only = (args == ["-"])

    for fname in args:
        mbox = mboxutils.getmbox(fname)
        for msg in mbox:
            for action in actions:
                action(msg)
            if stdin_only:
                unixfrom = msg.get_unixfrom() is not None
            else:
                unixfrom = True
            result = mboxutils.as_string(msg, unixfrom=unixfrom)
            sys.stdout.write(result)
# Run the command-line front-end only when executed as a script.
if __name__ == "__main__":
    main()
## syntax highlighted by Code2HTML, v. 0.9.1