"""A classifier that uses a CDB database.
A CDB wordinfo database is quite small and fast but is slow to update.
It is appropriate if training is done rarely (e.g. monthly or weekly using
archived ham and spam). See mailsort.py for an example application that
uses this classifier.
"""
from spambayes import cdb
from spambayes.tokenizer import tokenize
from spambayes.classifier import Classifier
class CdbClassifier(Classifier):
def __init__(self, cdbfile=None):
Classifier.__init__(self)
if cdbfile is not None:
self.wordinfo = cdb.Cdb(cdbfile)
def probability(self, record):
return float(record)
def save_wordinfo(self, db_file):
items = []
for word, record in self.wordinfo.iteritems():
prob = Classifier.probability(self, record)
items.append((word, str(prob)))
cdb.cdb_make(db_file, items)
syntax highlighted by Code2HTML, v. 0.9.1