ports//mail/py-spambayes/work/spambayes-1.0.4/Outlook2000/oastats.py

# oastats.py - Outlook Addin Stats

class Stats:
    def __init__(self, config):
        self.config = config
        self.Reset()
    def Reset(self):
        self.num_ham = self.num_spam = self.num_unsure = 0
        self.num_deleted_spam = self.num_deleted_spam_fn  = 0
        self.num_recovered_good = self.num_recovered_good_fp = 0
    def RecordClassification(self, score):
        score *= 100 # same units as our config values.
        if score >= self.config.filter.spam_threshold:
            self.num_spam += 1
        elif score >= self.config.filter.unsure_threshold:
            self.num_unsure += 1
        else:
            self.num_ham += 1
    def RecordManualClassification(self, recover_as_good, score):
        score *= 100 # same units as our config values.
        if recover_as_good:
            self.num_recovered_good += 1
            # If we are recovering an item that is in the "spam" threshold,
            # then record it as a "false positive"
            if score > self.config.filter.spam_threshold:
                self.num_recovered_good_fp += 1
        else:
            self.num_deleted_spam += 1
            # If we are deleting as Spam an item that was in our "good" range,
            # then record it as a false neg.
            if score < self.config.filter.unsure_threshold:
                self.num_deleted_spam_fn += 1
    def GetStats(self):
        num_seen = self.num_ham + self.num_spam + self.num_unsure
        if num_seen==0:
            return ["SpamBayes has processed zero messages"]
        chunks = []
        push = chunks.append
        perc_ham = 100.0 * self.num_ham / num_seen
        perc_spam = 100.0 * self.num_spam / num_seen
        perc_unsure = 100.0 * self.num_unsure / num_seen
        format_dict = dict(perc_spam=perc_spam, perc_ham=perc_ham,
                           perc_unsure=perc_unsure, num_seen = num_seen)
        format_dict.update(self.__dict__)
        push("SpamBayes has processed %(num_seen)d messages - " \
             "%(num_ham)d (%(perc_ham).0f%%) good, " \
             "%(num_spam)d (%(perc_spam).0f%%) spam " \
             "and %(num_unsure)d (%(perc_unsure).0f%%) unsure" % format_dict)
        if self.num_recovered_good:
            push("%(num_recovered_good)d message(s) were manually " \
                 "classified as good (with %(num_recovered_good_fp)d " \
                 "being false positives)" % format_dict)
        else:
            push("No messages were manually classified as good")
        if self.num_deleted_spam:
            push("%(num_deleted_spam)d message(s) were manually " \
                 "classified as spam (with %(num_deleted_spam_fn)d " \
                 "being false negatives)" % format_dict)
        else:
            push("No messages were manually classified as spam")
        return chunks

if __name__=='__main__':
    class FilterConfig:
        unsure_threshold = 15
        spam_threshold = 85
    class Config:
        filter = FilterConfig()
    # processed zero
    s = Stats(Config())
    print "\n".join(s.GetStats())
    # No recovery
    s = Stats(Config())
    s.RecordClassification(.2)
    print "\n".join(s.GetStats())

    s = Stats(Config())
    s.RecordClassification(.2)
    s.RecordClassification(.1)
    s.RecordClassification(.4)
    s.RecordClassification(.9)
    s.RecordManualClassification(True, 0.1)
    s.RecordManualClassification(True, 0.9)
    s.RecordManualClassification(False, 0.1)
    s.RecordManualClassification(False, 0.9)
    print "\n".join(s.GetStats())
syntax highlighted by Code2HTML, v. 0.9.1