#!/usr/local/bin/python2.3
"""
rates.py basename ...

Assuming that file

    basename + '.txt'
or
    basename

contains output from one of the test drivers (timcv, mboxtest, timtest),
scans that file for summary statistics, displays them to stdout, and also
writes them to file

    basename + 's.txt'

(where the 's' means 'summary').  This doesn't need a full output file
from a test run, and will display stuff for as far as the output file
has gotten so far.

Two of these summary files can later be fed to cmp.py.
"""
import sys
"""
-> Training on Data/Ham/Set2-3 & Data/Spam/Set2-3 ... 8000 hams & 5500 spams
-> Predicting Data/Ham/Set1 & Data/Spam/Set1 ...
-> <stat> tested 4000 hams & 2750 spams against 8000 hams & 5500 spams
-> <stat> false positive %: 0.025
-> <stat> false negative %: 0.327272727273
-> <stat> 1 new false positives
"""
def doit(basename):
    """Summarize the test-driver output stored under `basename`.

    Reads basename + '.txt' (or, if that cannot be opened, `basename`
    itself), keeps only the lines starting with '-> ', echoes each of them
    to stdout and to the summary file basename + 's.txt', and finishes with
    the total new false positives/negatives and the average error rates.

    A trailing '.txt' on `basename` is dropped first, so either spelling
    may be passed.  Returns None.  Raises IOError (OSError on Python 3) if
    neither input file can be opened or the summary file cannot be written.

    The code deliberately sticks to constructs that behave the same on
    Python 2.3 and Python 3 (open(), single-argument print(), try/finally).
    """
    if basename.endswith('.txt'):
        basename = basename[:-4]
    try:
        ifile = open(basename + '.txt')
    except IOError:
        ifile = open(basename)
    try:
        # Build a real list before closing: a lazy filter() would be read
        # only after the file is already closed.
        interesting = [line for line in ifile if line.startswith('-> ')]
    finally:
        ifile.close()

    oname = basename + 's.txt'
    ofile = open(oname, 'w')
    try:
        print(basename + ' -> ' + oname)

        def dump(*stuff):
            # Send one space-joined line to both stdout and the summary file.
            msg = ' '.join(map(str, stuff))
            print(msg)
            ofile.write(msg + '\n')

        ntests = nfn = nfp = 0
        sumfnrate = sumfprate = 0.0
        # Most recent false positive rate still waiting for its matching
        # false negative line; None when there is nothing to pair with.
        lastfp = None

        for line in interesting:
            # Strip only a newline: an unfinished output file may end
            # without one, and slicing off the last character would then
            # eat real data.
            dump(line.rstrip('\n'))
            fields = line.split()

            # 0  1      2      3    4    5 6    -5    -4      -3   -2 -1
            #-> <stat> tested 4000 hams & 2750 spams against 8000 hams & 5500 spams
            if line.startswith('-> <stat> tested '):
                ntests += 1
                continue

            #  0  1      2     3
            # -> <stat> false positive %: 0.025
            # -> <stat> false negative %: 0.327272727273
            if line.startswith('-> <stat> false '):
                kind = fields[3]
                percent = float(fields[-1])
                if kind == 'positive':
                    sumfprate += percent
                    lastfp = percent
                else:
                    sumfnrate += percent
                    # Emit the "fp fn" pair line only when a false positive
                    # rate actually preceded this line, and never reuse a
                    # stale rate from an earlier test.
                    if lastfp is not None:
                        dump(' %7.3f %7.3f' % (lastfp, percent))
                        lastfp = None
                continue

            #  0  1     2 3   4     5
            # -> <stat> 1 new false positives
            if len(fields) >= 5 and fields[3] == 'new' and fields[4] == 'false':
                kind = fields[-1]
                count = int(fields[2])
                if kind == 'positives':
                    nfp += count
                else:
                    nfn += count

        dump('total unique false pos', nfp)
        dump('total unique false neg', nfn)
        # With no completed test yet there is nothing to average; report
        # 0.0 rather than dying with ZeroDivisionError.
        if ntests:
            avgfp = sumfprate / ntests
            avgfn = sumfnrate / ntests
        else:
            avgfp = avgfn = 0.0
        dump('average fp %', avgfp)
        dump('average fn %', avgfn)
    finally:
        ofile.close()
# Script driver: every command-line argument is a basename to summarize,
# handled in the order given.
basenames = sys.argv[1:]
for name in basenames:
    doit(name)
# syntax highlighted by Code2HTML, v. 0.9.1