#!/usr/local/bin/python2.3
"""
cmp.py sbase1 sbase2
Combines output from sbase1.txt and sbase2.txt, which are created by
rates.py from timtest.py output, and displays comparison statistics to
stdout.
"""
import sys
# Both summary base names are required.  Without this check a missing
# argument surfaces as an unhelpful tuple-unpacking ValueError traceback.
# sys.exit() with a string writes it to stderr and exits with status 1.
if len(sys.argv) < 3:
    sys.exit("usage: cmp.py sbase1 sbase2")

# Only the first two arguments are used; any extras are ignored.
f1n, f2n = sys.argv[1:3]
def suck(f):
    """Parse one summary file and return its statistics.

    f is an open file-like object; only its readline() method is used.

    Returns the 10-tuple
        (list of all f-p rates,
         list of all f-n rates,
         total f-p,
         total f-n,
         average f-p rate,
         average f-n rate,
         list of all ham score deviations,
         list of all spam score deviations,
         ham score deviation for all runs,
         spam score deviation for all runs)
    Each "deviation" is a (mean, sdev) pair of floats.  The two "for all
    runs" pairs stay at (0.0, 0.0) when the file has no such line.

    Every '-> <stat> tested' line is echoed to stdout as a side effect.

    Raises ValueError if the file ends before the "total" lines are
    reached.  Blank lines are skipped.
    """
    fps = []
    fns = []
    hamdev = []
    spamdev = []
    hamdevall = spamdevall = (0.0, 0.0)

    get = f.readline
    while True:
        line = get()
        if not line:
            # readline() returns '' only at end of file.  Without this check
            # a truncated file fails below with a confusing unpacking error.
            raise ValueError("summary file ended before the 'total' lines")
        if not line.strip():
            # A blank line carries no rates; skip it instead of crashing.
            continue

        if line.startswith('-> <stat> tested'):
            # The line still carries its own newline, so write it unchanged.
            sys.stdout.write(line)

        if ' items; mean ' in line:
            # -> <stat> Ham distribution for this pair: 1000 items; mean 0.05; sample sdev 0.68
            # and later "sample " went away
            vals = line.split(';')
            mean = float(vals[1].split()[-1])
            sdev = float(vals[2].split()[-1])
            val = (mean, sdev)
            # Third word of the first field is 'Ham' or 'Spam'.
            typ = vals[0].split()[2]
            if 'for all runs' in line:
                if typ == 'Ham':
                    hamdevall = val
                else:
                    spamdevall = val
            elif 'all in this' in line:
                if typ == 'Ham':
                    hamdev.append(val)
                else:
                    spamdev.append(val)
            continue

        if line.startswith('-> '):
            # Any other annotation line is ignored.
            continue
        if line.startswith('total'):
            break

        # A line with an f-p rate and an f-n rate.
        p, n = [float(field) for field in line.split()]
        fps.append(p)
        fns.append(n)

    # The loop stops on the first of these four trailer lines:
    # "total unique false pos 0"
    # "total unique false neg 0"
    # "average fp % 0.0"
    # "average fn % 0.0"
    fptot = int(line.split()[-1])
    fntot = int(get().split()[-1])
    fpmean = float(get().split()[-1])
    fnmean = float(get().split()[-1])

    return (fps, fns, fptot, fntot, fpmean, fnmean,
            hamdev, spamdev, hamdevall, spamdevall)
def tag(p1, p2):
    """Label the change from p1 to p2 as tied, lost or won.

    Equal values give "tied " with nothing appended.  Otherwise the label is
    "lost " when p1 is less than p2 and "won " when it is not, followed by
    the percentage change relative to p1, or by " +(was 0)" when p1 is zero
    and no percentage can be computed.
    """
    if p1 == p2:
        return "tied "

    if p1 < p2:
        label = "lost "
    else:
        label = "won "

    if not p1:
        # Division by a zero baseline is undefined; say so instead.
        return label + " +(was 0)"

    change = (p2 - p1) * 100.0 / p1
    return label + " %+7.2f%%" % change
def mtag(m1, m2):
    """Format two (mean, sdev) pairs side by side with percentage changes.

    The result holds both means and the change of the mean from m1 to m2,
    then both sdevs and the change of the sdev.  A change whose starting
    value is zero is shown as "+(was 0)".
    """
    def change(old, new):
        # Percentage change relative to old; undefined for a zero baseline.
        if old:
            return "%+7.2f%%" % ((new - old) * 100.0 / old)
        return "+(was 0)"

    mean1, dev1 = m1
    mean2, dev2 = m2
    pieces = [
        "%7.2f %7.2f " % (mean1, mean2),
        change(mean1, mean2),
        " %7.2f %7.2f " % (dev1, dev2),
        change(dev1, dev2),
    ]
    return "".join(pieces)
def dump(p1s, p2s):
    """Print a per-pair comparison of two rate lists, then a tally.

    Each pair (p1, p2) taken in step from p1s and p2s is printed with its
    tag() label.  After a blank line, the number of "won", "tied" and
    "lost" outcomes is printed, followed by another blank line.
    """
    labels = []
    for before, after in zip(p1s, p2s):
        label = tag(before, after)
        print(" %5.3f %5.3f %s" % (before, after, label))
        labels.append(label)
    print("")

    # Outcomes are counted as substring hits in the collected label text.
    collected = " ".join(labels)
    for outcome in ("won", "tied", "lost"):
        print("%-4s %2d times" % (outcome, collected.count(outcome)))
    print("")
def dumpdev(meandev1, meandev2):
    """Print one mtag() line for each pair taken in step from both inputs."""
    paired = zip(meandev1, meandev2)
    for old, new in paired:
        print(mtag(old, new))
def windowsfy(fn):
    """Return fn + '.txt' if a file of that name exists, else fn unchanged."""
    import os

    with_extension = fn + '.txt'
    if not os.path.exists(with_extension):
        return fn
    return with_extension
def _load(path):
    """Parse the summary file at path with suck(), then close the file."""
    f = open(path)
    try:
        return suck(f)
    finally:
        # Close explicitly rather than relying on garbage collection.
        f.close()


# Show the names as given on the command line before resolving them to
# actual file names.  Single-argument print(...) behaves exactly like the
# print statement, so the output is unchanged.
print(" ".join([f1n, '->', f2n]))

f1n = windowsfy(f1n)
f2n = windowsfy(f2n)

(fp1, fn1, fptot1, fntot1, fpmean1, fnmean1,
 hamdev1, spamdev1, hamdevall1, spamdevall1) = _load(f1n)
(fp2, fn2, fptot2, fntot2, fpmean2, fnmean2,
 hamdev2, spamdev2, hamdevall2, spamdevall2) = _load(f2n)

print("")
print("false positive percentages")
dump(fp1, fp2)
print(" ".join(["total unique fp went from", str(fptot1),
                "to", str(fptot2), tag(fptot1, fptot2)]))
print(" ".join(["mean fp % went from", str(fpmean1),
                "to", str(fpmean2), tag(fpmean1, fpmean2)]))

print("")
print("false negative percentages")
dump(fn1, fn2)
print(" ".join(["total unique fn went from", str(fntot1),
                "to", str(fntot2), tag(fntot1, fntot2)]))
print(" ".join(["mean fn % went from", str(fnmean1),
                "to", str(fnmean2), tag(fnmean1, fnmean2)]))

print("")
# The per-run lists are compared pairwise, so they are only reported when
# both files supplied the same number of entries.
if len(hamdev1) == len(hamdev2) and len(spamdev1) == len(spamdev2):
    print("ham mean ham sdev")
    dumpdev(hamdev1, hamdev2)
    print("")
    print("ham mean and sdev for all runs")
    dumpdev([hamdevall1], [hamdevall2])

    print("")
    print("spam mean spam sdev")
    dumpdev(spamdev1, spamdev2)
    print("")
    print("spam mean and sdev for all runs")
    dumpdev([spamdevall1], [spamdevall2])

    print("")
    # Gap between the overall spam mean and the overall ham mean per file.
    diff1 = spamdevall1[0] - hamdevall1[0]
    diff2 = spamdevall2[0] - hamdevall2[0]
    print("ham/spam mean difference: %2.2f %2.2f %+2.2f" % (diff1,
                                                             diff2,
                                                             diff2 - diff1))
else:
    print("[info about ham & spam means & sdevs not available in both files]")
# syntax highlighted by Code2HTML, v. 0.9.1