#!/usr/bin/perl

use strict;
use vars qw { $USER %CONFIG $SPAM_CORPUS $NONSPAM_CORPUS };

$CONFIG{'SHOW_SUBJECTS'} = 1;
$CONFIG{'DSPAM_BINARY'}  = '@bindir@/@dspam_transformed@'; 
$CONFIG{'BINDIR'}        = '@bindir@';

### DO NOT CONFIGURE ANYTHING BELOW THIS LINE

$USER           = shift;
$SPAM_CORPUS    = shift;
$NONSPAM_CORPUS = shift;

if ($NONSPAM_CORPUS eq "") {
  usage();
}

sub usage {
    print STDERR "Usage: $0 [username] [[-i index]|[spam_dir] [nonspam_dir]]\n";
    exit(-1);
}

if ($SPAM_CORPUS != "-i" && (! -d $SPAM_CORPUS || ! -d $NONSPAM_CORPUS)) {
    print STDERR "ERROR: " . ((-d $SPAM_CORPUS) ? "nonspam" : "spam" ) . "corpus must be path to maildir directory\n";
    usage(); 
}

print "Taking Snapshot...\n";
system("$CONFIG{'BINDIR'}/dspam_stats -r $USER");

&Train("$NONSPAM_CORPUS", "$SPAM_CORPUS");

sub Train {
    my($nonspam, $spam) = @_;
    my(@nonspam_corpus, @spam_corpus);
    my($msg);

    print "Training $nonspam / $spam corpora...\n";

    # Train on index
    if ($SPAM_CORPUS eq "-i") {
      open(INDEX, "<$NONSPAM_CORPUS");
      while(<INDEX>) {
        chomp;
        my($class, $filename) = split(/\s+/);
        if ($class eq "ham" || $class eq "nonspam") {
            TestNonspam(".", $filename);
        } elsif ($class eq "spam") {
            TestSpam(".", $filename);
        } else {
            die "ERROR: Unknown class '$class'. Test Broken.";
        }
      }
      return FinishTraining();
    }

    @nonspam_corpus = GetFiles($nonspam);
    @spam_corpus = GetFiles($spam); 

    while($#nonspam_corpus > -1 || $#spam_corpus > -1) {
        if ($#nonspam_corpus > -1) {
            my($count) = 0;

            # Process nonspam until balanced
            $msg = shift(@nonspam_corpus);
            TestNonspam($nonspam, $msg);
            if ($#spam_corpus > -1) {
                $count = ($#nonspam_corpus+1) / ($#spam_corpus+1);
            }
            for(1..$count-1)
            { 
                $msg = shift(@nonspam_corpus);
                TestNonspam($nonspam, $msg);
            }
        }

        if ($#spam_corpus > -1) {
            my($count) = 0;
            # Process spam until balanced
            $msg = shift(@spam_corpus);
            TestSpam($spam, $msg);
            if ($#nonspam_corpus > -1) {
                $count = ($#spam_corpus+1) / ($#nonspam_corpus+1);
            }
            for(1..$count-1)

            {
                $msg = shift(@spam_corpus);
                TestSpam($spam, $msg);
            }
        }
    }

    FinishTraining();

}

sub FinishTraining() {
    print "TRAINING COMPLETE\n";
    print "\nTraining Snapshot:\n";
    system("$CONFIG{'BINDIR'}/dspam_stats -S -s $USER");
    print "\nOverall Statistics:\n";
    system("$CONFIG{'BINDIR'}/dspam_stats -S $USER");
}

sub GetFiles {
  my($corpus) = @_;
  my(@files);

  opendir(DIR, "$corpus") || die "$corpus: $!";
  @files = grep(!/^\.\.?$/, readdir(DIR));
  closedir(DIR);
  return @files;
} 

sub TestNonspam {
    my($code, $cmd, $response);
    my($dir, $msg) = @_;
    print "[test: nonspam] " . substr($msg . " " x 32, 0, 32) .  " result: ";
    $cmd = "$CONFIG{'DSPAM_BINARY'} --user $USER --deliver=summary < $dir/$msg";
    $response = `$cmd`;

    $code = "UNKNOWN";
    if ($response =~ /class="(\S+)"/i) {
        $code = $1;
    }
    if ($code eq "UNKNOWN") {
        print "\n===== WOAH THERE =====\n";
        print "I was unable to parse the result. Test Broken.\n";
        print "======================\n";
        exit(0);
    }

    if ($code eq "Innocent" || $code eq "Whitelisted") {
        print "PASS";
    } else {
        my($class) = "UNKNOWN";
        my($signature) = "UNKNOWN";
        if ($response =~ /class="(\S+)"/i) {
            $class = $1;
        }

        if ($response =~ /signature=(\S+)/i) {
            $signature = $1;
        } else {
            print "\n===== WOAH THERE =====\n";
            print "I was unable to find the DSPAM signature. Test Broken.\n";
            print "======================\n";
            print "\n$response\n";
            exit(0);
        }

        print "FAIL ($class)";
        if ($CONFIG{'SHOW_SUBJECTS'} == 1) {
            print "\n\t[fp] ";
            open(FILE, "<$dir/$msg");
            while(<FILE>) {
                if (/^Subject:/i) {
                    chomp;
                    print $_;
                    close(FILE);
                }
            }
            close(FILE);
        }
        open(TRAIN, "|$CONFIG{'DSPAM_BINARY'} --user $USER --class=innocent " .
                    "--source=error --signature=$signature");
        close(TRAIN);
    }
    print "\n";
    return;
}

sub TestSpam {
    my($code, $cmd, $response);
    my($dir, $msg) = @_;

    print "[test: spam   ] " . substr($msg . " " x 32, 0, 32) . " result: ";
    $cmd = "$CONFIG{'DSPAM_BINARY'} --user $USER --deliver=summary < $dir/$msg";
    $response = `$cmd`;

    $code = "UNKNOWN";
    if ($response =~ /class="(\S+)"/i) {
        $code = $1;
    }
    if ($code eq "UNKNOWN") {
        print "\n===== WOAH THERE =====\n";
        print "I was unable to parse the result. Test Broken.\n";
        print "======================\n";
        exit(0);
    }

    if ($code eq "Spam") {
        print "PASS";
    } else {
        my($class) = "UNKNOWN";
        my($signature) = "UNKNOWN";
        if ($response =~ /class="(\S+)"/i) {
            $class = $1;
        }

        if ($response =~ /signature=(\S+)/i) {
            $signature = $1;
        } else {
            print "\n===== WOAH THERE =====\n";
            print "I was unable to find the DSPAM signature. Test Broken.\n";
            print "======================\n";
            print "\n$response\n";
            exit(0);
        }
        print "FAIL ($class)";

        if ($CONFIG{'SHOW_SUBJECTS'} == 1) {
            print "\n\t[fn] ";
            open(FILE, "<$dir/$msg");
            while(<FILE>) {
                if (/^Subject:/i) {
                    chomp;
                    print $_;
                    close(FILE);
                }
            }
            close(FILE);
        }
        open(TRAIN, "|$CONFIG{'DSPAM_BINARY'} --user $USER --class=spam ".
                    "--source=error --signature=$signature");
        close(TRAIN);
    }
    print "\n";
    return;
}


syntax highlighted by Code2HTML, v. 0.9.1