# <@LICENSE>
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to you under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# </@LICENSE>
=head1 NAME
Mail::SpamAssassin::Plugin::AutoLearnThreshold - threshold-based discriminator for Bayes auto-learning
=head1 SYNOPSIS
loadplugin Mail::SpamAssassin::Plugin::AutoLearnThreshold
=head1 DESCRIPTION
This plugin implements the threshold-based auto-learning discriminator
for SpamAssassin's Bayes subsystem. Auto-learning is a mechanism
whereby high-scoring mails (or low-scoring mails, for non-spam) are fed
into its learning systems without user intervention, during scanning.
Note that certain tests are ignored when determining whether a message
should be trained upon:
=over 4
=item * rules with tflags set to 'learn' (the Bayesian rules)
=item * rules with tflags set to 'userconf' (user configuration)
=item * rules with tflags set to 'noautolearn'
=back
Also note that auto-learning occurs using scores from either scoreset 0
or 1, depending on what scoreset is used during message check. It is
likely that the message check and auto-learn scores will be different.
=cut
package Mail::SpamAssassin::Plugin::AutoLearnThreshold;
use Mail::SpamAssassin::Plugin;
use Mail::SpamAssassin::Logger;
use strict;
use warnings;
use bytes;
use vars qw(@ISA);
@ISA = qw(Mail::SpamAssassin::Plugin);
sub new {
my $class = shift;
my $mailsaobject = shift;
$class = ref($class) || $class;
my $self = $class->SUPER::new($mailsaobject);
bless ($self, $class);
$self->set_config($mailsaobject->{conf});
return $self;
}
sub set_config {
my($self, $conf) = @_;
my @cmds = ();
=head1 USER OPTIONS
The following configuration settings are used to control auto-learning:
=over 4
=item bayes_auto_learn_threshold_nonspam n.nn (default: 0.1)
The score threshold below which a mail has to score, to be fed into
SpamAssassin's learning systems automatically as a non-spam message.
=cut
push (@cmds, {
setting => 'bayes_auto_learn_threshold_nonspam',
default => 0.1,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
});
=item bayes_auto_learn_threshold_spam n.nn (default: 12.0)
The score threshold above which a mail has to score, to be fed into
SpamAssassin's learning systems automatically as a spam message.
Note: SpamAssassin requires at least 3 points from the header, and 3
points from the body to auto-learn as spam. Therefore, the minimum
working value for this option is 6.
=cut
push (@cmds, {
setting => 'bayes_auto_learn_threshold_spam',
default => 12.0,
type => $Mail::SpamAssassin::Conf::CONF_TYPE_NUMERIC
});
$conf->{parser}->register_commands(\@cmds);
}
sub autolearn_discriminator {
my ($self, $params) = @_;
my $scan = $params->{permsgstatus};
my $conf = $scan->{conf};
# Figure out min/max for autolearning.
# Default to specified auto_learn_threshold settings
my $min = $conf->{bayes_auto_learn_threshold_nonspam};
my $max = $conf->{bayes_auto_learn_threshold_spam};
# Find out what score we should consider this message to have ...
my $score = $scan->get_autolearn_points();
my $body_only_points = $scan->get_body_only_points();
my $head_only_points = $scan->get_head_only_points();
my $learned_points = $scan->get_learned_points();
dbg("learn: auto-learn? ham=$min, spam=$max, ".
"body-points=".$body_only_points.", ".
"head-points=".$head_only_points.", ".
"learned-points=".$learned_points);
my $isspam;
if ($score < $min) {
$isspam = 0;
} elsif ($score >= $max) {
$isspam = 1;
} else {
dbg("learn: auto-learn? no: inside auto-learn thresholds, not considered ham or spam");
return;
}
my $learner_said_ham_points = -1.0;
my $learner_said_spam_points = 1.0;
if ($isspam) {
my $required_body_points = 3;
my $required_head_points = 3;
if ($body_only_points < $required_body_points) {
dbg("learn: auto-learn? no: scored as spam but too few body points (".
$body_only_points." < ".$required_body_points.")");
return;
}
if ($head_only_points < $required_head_points) {
dbg("learn: auto-learn? no: scored as spam but too few head points (".
$head_only_points." < ".$required_head_points.")");
return;
}
if ($learned_points < $learner_said_ham_points) {
dbg("learn: auto-learn? no: scored as spam but learner indicated ham (".
$learned_points." < ".$learner_said_ham_points.")");
return;
}
if (!$scan->is_spam()) {
dbg("learn: auto-learn? no: scored as ham but autolearn wanted spam");
return;
}
} else {
if ($learned_points > $learner_said_spam_points) {
dbg("learn: auto-learn? no: scored as ham but learner indicated spam (".
$learned_points." > ".$learner_said_spam_points.")");
return;
}
if ($scan->is_spam()) {
dbg("learn: auto-learn? no: scored as spam but autolearn wanted ham");
return;
}
}
dbg("learn: auto-learn? yes, ".($isspam?"spam ($score > $max)":"ham ($score < $min)"));
return $isspam;
}
1;
=back
=cut
syntax highlighted by Code2HTML, v. 0.9.1