#! /usr/local/bin/python2.3
# This script has a similar interface and purpose to sb_filter, but avoids
# re-initialising spambayes for consecutive requests using a short-lived
# server process. This is intended to give the performance advantages of
# sb_xmlrpcserver, without the administrative complications.
#
# The strategy is:
#
# * while we cant connect to a unix domain socket
# * fork a separate process that runs in the background
# * in the child process:
# * exec sb_bnserver. it listens on that same unix domain socket.
# * in the parent process:
# * sleep a little, to give the child chance to start up
# * write the filtering/training command line options to the socket
# * copy the content of stdin to the socket
# * meanwhile..... sb_bnserver gets to work on that data in the same manner
# as sb_filter. it writes its response back through that socket
# * read a line from the socket containing a success/failure code
# * read a line from the socket containing a byte count
# * copy the remainder of the content of the socket to stdout or stderr,
# depending on whether it reported success or failure.
# * if the number of bytes read from the socket is different to the byte
# count, exit with an error
# * if the reported exit code is non-zero, exit with an error
#
# sb_bnfilter will only terminate with a zero exit code if everything
# is ok. If it terminates with a non-zero exit code then its stdout should
# be ignored.
#
# sb_bnserver will close itself and remove its socket after a period of
# inactivity to ensure it does not use up resources indefinitely.
#
# Author: Toby Dickenson
#
"""Usage: %(program)s [options]
Where:
-h
show usage and exit
* -f
filter (default if no processing options are given)
* -g
[EXPERIMENTAL] (re)train as a good (ham) message
* -s
[EXPERIMENTAL] (re)train as a bad (spam) message
* -t
[EXPERIMENTAL] filter and train based on the result -- you must
make sure to untrain all mistakes later. Not recommended.
* -G
[EXPERIMENTAL] untrain ham (only use if you've already trained
this message)
* -S
[EXPERIMENTAL] untrain spam (only use if you've already trained
this message)
-k FILE
Unix domain socket used to communicate with a short-lived server
process. Default is ~/.sbbnsock-<hostname>
These options will not take effect when connecting to a preloaded server:
-p FILE
use pickle FILE as the persistent store. loads data from this file
if it exists, and saves data to this file at the end.
-d FILE
use DBM store FILE as the persistent store.
-o section:option:value
set [section, option] in the options database to value
-a seconds
timeout in seconds between requests before this server terminates
-A number
terminate this server after this many requests
"""
import sys, getopt, socket, errno, os, time
def usage(code, msg=''):
"""Print usage message and sys.exit(code)."""
if msg:
print >> sys.stderr, msg
print >> sys.stderr
print >> sys.stderr, __doc__
sys.exit(code)
def main():
try:
opts, args = getopt.getopt(sys.argv[1:], 'hfgstGSd:p:o:a:A:k:')
except getopt.error, msg:
usage(2, msg)
# build the default socket filename from environment variables
filename = os.path.expanduser('~/.sbbnsock-'+socket.gethostname())
action_options = []
server_options = []
for opt, arg in opts:
if opt == '-h':
usage(0)
elif opt in ('-f', '-g', '-s', '-t', '-G', '-S'):
action_options.append(opt)
elif opt in ('-d', '-p', '-o', '-a', '-A'):
server_options.append(opt)
server_options.append(arg)
elif opt == '-k':
filename = arg
if len(args) != 0:
usage(2)
server_options.append(filename)
s = make_socket(server_options, filename)
# We have a connection to the existing shared server
w_file = s.makefile('w')
r_file = s.makefile('r')
# pass our command line on the first line into the socket
w_file.write(' '.join(action_options)+'\n')
# copy entire contents of stdin into the socket
while 1:
b = sys.stdin.read(1024*64)
if not b:
break
w_file.write(b)
w_file.flush()
w_file.close()
s.shutdown(1)
# expect to get back a line containing the size of the rest of the response
error = int(r_file.readline())
expected_size = int(r_file.readline())
if error:
output = sys.stderr
else:
output = sys.stdout
total_size = 0
# copy entire contents of socket into stdout or stderr
while 1:
b = r_file.read(1024*64)
if not b:
break
output.write(b)
total_size += len(b)
output.flush()
# If we didnt receive the right amount then something has gone wrong.
# exit now, and procmail will ignore everything we have sent to stdout.
# Note that this policy is different to the xmlrpc client, which
# tries to handle errors internally by constructing a stdout that is
# the same as stdin was.
if total_size != expected_size:
print >> sys.stderr, 'size mismatch %d != %d' % (total_size,
expected_size)
sys.exit(3)
if error:
sys.exit(error)
def make_socket(server_options, file):
refused_count = 0
no_server_count = 0
while 1:
try:
s = socket.socket(socket.AF_UNIX,socket.SOCK_STREAM)
s.connect(file)
except socket.error,e:
if e[0] == errno.EAGAIN:
# baaah
pass
elif e[0] == errno.ENOENT or not os.path.exists(file):
# We need to check os.path.exists for use on operating systems that
# never return ENOENT; linux 2.2.
#
# no such file.... no such server. create one.
no_server_count += 1
if no_server_count>4:
raise
fork_server(server_options)
elif e[0] == errno.ECONNREFUSED:
# socket file exists but noone listening.
refused_count += 1
if refused_count == 6:
# We have been waiting ages and still havent been able
# to connect. Maybe that socket file has got
# orphaned. remove it, wait, and try again. We need to
# allow enough time for sb_bnserver to initialise the
# rest of spambayes
try:
os.unlink(file)
except EnvironmentError:
pass
elif refused_count>6:
raise
else:
raise # some other problem
time.sleep(0.2 * 2.0**no_server_count * 2.0**refused_count)
else:
return s
def fork_server(options):
if os.fork():
# parent
return
os.close(0)
sys.stdin = sys.__stdin__ = open("/dev/null")
os.close(1)
sys.stdout = sys.__stdout__ = open("/dev/null", "w")
# leave stderr
# os.close(2)
# sys.stderr = sys.__stderr__ = open("/dev/null", "w")
os.setsid()
# Use exec rather than import here because eventually it may be nice to
# reimplement this one file in C
os.execv(sys.executable,[sys.executable,
os.path.join(os.path.split(sys.argv[0])[0],
'sb_bnserver.py') ]+options)
# should never get here
sys._exit(1)
if __name__ == "__main__":
main()
syntax highlighted by Code2HTML, v. 0.9.1