#!/usr/bin/perl
#-----------------------------------------------------------------------------
#
#  POPular -- A POP3 server and proxy for large mail systems
#
#  $Id: pclean,v 1.4 2001/02/09 15:59:07 sqrt Exp $
#
#  http://www.remote.org/jochen/mail/popular/
#
#-----------------------------------------------------------------------------
#
#  pclean
#
#  This script does (nightly) mailbox cleanup, expire of mailboxes, etc.
#
#-----------------------------------------------------------------------------
#
#  Copyright (C) 1999-2001  Jochen Topf <jochen@remote.org>
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
#
#-----------------------------------------------------------------------------

#-----------------------------------------------------------------------------
#
#  TODO
#
#  - master check: file format, ownership, permissions
#  - Quota File should not count towards quota when delivering and checking
#  - expire time vars are a mess
#  - histgram for: mailsize, mailbox size
#
#-----------------------------------------------------------------------------

use strict;
use Getopt::Std;

my %opts;
getopts('Dtescmbnf:', \%opts);

$opts{'D'} = 1 if ($opts{'n'});	# -n implies -D
$opts{'s'} = 1 if ($opts{'m'});	# -m implies -s


#-----------------------------------------------------------------------------
#  START OF OPTIONS
#-----------------------------------------------------------------------------
my $POPDIR  = "/pop";
my $BOXLIST = "/etc/popular/mb";
my $LOGFILE = "/var/log/popular/pclean";

# number of days after which mails in the backup of a deleted mailbox are
# removed
my $EXPIRETIME_BACKUP = 30;

# maximum number of files in one mailbox
my $QUOTA_FILENUM = 1000;

my $WARN_FILENUM = 800;

my $QUOTA_WARNPERCENT = 90;

# hash with expire times in days for each namespace. 
#
my %EXPIRETIME = ("BACKUP",  { 'new' =>  30, 'cur' =>  30 },
		  "example", { 'new' => 360, 'cur' => 120 },
		  "noexp",   { 'new' =>   0, 'cur' =>   0 },
                 );

# Time in seconds after which a mail file in 'tmp' is considered stale and
# will be removed.
my $STALETIME = 60 * 60 * 24;


#-----------------------------------------------------------------------------
#  END OF OPTIONS
#-----------------------------------------------------------------------------

my $NOW = time();
my %EXP;
foreach (keys(%EXPIRETIME)) {
  if ($EXPIRETIME{$_}->{'new'}) {
    $EXP{$_}->{'new'} = $NOW - $EXPIRETIME{$_}->{'new'} * 60 * 60 * 24;
  }
  if ($EXPIRETIME{$_}->{'cur'}) {
    $EXP{$_}->{'cur'} = $NOW - $EXPIRETIME{$_}->{'cur'} * 60 * 60 * 24;
  }
}


my $DEBUG_INDENT_LEVEL = 0;
my $ERROR_COUNT = 0;

my $stat = {};

my $timestamp = timestamp();

chdir("$POPDIR") or die("Can't cd to $POPDIR: $!\n");


my $boxhash = GetBoxHash();
my $dirlist = GetDirList();

foreach my $dir (@$dirlist) {
  if (! -d $dir) {
    Debug("No directory $dir");
    next;
  }
  Debug("Directory $dir {");
  my $mailboxes = ScanDir($dir);
  foreach my $mailbox (@$mailboxes) {
    my $x = "$dir/$mailbox";
    my $mb = new Mailbox $dir, $mailbox, $boxhash->{"$dir/$mailbox"};
    next unless ($mb);
    Debug("Mailbox $mailbox {");
    Debug("dir=$mb->{'dir'} mailbox=$mb->{'mailbox'} ns=$mb->{'namespace'} state=$mb->{'state'} quota=$mb->{'quota'}");
    if ($mb->{'state'} eq "BACKUP") {
      # this mailbox is a backup of a deleted box
      $mb->Expire() if ($opts{'e'});
      $mb->PurgeIfEmpty();
    } elsif ($mb->{'state'} eq "ACTIVE") {
      # this mailbox is active
      $mb->CheckTmp() if ($opts{'t'});
      $mb->Expire() if ($opts{'e'});
      $mb->CheckSize() if ($opts{'s'});
      $mb->QuotaMail() if ($opts{'m'});
      $mb->CheckAll() if ($opts{'c'});
    } elsif ($mb->{'state'} eq "OLD") {
      # this mailbox is deleted
      $mb->DeleteMailbox() if ($opts{'b'});
    } else {
      # should never be here
      Error("Unknown mailbox state. Should never be here.");
      exit(1);
    }
    $mb->UpdateStat();
    Debug("}");
  }
  Debug("}");
}

Mailbox::PrintStat();

Debug("There were $ERROR_COUNT errors!") if ($ERROR_COUNT);

exit($ERROR_COUNT > 0);


#-----------------------------------------------------------------------------
#
#  Usage()
#
#  Print command line help
#
#-----------------------------------------------------------------------------
sub Usage {
  print <<"EOF";
pclean [OPTIONS]

  -D		Print debug messages (Be careful! This is really verbose)
  -t		Check tmp directories for old files
  -e		Expire mailbox contents
  -s		Check and report size of all mailboxes
  -c		Check names, ownership, and permissions of all dirs/files
  -m		Put mail in mailboxes that are over quota (implies -c)
  -b		Backup deleted mailboxes
  -f <file>	Read options from this file
  -n		Don't do anything, just print what you would do (implies -D)

EOF
}

#-----------------------------------------------------------------------------
sub timestamp {
  my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time);

  return sprintf("%04d%02d%02d.%02d%02d%02d",
			$year+1900, $mon+1, $mday, $hour, $min, $sec);
}


#-----------------------------------------------------------------------------
#
#  Debug(), Warn(), Error()
#
#  Output functions.
#  
#-----------------------------------------------------------------------------
sub Debug {
  my ($text) = @_;

  return unless ($opts{'D'});
  $DEBUG_INDENT_LEVEL-- if ($text =~ /\}/);
  print "      ", "  " x $DEBUG_INDENT_LEVEL, "$text\n";
  $DEBUG_INDENT_LEVEL++ if ($text =~ /\{/);
}


sub Warn {
  my ($text) = @_;

  print "WARN  ", "  " x $DEBUG_INDENT_LEVEL, "$text\n";
}


sub Error {
  my ($text) = @_;

  print "ERROR ", "  " x $DEBUG_INDENT_LEVEL, "$text\n";
  $ERROR_COUNT++;
}


#-----------------------------------------------------------------------------
#
#  $files = ScanDir($dir)
#
#  Scans the given directory and returns reference to an array of file names.
#  Names starting with a dot are not included in the output.
#
#-----------------------------------------------------------------------------
sub ScanDir {
  my ($dir) = @_;

  if (! opendir(DIR, $dir)) {
    Error("Can't opendir '$dir': $!");
    return [];
  }
  my @files = grep(!/^\./, readdir(DIR));
  closedir(DIR);
  return \@files;
}


#-----------------------------------------------------------------------------
#
#  GetBoxHash
#
#  This function returns a reference to a hash containing a mapping between
#  mailbox name and quota for this mailbox. This hash can be huge, if you
#  have lots of mailboxes.
#
#-----------------------------------------------------------------------------
sub GetBoxHash {
  my %boxhash;

  foreach my $d (0..9, 'a', 'b', 'c', 'd', 'e', 'f') {
    open(LIST, "$BOXLIST/$d") or die("Can't open $BOXLIST/$d: $!\n");
    while(<LIST>) {
      next if (/^$/);
      next if (/^#/);
      chomp;
      my ($box, $quota) = split(/:/, $_, 2);
      $boxhash{$box} = $quota;
    }
    close LIST;
  }
  return \%boxhash;
}


#-----------------------------------------------------------------------------
#
#  GetDirList
#
#  This function returns a reference to an array containing all the
#  directories where mailboxes can be stored. All directories must be relative
#  to $POPDIR. You can return directories that don't exist, they will be
#  silently skipped.
#
#  You will have to change this function, if your mailbox directory layout
#  is different from the default layout.
#
#-----------------------------------------------------------------------------
sub GetDirList {
  my @dirs;

  foreach my $d (0x0..0xf) {
    foreach my $n (0x00..0xff) {
      my $dir = sprintf("%x/%02x", $d, $n);
      push(@dirs, $dir);
    }
  }

  return \@dirs;
}


#-----------------------------------------------------------------------------
#
#  GetSize()
#
#  Gets size of a mailbox file. If the mailbox name is in the right format,
#  the size is taken from the name, otherwise a stat() is performed.
#
#-----------------------------------------------------------------------------
sub GetSize {
  my ($filename) = @_;

  if ($filename =~ /_(\d+)$/) {
    return($1);
  } else {
    my ($dev,$ino,$mode,$nlink,$uid,$gid,$rdev,$size,$atime,$mtime,
	$ctime,$blksize,$blocks) = stat($filename);
    return 0 unless (defined $dev);
    return $size;
  }
}


#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
#
#  Mailbox Package
#
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
package Mailbox;

sub new {
  my ($class, $dir, $mailbox, $quota) = @_;

  my($self) = {
    'dir'      => $dir,
    'filename' => $mailbox,
    'path'     => "$dir/$mailbox",
    'quota'    => $quota * 1024 * 1024,
    'size'     => { 'new' => 0, 'cur' => 0 },
    'count'    => { 'new' => undef, 'cur' => undef },
    'mails'    => { 'new' => undef, 'cur' => undef, 'tmp' => undef },
  };
  
  if ($mailbox =~ /^([-a-z0-9._]+)=([-a-z0-9._]+)_(\d{8}\.\d{6})$/) {
    $self->{'mailbox'}   = $1;
    $self->{'namespace'} = $2;
    $self->{'deltime'}   = $3;
    $self->{'state'}     = "BACKUP";
    $self->{'expire'}    = $EXPIRETIME{'BACKUP'};
  } elsif ($mailbox =~ /^([-a-z0-9._]+)=([-a-z0-9._]+)$/) {
    $self->{'mailbox'}   = $1;
    $self->{'namespace'} = $2;
    $self->{'expire'}    = $EXPIRETIME{$self->{'namespace'}};
    if ($quota) {
      $self->{'state'}   = "ACTIVE";
    } else {
      $self->{'state'}   = "OLD";
    }
  } else {
    ::Error("Wrong format for mailbox name: '$dir/$mailbox'");
    return undef;
  }

  bless($self, (ref($class) || $class));
}


#-----------------------------------------------------------------------------
#
#  $mailbox->ListMails($dir);
#
#  Returns a reference to a list of all mails in the named subdirectory
#  ('new', 'cur' or 'tmp'). The first time this function is called for a
#  given subdirectory the directory is scanned and the content cached. Any
#  subsequent call to this function will return the cached info.
#
#-----------------------------------------------------------------------------
sub ListMails {
  my ($self, $dir) = @_;

  if (! $self->{'mails'}->{$dir}) {
    $self->{'mails'}->{$dir} =
	::ScanDir("$self->{'path'}/$dir") unless ($self->{$dir});
  }

  return $self->{'mails'}->{$dir};
}


#-----------------------------------------------------------------------------
#
#  $mailbox->CountMails($dir);
#
#  Returns the number of mails in the subdirectory $dir ('new', 'cur' or
#  'tmp'). The first time this function is called, it will actually look
#  at the directories and cache the results in case this function is called
#  again.
#
#-----------------------------------------------------------------------------
sub CountMails {
  my ($self, $dir) = @_;

  if (! defined $self->{'count'}->{$dir}) {
    $self->{'count'}->{$dir} = $#{$self->ListMails($dir)} + 1;
  }

  return $self->{'count'}->{$dir};
}


#-----------------------------------------------------------------------------
#
#  $mailbox->CheckAll();
#
#-----------------------------------------------------------------------------
sub CheckAll {
  my $self = shift;
}


#-----------------------------------------------------------------------------
#
#  $mailbox->CheckTmp();
#
#  This function checks the tmp directory of this mailbox for stale files.
#  Anything older than $STALETIME seconds is deleted.
#
#-----------------------------------------------------------------------------
sub CheckTmp {
  my $self = shift;

  ::Debug("Checking tmp directory {");
  foreach my $file (@{$self->ListMails('tmp')}) {
    if ($file =~ /^([0-9]*)\./) {
      if ($NOW - $1 > $STALETIME) {
        ::Debug("File '$file' is stale. Deleting...");
        next if ($opts{'n'});
        if (! unlink("$self->{'path'}/tmp/$file")) {
          ::Error("Can't unlink file: $!");
        }
      } else {
        ::Debug("File '$file' is ok.");
      }
    } else {
      ::Error("Name of file '$file' has wrong format.");
    }
  }
  ::Debug("}");
}


#-----------------------------------------------------------------------------
#
#  $mailbox->DeleteMailbox();
#
#  Deletes a mailbox by renaming the mailbox directory to another name.
#
#-----------------------------------------------------------------------------
sub DeleteMailbox {
  my $self = shift;

  ::Debug("Deleting mailbox... (Backup in '$self->{'path'}_$timestamp')");

  return if ($opts{'n'});

  # We have to count the number of mails in the mailbox in case we need it
  # later. After the rename we won't find the mailbox anymore...
  $self->CountMails('new');
  $self->CountMails('cur');

  rename($self->{'path'}, "$self->{'path'}_$timestamp") or
    ::Error("Can't rename '$self->{'path'}' to '$self->{'path'}_$timestamp': $!");
}


#-----------------------------------------------------------------------------
#
#  $mailbo->CheckSize()
#
#  Checks the size of the mailbox, by counting the mails in the 'new' and
#  'cur' directories, getting their sizes ans summing it all up. Compares
#  this to the quota defined for this mailbox.
#
#-----------------------------------------------------------------------------
sub CheckSize {
  my $self = shift;

  foreach ('new', 'cur') {
    foreach my $file (@{$self->ListMails($_)}) {
      $self->{'size'}->{$_} += ::GetSize("$self->{'path'}/$_/$file");
    }
  }

  my $ssum = $self->{'size'}->{'new'} + $self->{'size'}->{'cur'};
  my $csum = $self->CountMails('new') + $self->CountMails('cur');

  if ($ssum >= $self->{'quota'}) {
    $self->{'sizeflag'} = 'S';
  } elsif ($ssum >= $self->{'quota'} * $QUOTA_WARNPERCENT / 100) {
    $self->{'sizeflag'} = 's';
  } else {
    $self->{'sizeflag'} = ' ';
  }

  if ($csum >= $QUOTA_FILENUM) {
    $self->{'countflag'} = 'N';
  } elsif ($csum >= $WARN_FILENUM) {
    $self->{'countflag'} = 'n';
  } else {
    $self->{'countflag'} = ' ';
  }

  ::Debug(sprintf("CheckSize: %s%s new=%d/%d cur=%d/%d sum=%d/%d", $self->{'sizeflag'}, $self->{'countflag'}, $self->{'size'}->{'new'}, $self->CountMails('new'), $self->{'size'}->{'cur'}, $self->CountMails('cur'), $ssum, $csum));
}


#-----------------------------------------------------------------------------
#
#  $mailbox->QuotaMail()
#
#  Puts a warning mail into mailboxes, which are over or near the quota limits.
#
#-----------------------------------------------------------------------------
sub QuotaMail {
  my $self = shift;
}


#-----------------------------------------------------------------------------
#
#  $mailbox->PurgeIfEmpty();
#
#  This will delete a mailbox if it is empty.
#
#-----------------------------------------------------------------------------
sub PurgeIfEmpty {
  my $self = shift;

  return if ($#{$self->ListMails('cur')} + $#{$self->ListMails('new')} > 0);
  ::Debug("Purging mailbox because it is empty");
  if (! $opts{'n'}) {
# XXX return code
#    rmdir("$self->{'path'}/new");
#    rmdir("$self->{'path'}/cur");
#    rmdir("$self->{'path'}/tmp");
#    rmdir("$self->{'path'}");
  }
}


#-----------------------------------------------------------------------------
#
#  $mailbox->ExpireMailbox();
#
#  Expires content of the given mailbox. Unread mails older then $newtime
#  are deleted and read mails older then $curtime are deleted. Both times
#  can be 0, in which case no mails will be expired.
#
#-----------------------------------------------------------------------------
sub ExpireMailbox {
  my $self = shift;

  my $exnew = $self->{'expire'}->{'new'};
  my $excur = $self->{'expire'}->{'cur'};

  foreach my $d ("cur", "new") {
    next unless ($self->{'expire'}->{$d});
    ::Debug("Expiring $d... {");
    foreach my $file (@{$self->ListMails($d)}) {
      if ($file =~ /^([0-9]+)\./ && $EXP{$self->{'namespace'}}->{$d} < $1) {
        ::Debug("loeschen");
# XXX
      }
    }
    ::Debug("}");
  }
}


#-----------------------------------------------------------------------------
#
#  $mailbox->UpdateStat();
#
#  Update statistics.
#
#-----------------------------------------------------------------------------
sub UpdateStat {
  my $self = shift;

  my $ns = $self->{'namespace'};
  foreach ('new', 'cur') {
    $stat->{$ns}->{'mails'}->{$_} += $self->CountMails($_);
    $stat->{$ns}->{'size'}->{$_}  += $self->{'size'}->{$_};
  }
  $stat->{$ns}->{'sumquota'}    += $self->{'quota'} / 1024 / 1024;
  $stat->{$ns}->{'overquota'}++ if (($self->{'countflag'} eq 'N') ||
				    ($self->{'sizeflag'} eq 'S'));
  $stat->{$ns}->{'active'}++    if ($self->{'state'} eq 'ACTIVE');
  $stat->{$ns}->{'old'}++       if ($self->{'state'} eq 'OLD');
  $stat->{$ns}->{'backup'}++    if ($self->{'state'} eq 'BACKUP');
}


#-----------------------------------------------------------------------------
#
#  Mailbox::PrintStat();
#
#  Print statistics.
#
#-----------------------------------------------------------------------------
sub PrintStat {
  print "------------------------------------------------------------------------------\n";
  print "Statistics:\n";
  foreach my $ns ('surf1') {		# XXX ns
    print "------------------------------------------------------------------------------\n";
    print "  Namespace: $ns\n\n";
    printf "  Mailboxes (ACTIVE): %8d\n", $stat->{$ns}->{'active'};
    printf "  Mailboxes (BACKUP): %8d\n", $stat->{$ns}->{'backup'};
    printf "  Mailboxes (OLD)   : %8d\n", $stat->{$ns}->{'old'};
    printf "  Mailboxes (Total) : %8d\n\n", ($stat->{$ns}->{'active'} + $stat->{$ns}->{'backup'} + $stat->{$ns}->{'old'});
    print "  Total quota of of all mailboxes: $stat->{$ns}->{'sumquota'} MBytes\n\n";
    print "  Number of mailboxes above quota: $stat->{$ns}->{'overquota'}\n\n" if (defined $stat->{$ns}->{'overquota'});
    printf "  Number of mails (new) : %12d\n", $stat->{$ns}->{'mails'}->{'new'};
    printf "  Number of mails (read): %12d\n", $stat->{$ns}->{'mails'}->{'cur'};
    printf "  Size of all mails (new) : %12d\n", $stat->{$ns}->{'size'}->{'new'} if ($stat->{$ns}->{'size'}->{'new'});
    printf "  Size of all mails (read): %12d\n", $stat->{$ns}->{'size'}->{'cur'} if ($stat->{$ns}->{'size'}->{'cur'});
    print "------------------------------------------------------------------------------\n";
  }
}


#-- THE END ------------------------------------------------------------------


syntax highlighted by Code2HTML, v. 0.9.1