# # MailScanner - SMTP E-Mail Virus Scanner # Copyright (C) 2002 Julian Field # # $Id: SweepContent.pm 3698 2006-08-23 09:25:31Z sysjkf $ # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # The author, Julian Field, can be contacted by email at # Jules@JulianField.net # or by paper mail at # Julian Field # Dept of Electronics & Computer Science # University of Southampton # Southampton # SO17 1BJ # United Kingdom # package MailScanner::SweepContent; use strict 'vars'; use strict 'refs'; no strict 'subs'; # Allow bare words for parameter %'s use MIME::Head; use DirHandle; use vars qw($VERSION); ### The package version, both in 1.23 style *and* usable by MakeMaker: $VERSION = substr q$Revision: 3698 $, 10; # Attributes are # # Constructor. sub new { my $type = shift; my $this = {}; bless $this, $type; return $this; } # Do all the message content scanning in here sub ScanBatch { my $batch = shift; my $ScanType = shift; # Insert your own checking here. # In $BaseDir, you will find a directory for each message, which has the # same name as the message id. Also there is a messageid.header file # containing all the headers for the message. # Add entries into %$infections, where they are referenced as # $infections->{"message id"}{"filename"} but please don't over-write ones # that are already there. # If the danger was detected in a header or applies to the whole message # then append the error report (and a newline) to # $infections->{"message id"}{""}. # Return the number of infections/problems you found. # Can play with the MIME headers of a message using $mime. my($id,$message,$ent,$partialcount); my($stripdangerous, $counter, $stripcounter); $counter = 0; $stripcounter = 0; # No. of messages we need to strip HTML from $0 = 'MailScanner: dangerous content scanning'; while(($id, $message) = each %{$batch->{messages}}) { next if $message->{deleted}; $ent = $message->{entity}; # # Do the partial and external checks even if they don't want # dangerous content scanning, as they directly affect our ability # to scan for viruses. # # Search for multipart/partial messages. This is entity-based as # the last part of the message (which is what is split into the # next message) probably won't have a filename. if ((! MailScanner::Config::Value('allowpartial', $message)) && FindPartialMessage($message, $ent)) { $message->{otherreports}{""} .= MailScanner::Config::LanguageValue($message, 'partialmessage') . "\n"; $message->{othertypes}{""} .= "e"; #$message->{otherinfected}++; $counter++; # Force replacement of the last bit of the MIME message #$message->{entityreports}{LastEntity($message, $ent)} .= # "Fragmented messages cannot be reliably scanned\n"; MailScanner::Log::WarnLog('Content Checks: Detected and rejected ' . 'fragmented message section in %s', $id); } # Search for message/external-body messages. This is entity-based # as, almost by definition, we won't have the filename worked out # for the file that is external. if ((! MailScanner::Config::Value('allowexternal', $message)) && FindExternalBody($message, $ent)) { $counter++; MailScanner::Log::WarnLog('Content Checks: Detected and rejected ' . 'external message body in %s', $id); } # Do the remaining checks if any recipient wants dangerous content checking next unless MailScanner::Config::Value('dangerscan', $message) =~ /1/; # Go through the attachments and remove any that are bigger than this # user/domain/site is allowed. my $maxmessagesize = MailScanner::Config::Value('maxmessagesize', $message); if ($maxmessagesize>0 && $message->{size}>$maxmessagesize) { MailScanner::Log::WarnLog("Content Checks: Message %s is bigger than " . "%d bytes", $message->{id}, $maxmessagesize); $message->{otherreports}{""} .= MailScanner::Config::LanguageValue($message, 'toobig') . ": " . $message->{size} . " bytes\n"; $message->{othertypes}{""} .= "s"; $message->{sizeinfected}++; $counter++; } # Check all the files for the attachment-size limit $counter += CheckAttachmentSizes($message, $id); # Search for Microsoft-specific attacks # Disallow both by default. Allow them only if all addresses agree. my $iframevalue = MailScanner::Config::Value('allowiframetags', $message); my $objectvalue = MailScanner::Config::Value('allowobjecttags', $message); my $formvalue = MailScanner::Config::Value('allowformtags', $message); my $scriptvalue = MailScanner::Config::Value('allowscripttags', $message); my $webbugvalue = MailScanner::Config::Value('allowwebbugtags', $message); my $phishingvalue = MailScanner::Config::Value('findphishing', $message); my $allowiframes = 0; my $allowobjects = 0; my $allowforms = 0; my $allowscripts = 0; my $allowwebbugs = 0; my $convertiframes = 0; my $convertobjects = 0; my $convertforms = 0; my $convertscripts = 0; my $convertwebbugs = 0; my $allowphishing = 0; # Allow the tags only if everyone allows them $allowiframes = 1 if $iframevalue =~ /^[1\s]+$/; $allowobjects = 1 if $objectvalue =~ /^[1\s]+$/; $allowforms = 1 if $formvalue =~ /^[1\s]+$/; $allowscripts = 1 if $scriptvalue =~ /^[1\s]+$/; $allowwebbugs = 1 if $webbugvalue =~ /^[1\s]+$/; $allowphishing =1 if $phishingvalue !~ /1/; # Convert the tags if no-one blocks them and someone converts them $convertiframes = 1 if $iframevalue !~ /0/ && $iframevalue =~ /convert/i; $convertobjects = 1 if $objectvalue !~ /0/ && $objectvalue =~ /convert/i; $convertforms = 1 if $formvalue !~ /0/ && $formvalue =~ /convert/i; $convertscripts = 1 if $scriptvalue !~ /0/ && $scriptvalue =~ /convert/i; $convertwebbugs = 1 if $webbugvalue !~ /0/ && $webbugvalue =~ /convert/i; $stripdangerous = MailScanner::Config::Value('stripdangeroustags',$message); #print STDERR "WebBugvalue = $webbugvalue\n"; #print STDERR "Allowforms = $allowforms and convertforms = $convertforms\n"; #print STDERR "Allowphishing = $allowphishing\n"; # Shortcut the check completely if they want to allow everything # and are not converting nasty tags to text if (!($allowiframes && $allowforms && $allowscripts && $allowobjects && $allowphishing && !$stripdangerous) && FindHTMLExploits($message, $id, $ent, $allowiframes, $allowobjects, $allowforms, $allowscripts, $allowwebbugs, $convertiframes, $convertobjects, $convertforms, $convertscripts, $convertwebbugs, $stripdangerous)) { $counter++; MailScanner::Log::WarnLog('Content Checks: Detected HTML-' . 'specific exploits in %s', $id); } # Look for encrypted messages. They can allow some and block some # so we need to find the messages then apply the rules to the result. my $reason; my $encrypted = EncryptionStatus($message, $ent); if ($encrypted && MailScanner::Config::Value('blockencrypted', $message) =~ /1/) { $reason = 'encrypted'; } if (!$encrypted && MailScanner::Config::Value('blockunencrypted', $message) =~ /1/) { $reason = 'unencrypted'; } if ($reason) { $message->{otherreports}{""} .= MailScanner::Config::LanguageValue( $message, $reason) . "\n"; $message->{othertypes}{""} .= "e"; #$message->{otherinfected}++; $counter++; MailScanner::Log::WarnLog('Content Checks: Detected and blocked ' . '%s message in %s', $reason, $id); } # Replace the MIME boundary string, for any multipart/alternative # sections inside multipart/mixed sections, where the outer # boundary is a substring of the inner boundary. Works around bugs # in the Cyrus IMAP server, and some old versions of Eudora. FixSubstringBoundaries($message, $id); # Check for nasty subject lines and quietly fix them FixMaliciousSubjects($message); # Find and save all the public keys (X.509 and PGP) in each message. #ExtractPublicKeys($message, $ent) # if MailScanner::Config::Value('archivepublickeys', $message); # Convert text/html components into text/plain attachments. # Do this if any of the recipients need it done. # This involves forcing the message to rebuild itself from the # MIME structure. We will end up replacing MIME entities in the # message with ones pointing to new files/strings. if (MailScanner::Config::Value('htmltotext', $message) =~ /1/) { $message->{needsstripping} = 1; $stripcounter++; #MailScanner::Log::InfoLog('Content Checks: Detected and will convert ' . # 'HTML message to plain text in %s', $id); #$message->{otherreports}{""} .= "Converted HTML to plain text\n"; #$message->{othertypes}{""} .= "m"; # Modified body, but no infection } } # Print just a summary of the HTML stripping that needs doing MailScanner::Log::InfoLog('Content Checks: Need to convert HTML to plain ' . 'text in %s messages', $stripcounter) if $stripcounter>0; return $counter; } # Remove all trailing space from the subject line and remove any large # blocks of spaces. sub FixMaliciousSubjects { my($message) = @_; my $subject = $message->{subject}; my $newsubject = $subject; $newsubject =~ s/\s{20,}.*\..{1,4}\s*$//; # Delete file extensions at end of filename $newsubject =~ s/\s*$//g; $newsubject =~ s/\s{20,}//g; # If it has changed then force an update #print STDERR "Message metadata is:\n" . join("\n", @{$message->{metadata}}) . "\n"; if ($newsubject ne $subject) { $message->{subjectwasunsafe} = 1; $message->{safesubject} = $newsubject; #$global::MS->{mta}->ReplaceHeader($message, 'Subject:', $newsubject); } else { $message->{subjectwasunsafe} = 0; $message->{safesubject} = $message->{subject}; } } # Check each of the file attachments to make sure they are all within # the acceptable limit. Replace each one that's too big with a warning # message. sub CheckAttachmentSizes { my($message, $id) = @_; my($BaseDir, $basefh, $safename, $maxsize, $attachsize, $tnefname); my($unsafename, $counter, $minsize, $attachentity); # Read the configuration setting, value<=0 implies setting not in use. $maxsize = MailScanner::Config::Value('maxattachmentsize', $message); $minsize = MailScanner::Config::Value('minattachmentsize', $message); return 0 if $maxsize<0 && $minsize<0; $tnefname = $message->{entity2file}{$message->{tnefentity}}; # Get into the directory containing all the attachments $BaseDir = $global::MS->{work}->{dir} . "/$id"; chdir $BaseDir or die "Cannot chdir to $BaseDir for file size checking, $!"; $basefh = new DirHandle; $basefh->open('.') or MailScanner::Log::DieLog("Could not open attachment dir %s, %s", $BaseDir, $!); $counter = 0; while ($safename = $basefh->read()) { next if $safename eq '.' || $safename eq '..'; # "Safe" attachment filename is in $safename, this is what we stat $attachsize = -s "$BaseDir/$safename"; $unsafename = $message->{safefile2file}{$safename} || $tnefname; $attachentity = $message->{file2entity}{$unsafename}; next unless $attachentity; # Only check attachment, not contents of zips next if $safename =~ /^msg[-\d]+\.(txt|html)$/; #print STDERR "\nSafename = $safename\n"; #print STDERR "Attachsize=$attachsize\nMin=$minsize\nMax=$maxsize\n"; if ($maxsize>=0 && $attachsize > $maxsize) { #print STDERR "$safename is too big $attachsize > $maxsize\n"; #print STDERR "Unsafename is $unsafename\n"; MailScanner::Log::NoticeLog("Attachment size check: %s > %s (%s) in %s", $attachsize, $maxsize, $unsafename, $id); $message->{otherreports}{$safename} .= MailScanner::Config::LanguageValue($message,'attachmenttoolarge') . ": $attachsize bytes\n"; $message->{othertypes}{$safename} .= "s"; $counter++; $message->{sizeinfected}++; } if ($minsize>=0 && $attachsize < $minsize) { #print STDERR "Attachment is too small\n"; MailScanner::Log::NoticeLog("Attachment size check: %s < %s (%s) in %s", $attachsize, $minsize, $unsafename, $id); $message->{otherreports}{$safename} .= MailScanner::Config::LanguageValue($message,'attachmenttoosmall') . "\n"; $message->{othertypes}{$safename} .= "s"; $counter++; $message->{sizeinfected}++; } } return $counter; } # Walk the entire tree of a message, looking for any # Content-type: message/partial # headers. # Write an entity report about them so we keep the rest of the message. sub FindPartialMessage { my($message, $entity) = @_; # Track number of dangerous things found my $counter = 0; # Reached a leaf node? return 0 unless $entity && defined($entity->head); # Mark the message as a problem if it's a "message/partial" my $type = $entity->head->mime_attr('content-type'); if ($type && $type =~ /message\/partial/i) { #print STDERR "Found partial message at entity $entity\n"; $message->{otherinfected}++; $counter++; } # Now try the same on all the parts my(@parts, $part); @parts = $entity->parts; foreach $part (@parts) { $counter += FindPartialMessage($message, $part); } return $counter; } # Find the last MIME entity in the tree. This will be the part that # we need to replace if we found a message/partial. # This is no longer used. We did try to just clean out the split # attachments in partial messages, but it isn't feasible to do. Sorry. sub LastEntity { my($message, $entity) = @_; my($NumParts); #print STDERR "Looking for LastEntity of $message\n"; #print STDERR "Skeleton is " . $entity->dump_skeleton() . "\n"; # Is this a nested entity? If so, search the last part of it if ($entity && !$entity->bodyhandle) { # How many parts? $NumParts = $entity->parts; #print STDERR "Message is multipart with $NumParts parts\n"; return LastEntity($message, $entity->parts($NumParts-1)) if $NumParts>0; return $entity; # NumParts == 0 so it's not actually multipart at all } else { # It's not multipart, so I must be at the end return $entity; } } # Look through all the message parts finding text/html entities # that contain Microsoft-specific exploits. sub FindHTMLExploits { my($message, $id, $entity, $allowiframes, $allowobjects, $allowforms, $allowscripts, $allowwebbugs, $convertiframes, $convertobjects, $convertforms, $convertscripts, $convertwebbugs, $stripdangerous) = @_; # Track number of dangerous things found my $counter = 0; # Reached a leaf node? return 0 unless $entity && defined($entity->head); # Look for text/html sections my $type = $entity->head->mime_attr('content-type'); #my $disposition = $entity->head->mime_attr('content-disposition'); #$disposition = 'inline' unless $disposition; if ($type && $type =~ /text\/html/i && #$disposition !~ /attachment/i && defined($entity->bodyhandle) && defined($entity->body) && defined($entity->bodyhandle->path)) { $counter += SearchHTMLBody($message, $id, $entity->bodyhandle->path, $allowiframes, $allowobjects, $allowforms, $allowscripts, $allowwebbugs, $convertiframes, $convertobjects, $convertforms, $convertscripts, $convertwebbugs, $stripdangerous); } # Now try the same on all the parts my(@parts, $part); @parts = $entity->parts; foreach $part (@parts) { $counter += FindHTMLExploits($message, $id, $part, $allowiframes, $allowobjects, $allowforms, $allowscripts, $allowwebbugs, $convertiframes, $convertobjects, $convertforms, $convertscripts, $convertwebbugs, $stripdangerous); } return $counter; } # Search an HTML part of the message body for dangerous HTML # that either uses the