#!/usr/local/bin/perl # Program : NAGS Spam Filter # Author : Ian Leicht (ian@nags.org) # Copyright.......: Copyright (c) 1996, 1997 by Ian Leicht $version="3.B2"; # Description : Rejects email from a list of predefined sites & criteria. Rejected email # is returned to the sender with instructions for getting the mail through the filter. # Documentation : http://www.nags.org/spamfilter.html # Requires : nags_config.pl, spammers, exceptions, junkmailers # Usage : Create a .forward file that consists of: |/path/nags/nags_filter.pl # # I've spent *A LOT* of time working on this project. If you have it in your heart/wallet # to send me a donation, drop me a line (ian@nags.org) and I will give you instructions. # # Revision History: # v3.B2 (10-30-97): Script wasn't checking to: addresses! # v3.B1 (10-22-97): Include new domains # v3.B (09-25-97): Support for multiple okay subjects # -itsokay command line override added # v3.A4 (09-11-97): Check for empty Message-ID - Thanks Mark Baushke # v3.A3 (09-09-97): Fixed numeric checks to not tag xyx123@abc.com # v3.A2 (09-08-97): Check reply-to for spam # v3.A1 (09-04-97): Check for 123432@abc.com # v3.A (09-02-97): &forward_mail_message => &send_via_smtp # Filter looks for empty To: addresses - Thanks to John Deters # Support for mail forwarding (all or just okay mail) # Fixed up headers in rejection letter # Removed all send to postmaster code... # v3.92 (08-29-97): If you didn't set $your_email it won't think all mail is from you anymore.. # v3.91 (08-28-97): Fixed bug in by: line processing # v3.9 (08-27-97): Support for spam_subjects # Moved home directory code from nags_config.pl to nags_filter.pl # Added explanations for exceptions # v3.82 (08-26-97): Cleaned up code and modified rejection letter text # v3.81 (08-24-97): Detect new stealth way of mailing (no From:) - Thanks Thomas Lostaunau # v3.8 (08-03-97): To_exceptions list added # v3.73 (07-21-97): Disable Message-ID / received-by checking # v3.72 (07-07-97): Message-ID "RAM" now case insensitive # v3.71 (06-29-97): Parse another type of received-by line correctly # v3.7 (06-28-97): If multiple names in To: don't parse it # Receied by headers like: sherrie [166.70.3.83] are accepted # Received by algorithm uses PERL case statements # v3.61 (06-26-97): Remove trailing ;s from DNS addresses # v3.6 (06-25-97): Mail from you is ignored # v3.5 (06-24-97): Look for Received-By headers after Message-ID # v3.49 (06-24-97): X-Shocking-Web-Page e=> X-Shock (thanks Kevin Franden) # v3.48 (06-21-97): "Unable to resolve" only printed with debug=1 # v3.47 (06-16-97): "In spite of" text at next line # Message ID mismatch messages shortened # v3.46 (06-15-97): To address exception checking disabled # v3.45 (06-15-97): Fixed broken implementation of message-ID / last by checking # Mail will be ignored based on whichever email address is decided to be most "authentic" # and not just the from address. # v3.44 (06-14-97): Ignored mail now gives explanation # v3.43 (06-13-97): Explanations for spam mail now go on seperate line # From information no longer displayed in log when rejecting mail # Log entry now of format mm-dd-yy hh:mm:ss message # v3.42 (06-10-97): One of the debug messages was always being displayed (thanks Gail Gurman) # v3.41 (06-09-97): If messageID is IP - perform revDNS check of top two levels with by header # messageID-by check for DNS is only two two levels # v3.4 (06-08-97): Code to auto-detect where we are (thanks Art Yaffee) # v3.3 (06-08-97): Check if $messageid is from same place as last received by site # Only compare last parts of domain name in reverse DNS check # Prettied up debug=1 output # v3.2 (06-08-97): Reverse DNS compared to IP address in received-by headers (thanks Zack Weinberg) # If IP address is in private network - don't bother to do DNS (thanks Bill Stewart-Cole) # New explanation text for IPNAME_MISMATCH # All from: addresses that are an email address will have the username@ removed # Any DNS addresses with <= 1 parts (i.e. none or just something like localhost) are skipped # New subroutine: &remove_email # New subroutine: &check_equiv # Date: added to rejection letter (thanks Zack Weinberg) # v3.16 (06-06-97): DNS entry with root@ is accepted # v3.15 (06-06-97): Rejection mail - while illegal message => single print statement so it will # be easier to modify for non-US citizens. (Thanks Charlie Stross) # v3.14 (06-06-97): Log entries weren't giving explantion for spam anymore # Uppercase domain names were being rejected # v3.13 (06-05-97): Empty DNS addresses or ones with @localhost are accepted # Mail that is okay due to exceptions still list the reasons it was flagged. # v3.12 (06-04-97): Don't check "itsays" for valid DNS # v3.11 (06-04-97): To addresses are also checked for in the exceptions file (Thanks Kevin Franden) # v3.1 (06-04-97): All addresses are classified as IP or DNS. If IP they are checked to make # make sure they are not multicast and within legit range. If DNS, the top level domain # is checked against my list. # New subroutine: &check_valid # New subroutine: &check_dns_address # Debug=1 displays progress of received-by header processing # v3.01 (06-03-97): Look for throughout header (in case by is on different line) # v3.0 (06-03-97): Advanced Received parsing algorithm implemented # Sendmail: 8.8.5/8.6.5 is looked for in by: # Illegal IP addresses are looked for # New subroutine: &check_up_address # Message-id scanned for spam site taint # Explanations for sendmail, illegal IPs added # v2.F2 (06-02-97): Spam sites are now case insensitive (thanks Timitiy Coldenhoff for catching this) # Debug=3 now prints all of socket communication # v2.F1 (06-01-97): Modified how reasons for rejection are created/displayed (no more duplicates) # All mail with from address including localhost is rejected # New subroutine: &check_for_localhost # New subroutine: &reason_for_rejection # X-UIDL and LOCALHOST explanations added to rejection letter # v2.F (06-01-97): All reasons for mailing being rejected are listed # Script will not accept an authenticated sender of @localhost # v2.E2 (05-30-97): Modified auto-reject text # Any line in spammers file beginning with a space is skipped # v2.E1 (05-26-97): Cleaned up header detection code - should burn less CPU cycles # v2.E (05-24-97): Reject mail that comes with X-UIDL header. # v2.D (05-21-97): Parse From: headers correctly when sent from local mailer # Look for MaxAnnon mailer's Status: MC (thanks Scott Keszler) # Look for X-Distribution: Mass (thanks David Harris [Author of Pegasus Mail]) # v2.C5 (05-19-97): Email address can have a ~ in them # v2.C4 (05-12-97): Minor text changes # v2.C3 (05-12-97): Fixed some probs with socket code (thanks Markus Baumeister) # v2.C2 (05-07-97): Fixed date bug I created in v2.C # v2.C1 (05-07-97): Look for blank line between headers and message body (thanks Juan Cabanela) # v2.C (05-07-97): Major revamp of the header detection # Message-ID parsing code a bit more readable # v2.B3 (04-21-97): Fixed typo's in reject letter (thanks to Stephen Mack) # v2.B2 (04-16-97): SMTP-QM 3.02 => SMTP-QM # v2.B1 (04-13-97): Unable to deliver mail to messsage did not display address # Added some additional comments in the code # v2.B (04-07-97): Rejection letter explanations specific to cause # Fix double Reply-to bug (thanks to Juan Cabanela) # Look for CyberPromo: X-Shocking-Web-Page # v2.A3 (04-03-97): Improved quickmail header processing algorithm for author # v2,A2 (03-25-97): Autoreject subject supersedes exception list # v2.A1 (03-04-97): Script is more verbose about why messages are ignored # v2.A (02-18-97): Script is more verbose about why messages are tagged as spam # Noticed that I spelled "received" as "recieved" at some point.. *sigh* # Included explanation of rejection in rejection message # v2.92 (02-09-97): Reject mail with Message-Id: RAF* (thanks to Scott Keszler) # v2.91 (01-29-97): Script looks at To: header # Algorithm for parsing headers modified to account for To: headers # Bug fixes in Quickmail forwarding detection feature for author # v2.9 (01-28-97): Script configuration moved to: nags_config.pl # Script now reads from: "junkmailers" file to get a list of X-Mailers to reject # Algorithm for parsing of headers has been modified - it seems to work # Quickmail forwarding detection feature for the author # If spammers file is missing, an error message to STDOUT will be generated # New sub check_home_dir(); # v2.83 (01-27-97): Bug fix: from_address=>$from_address (thanks to Juan Cabenela) # Bug fix: mail wasn't going to postmaster!! # Reject mail sent with Extractor (thanks to Scott Keszler) # v2.82 (01-25-97): Reject mail with Message-Id: Ready Aim Fire (thanks to Scott Keszler) # v2.81 (01-21-97): New variable: $check_pegasus to make this optional # If a serious error occurs and the script can not write to the log file, a message # to standard output is generated. # Blank lines, leading and trailing spaces are ignored in spammers/exceptions file. # v2.8 (01-09-97): @exceptions and %spammers moved to seperate file # v2.7 (12-11-96): Better parsing of X-Mailer # v2.6 (11-27-96): Reject mail sent with Flood Gate # Changed rejection message # v2.5 (11-11-96): Reject mail sent with Pegasus Mail # v2.4 (10-20-96): Cleaned up my crappy socket code (thanks to Russ Allbery) # v2.31 (10-15-96): If @smtp_hosts is left blank, $hostname is used # A number of new sites have been added to the %spammers array # v2.30 (10-02-96): New command line argument: -debug # A number of new sites have been added to the %spammers array # v2.21 (09-28-96): When rejecting mail messages, the rejected message is prepended with > # v2.20 (09-28-96): Autoreject subjects will not be ignored # Fixed misreporting in -justparse # -justparse and main logic use same logical structure # v2.11 (09-28-96): Script will not accept headers after it has found the subject and from fields # Good messages are logged to junk mail if from: parsing is screwed # v2.10 (09-27-96): Script will not accept headers after it has found the subject. # v2.00 (09-26-96): Script now interacts with the SMTP port. It is therefore able # to detect if the transaction is going smoothly or not. # An array of SMTP servers can be specified which will be tried in order # v1.51 (09-26-96): Blank first line is removed # v1.5 (09-25-96): Aesthetic changes and additional comments added to code # Mail-error filename is now customizable # Fixed bug in postmaster generation # Changed behavior of $ignore and $mail_is_spam # -justparse gives additional information # v1.4 (09-24-96): Script will try smtp_host1 first and then smtp_host2 # Script can fake its identity with helo... # You can now test out the rejection with a special subject # v1.3 (09-24-96): Only the first from, auhenticated sender, subject will be used # New command line argument: -justparse # v1.22 (09-23-96): Matches for from:,subject:, etc. are now case insensitive # v1.21 (09-23-96): Don't send mail to empty email addresses # v1.20 (09-23-96): Command line arguments added # New command line argument: -nopost # New command line argument: -noreply # v1.11 (09-23-96): "-"s added to the email address pattern matches # v1.10 (09-22-96): Script will only check for exceptions with spam mail # Junk mail is logged to user specified directory # Pattern matches are now case sensitive # v1.00 (09-15-96): Script is born # To do: precedence: junk # To do: add "D # Standard home directory detection -- if your system is unusual, you may need # to manually set this # Determine where we are if ($0=~/(.*)\/.*$/) { $local_dire="$1"; } else { $local_dire="."; } #$home_dir="."; $home_dir= $ENV{"HOME"} || $ENV{"LOGDIR"} || (getpwuid($<))[7] || $local_dire; $home_dir.="/nags"; # All of the configuration information resides here push (@INC,$home_dir); require "nags_config.pl"; &check_home_dir(); chop($hostname = `hostname`) unless ($hostname); @smtp_hosts=$hostname unless (@smtp_hosts); $usg= 'Usage: nags_filter.pl [] -noreply -justparse -itsokay -debug x -h WARNING: Use these only for command line testing. They will NOT work in your .forward file. '; # Parse the command line arguments while ($#ARGV >= $[) { $arg = shift @ARGV; if ($arg eq "-debug") { $arg = shift @ARGV; if ($arg) { $debug=$arg; next; } else { $debug=1 } } if ($arg eq "-justparse") { $just_parse=1; next; } if ($arg eq "-noreply") { $no_sender=1; next; } if ($arg eq "-itsokay") { $itsokay=1; next; } if ($arg eq "-h") { die "$usg"; } if ($arg =~ /^-/) { die "Invalid flag: $arg\n$usg"; } die "Invalid argument: $arg\n$usg"; } # Various date routines... ($second,$minute,$hour,$mday,$mon,$year,$temp,$temp,$temp)=localtime(time); $mon++; $dateinfo = "$mon-$mday-$year"; $hour = "0$hour" if ($hour<=9); $second = "0$second" if ($second<=9); $minute = "0$minute" if ($minute<=9); $date="$dateinfo $hour:$minute:$second"; $datefile="$mon-$mday-$year-$hour-$minute-$second"; &load_exceptions(); &load_to_exceptions(); &load_spammers(); &load_spam_subjects(); &load_junkmailers(); if ($debug==2) { print "Configuration Information:\n"; print "Exceptions: "; foreach $ex (@exceptions) { print "\t$ex\n"; } print "\n"; print "To_Exceptions: "; foreach $ex (@to_exceptions) { print "\t$ex\n"; } print "\n"; print "Junk Mailers "; foreach $jm (@junkmailers) { print "\t$jm\n"; } print "\n"; print "Spammers: "; foreach $sp (keys %spammers) { print "\t$sp\n"; } print "\n\n"; } # Read from standard input $first=1; $line=-1; while () { chop; # In case the first line is blank... if ($first) { $first=0; next unless ($_); } $line++; # Note: only a-z A-Z 0-9 . - ~ are currently recognized as parts of an # email address. Should I add # in? # Debugging code # print "$headers_found:$_\n"; if (!$headers_found) { # Check the From: header for spam taint if (/^From:[\t ]*(.*)/i) { $from=$1; #print "Found from: $1\n"; # Handle either of these types of from lines: # From: Firstlook <1stlook@america.com> # From: ablebake@ix10.ix.netcom.com if ($from =~ /([\w\.\-~]+)\@([\w\.\-~]+)/ ) { $from_name=$1; $from_address=$2; } else { # Get rid of anything in ""s $from =~ s#".*"##g; # Note: this code could reject some valid addresses with nested ()s # see RFC822. # # Get rid of anything in ()s $from =~ s#\(.*\)##g; $from_name=$from; # Get rid of @<> $from_name=~ s#[\@<>]##g; # Get rid of trailing/leading spaces $from_name=~ s#^[ ]*##g; $from_name=~ s#[ ]*$##g; $from_address=$hostname; } # ($from_name,$from_address)=/([\w\.\-~]+)\@([\w\.\-~]+)/; &check_spam_sites; &check_from_localhost; } # Check the To: header for spam taint if (/^[\t]*To:[\t ]*(.*)/i) { $to=$1; $pos=-1; while (($pos=index($to,'@',$pos)) > -1) { $recip_count++; $pos++; } print "Found to [$recip_count]: $1\n"; if ($recip_count==1) { &check_spam_sites; } # Handle either of these types of from lines: # From: Firstlook <1stlook@america.com> # From: ablebake@ix10.ix.netcom.com ($to_name,$to_address)=/([\w\.\-~]+)\@([\w\.\-~]+)/; } # Check the Reply-To: header for spam taint if (/^[\t]*Reply-To:[\t ]*(.*)/i) { $rto=$1; &check_spam_sites; } # Parse the subject if (/^[\t]*Subject:[\t ]*(.*)/i) { $subject=$1; #print "Found subject: $subject\n"; } # Parse the authenticated sender if (/^[\t]*Comments: Authenticated sender is/i) { # Handle either of these types of from lines: # From: Firstlook <1stlook@america.com> # From: ablebake@ix10.ix.netcom.com ($auth_name,$auth_address)=/([\w\.\-~]+)\@([\w\.\-~]+)/; &check_spam_sites; &check_from_localhost; } # Parse the return path # I don't think this one is really necessary.. but what the hell... if (/^[\t]*Return-Path: /i) { ($return_name,$return_address)=/([\w\.\-~]+)\@([\w\.\-~]+)/; &check_spam_sites; } # Check if the mail went through a known spammer # A lot of spammers bounce their mail of sites like earthlink.net. Even if # the spam doesn't originate from a black listed site, if it passes through it # it is GUILTY AS CHARGED. $received=0; if (/^[\t]*Received: from/i) { # This feature is still experimental and should not be released yet #if ($messageid) { # $reason{"RECEIVED"}="Extraneous received-by"; #} # See who it is from: this can be in a number of formats: # from mailhost.totuff.net(alt2.forevermails.net(254.750.86.9)) # from myco.InnovSoftD.com (ip19.new-haven.ct.pub-ip.psi.net [38.11.102.19]) # from myco.InnovSoftD.com # from mx.West.saic.com by cpmx.mail.saic.com; Wed, 16 Apr 97 00:33:23 # from 255.199.212.34 (205.199.212.34) # from newshost.nytimes.com ([199.181.173.226] (may be forged)) # from free.mirabilis.com (root@[208.202.84.42] (may be forged)) # from sherrie [166.70.3.83] # First we grab everything in between "from" and "by" if they are both on # the same line. If not we just take everything after from. if (/from (.*) by/i) { $rf=$1; #print "from1: $rf\n"; } else { /from (.*)/i; $rf=$1; #print "from2: $rf\n"; } print "from: $rf\n" if ($debug==1); # If there is a set of parentheses in what we found then that means that the # machine has a different name than it gave. Now we have something like: # # mailhost.totuff.net(alt2.forevermails.net(254.750.86.9)) # (ip19.new-haven.ct.pub-ip.psi.net [38.11.102.19]) # 255.199.212.34 (205.199.212.34) # # Where the first it was it says and the second group is who it is. SWITCH: { if ($rf =~ m#([^\(\[]*)\((.*)\)#) { $itsays=$1; $itreally=$2; # Get rid of any trailing/leading spaces $itsays=~ s#^[ ]*##g; $itsays=~ s#[ ]*$##g; $itreally=~ s#^[ ]*##g; $itreally=~ s#[ ]*$##g; print "\tsays: $itsays\n" if ($debug==1); #&check_valid($itsays); print "\treally: $itreally\n" if ($debug==1); # Now we know who the machine really is. If it was a name, then the server # should try to resolve a number. If it finds a number, we will make sure # that it is a valid IP address. if ($itreally =~ m#(.*)[\(\[](.*)[\]\)]#) { $nm=$1; $ip=$2; # Get rid of any trailing/leading spaces $nm=~ s#^[ ]*##g; $nm=~ s#[ ]*$##g; $ip=~ s#^[ ]*##g; $ip=~ s#[ ]*$##g; print "\t\tname: $nm\n" if ($debug==1); #&check_dns_address($nm); &check_valid($nm); print "\t\tip: $ip\n" if ($debug==1); $invalid=&check_ip_address($ip); # Make sure the reverse DNS and the name are the same &check_equiv($nm,$ip) unless ($invalid); } else { # We couldn't find a name/number combo for who it really is so we will # just make sure that any IPs we do know about are legit &check_valid($itreally); } last SWITCH; } if ($rf =~ m#([^\(\[]*)\[(.*)\]#) { $itreally=$2; &check_valid($itreally); last SWITCH; } # We didn't find a set of "who it says it is and who it really is" in the # headers but if this is an IP address we will check to make sure it is legit &check_valid($rf,1); } # Check the whole damn line for spam taint &check_spam_sites; # Let the next hunk of code know we just read a received line # Is this still necessary? $received=1; } # Parse the by header if (/by (.*)/) { if (m#by[\s]*([\S]+) \(([^\)]+)\)#i) { $byname=$1; $sendmailv=$2; print "by1: $byname sendmail: $sendmailv\n" if ($debug==1); # This is the elegant way -- but what if the by and sendmail version are on different lines? #if ($sendmailv eq "8.8.5/8.6.5") { # $mail_is_spam=1; # $reason{"SENDMAIL"}=$sendmailv; #} &check_valid($byname,1); } else { m#by ([^\s;]+)#i; $byname=$1; print "by2: $byname\n" if ($debug==1); &check_valid($byname,1); } print "\n" if ($debug == 1); } # This may be a real sendmail version/configuration -- but the spammers seem to # love this one. if (m#\(([\d]+\.[\d]+.[\d+])/([\d]+\.[\d]+.[\d+])\)#) { if (($1 eq "8.8.5") && ($2 eq "8.6.5")) { $mail_is_spam=1; $reason{"SENDMAIL"}="8.8.5/8.6.5"; } } # Why the hell am I doing this? Maybe later I will check for weird mailing times? if (/^[\t]*Date: (.*)/i) { #print "Date Found: $maildate\n"; $maildate=$1; } # Check for annoying mailers if (/^[\t]*X-Mailer:[\t ]*(.*)/i) { $xmailer=$1; #print "Found Xmailer: $xmailer\n"; foreach $jmailer (@junkmailers) { if ($xmailer =~ /$jmailer/i) { $mail_is_spam=1; $reason{"xmailer"}=$xmailer; } } # Customization for the author - deal with it.. :) # My mail gateway at work forwards me the messages so I need to read the # mail headers at the bottom and not top of the message. if ($xmailer =~ /^Mail\*Link SMTP-QM/) { $saic=1; #print "SAIC=1\n"; } } # Check the message ID if (/^[\t]*Message-Id:[\t ]*(.*)/i) { $messageid=$1; $messageid=~ s#[\<\>]##g; print "Message-Id: $messageid\n" if ($debug == 1); # Thanks to SRK for telling me about this mail header if (($messageid =~ /Ready Aim Fire/i) || ($messageid=~ /RAF/)) { $mail_is_spam=1; $reason{"other"}.="Ready Aim Fire "; } # Thanks to Mark Baushke for this one: if ($messageid =~ /\<\>/) { $mail_is_spam=1; $reason{"other"}.="Empty Message-ID "; } &check_spam_sites; $mid=&remove_email($messageid); &check_valid($mid,1); } # Stanford Wallace uses this one if (/^[\t]*X-Shock:/i) { $mail_is_spam=1; $reason{"other"}.="X-Shocking-Web-Page "; } # X-UIDL headers should only be created by your POP server if (/^[\t]*X-UIDL:/i) { $mail_is_spam=1; $reason{"X-UIDL"}="X-UIDL"; } # MaxAnnon! uses this one (thanks Scott Keszler) if (/^[\t]*Status: MC/i) { $mail_is_spam=1; $reason{"other"}.="MaxAnnon! Mailer "; } # Thanks to the wonderful author of Pegasus mail for this one # If someone uses a newer version of Pegasus, it adds the Distribution: Mass # if more than 5,000 people are being mailed to. if (/^[\t]*X-Distribution: Mass/i) { $mail_is_spam=1; $reason{"other"}.="Bulk Mail sent with Pegasus "; } if (/^[\t]*X-Advertisement/i) { $mail_is_spam=1; $reason{"other"}.="X-Advertisement "; } # This is a key hunk of code here - it is new as of version 2.C1. # Look for the first blank line -- once it has been found that is the # end of the headers. if ($_ eq "") { # John Deters innovation $headers_found=$line; } } else { # Only check these if we are not in the mail headers # So a single message with forwarded headers doesn't appear as two messages s/^From\b/>From/; } # This will help us avoid infinite loops # All messages generated by the NAGS mailer contain this line, so if # it receives mail that contains this, then the mail is ignored (i.e. # thrown away, but no response is generated). if (/^X-lewp: NAGS/) { $ignore=1; } # Customization for the author - deal with it.. :) # My mail gateway at work forwards me the messages so I need to read the # mail headers at the bottom and not top of the message. if (($saic) && (/RFC822 Header Follows/i) ) { #print "Resetting headers...\n"; $headers_found=0; undef $xmailer; undef $subject; undef $from_name; undef $from_address; undef $auth_name; undef $auth_address; undef $to_name; undef $to_address; undef $from; undef $to; undef $maildate; undef $messageid; } # Put the mail into an array so we can do stuff with it later $out[$line]=$_; } # If this is a test message, mark it as spam if ($subject =~ /^$autoreject_subject/i) { $mail_is_spam=1; $reason{"autorejectsubject"}="Autoreject Subject"; } &check_spam_subject(); # At this point there are three cases: # $mail_is_spam | $ignore | Action # --------------+---------+--------------- # 0 | 0 | Mail is good -- keep it # 1 | 0 | Mail is spam -- complain and send it to junk-mail # 1 | 1 | Mail is spam -- just send it to junk-email # Generate a from: and to: for mail that might be sent to the sender # Generate the to and from addresses from the authenticated sender if we know it #if (($auth_name && $auth_address) && !($auth_address=~/localhost/i)){ if ($from_name && $from_address) { # otherwise use the from fields $asshole = "$from_name\@$from_address"; $replyto = "$your_name \<$from_name\@$from_address\>"; } elsif ($auth_name && $auth_address) { $asshole = "$auth_name\@$auth_address"; $replyto = "$your_name \<$auth_name\@$auth_address\>"; } else { $mail_is_spam=1; $reason{"OTHER"}.="Can't determine sender "; $no_sender=1; } $asshole=~s#[\n]*$##; $replyto=~s#[\n]*$##; if ($check_num_only_addresses) { if ($asshole=~m#^[\d]+\@#) { $reason{"OTHER"}.=" Numeric email address"; $mail_is_spam=1; } } if (($your_email) && ($asshole =~ /^$your_email/i)) { $ignore=1; $mail_is_spam=1; $reason{"FROMYOU"}="This mail message was from you"; } # This is a new stealth way of mailing - Thanks Thomas Lostaunau if (!$from) { $mail_is_spam=1; $reason{"OTHER"}.="No legit From address "; $no_sender=1; } if (!$to) { $mail_is_spam=1; $reason{"OTHER"}.="No legit To address "; } # If the mail is suspected spam then give it a chance for redemption (i.e. let mail # from root@ or abuse@ the site through). Exceptions can cause the mail to either # be sent to you ($mail_is_spam=0), or for it to be removed without # contacting the host site ($ignored=1). &check_for_exceptions if ($mail_is_spam); # Build a string of reasons that the mail was tagged as spam if it has been $explanation=&reason_for_rejection(); # If the -justparse command line is given then just show what the results # would be if the filter was run on the input if ($just_parse) { print "NAGS Spam Filter v$version - Diagnostic Mode\n"; print "----------------------------------------\n"; print "From: $asshole\n"; print "To: $to_name\@$to_address\n"; print "Subject: $subject\n"; print "X-Mailer: $xmailer\n"; print "Messade-ID: $messageid\n\n"; if ($mail_is_spam) { print "This mail is spam mail ($explanation)\n"; if ($ignore) { print "This spam mail will be ignored\n"; } else { if (!$forward_mail) { print "\tA complaint would be sent:\n\t\tTo: $asshole\n\t\tFrom: $replyto\n" unless ($no_sender); } if ($forward_mail==1) { print "\tA complaint would be sent:\n\t\tTo: $asshole\n\t\tFrom: $replyto\n" unless ($no_sender); print "\tThis mail message is not forwarded...\n"; } if ($forward_mail==2) { print "\tThis mail message will be forwarded...\n"; } } } else { print "This mail is okay "; print "($okayreason)" if ($okayreason); print "\n\tIn spite of [$explanation]" if ($explanation); print "\n\tMail would be forwarded to: $forward_mail_to\n" if ($forward_mail); print "\n"; } exit; } # All spam mail (including mail that is ignored) are saved here. # Mail messages that were parsed incorrectly are also kept here (i.e. if # my regular expressions didn't match the users name.) This is done for # debugging purposes. if (($mail_is_spam) || (!$asshole)) { &log_junk_mail; } # If this is junk mail and it isn't from someone like their postmaster # We return the mail back to the sender if ($mail_is_spam) { if ($ignore) { # If we are ignoring this mail.. just make a note that it was received... &log_entry("Ignored mail\n\tReason: $explanation\n\tFrom: $asshole\n\tSubject: $subject"); } else { # This mail is junk mail -- now we have to decide what to do with it # If $forward_mail=0, log it, put a copy in the junkmail directory, send rejection letter # If $forward_mail=1, log it, put a copy in the junkmail directory, send rejection letter # If $forward_mail=2, log it and forward it to $forward_mail_to &log_entry("Spam mail received\n\tReason: $explanation\n\tFrom: $asshole received\n\tSubject: $subject"); if ($forward_mail==2) { &log_entry("\tJunk mail header added",1); &send_mail_message($forward_mail_to,$asshole); } else { # return the mail to the person who send it # Either the command line flag or the configuration variable can override # complaining to the sender unless ($no_sender) { $result=&send_mail_message($asshole,$replyto) if ($asshole); unless ($result) { &log_entry ("\tERROR: Unable to deliver mail to: $asshole",1); } } } } # end if ($ignore or else...) } else { # This mail is legit -- now we have to decide what to do with it # If $forward_mail=0, log it and put it in mail box # If $forward_mail=1, log it and forward it to $forward_mail_to # If $forward_mail=2, log it and forward it to $forward_mail_to if ($forward_mail) { &send_mail_message($forward_mail_to,$asshole) } else { unless (open (OUT,">>$mailbox")) { # If the mail can't be appended to the mailbox, then # append the mail to the $mail_error file &log_entry("SERIOUS ERROR: Can not open $mailbox\n\tSystem reports: $!\n\tUsing $mail_error instead"); unless (open (OUT,">>$mail_error")) { # If the $mail_error file can't be appended to, this mail message # is gonna get eaten. &log_entry("SERIOUS ERROR: Can not write to $mail_error. This mail message is probably lost\n"); $stderr_output=select (STDERR); open (OUT,">$stderr_output"); } } # Obtain a file lock. If your OS does not support this, comment out this line &lock(); #print OUT "$out[$x]\n" if ($out[$x]); for ($x=0; $x<=$line;$x++) { print OUT "$out[$x]\n"; } # Print two blank lines -- this should help mailers which concactenate messages print OUT "\n\n"; # Release file lock. If your OS does not support this, comment out this line &unlock(); close (OUT); } # Okay so they aren't an asshole, but I'm a pessimist and they will never know... if ($explanation) { &log_entry("Kept mail from: $asshole ($okayreason)\n\tIn spite of ($explanation)"); } else { &log_entry("Kept mail from: $asshole"); } } # Check if $_ matches against the list of spam sites. If it does flag the mail as spam # and note the site name in the %reason array. sub check_spam_sites { foreach $spamsite (keys %spammers) { if (/$spamsite/i) { $mail_is_spam=1; $spamsites_found{$spamsite}=1; #$reason{"site"}=$spamsite; } } } # Check to see if there is a reason that this spam identified mail should be let through # or see if it is one of the spam mails that should be ignored sub check_for_exceptions { # If they send the magic subject, let the mail through foreach $okay_subject (@okay_subjects) { if ($subject =~ /$okay_subject/i) { $okayreason="Okay Subject Used"; $mail_is_spam=0 ; } } # If they match an exception, let the mail through foreach $exception (@exceptions) { #$mail_is_spam=0 if ("$from_name\@$from_address" =~ /$exception/i); if ($asshole =~ /$exception/i) { $mail_is_spam=0 ; $okayreason="From_Exception: $exception"; } } # If they match an exception, let the mail through foreach $exception (@to_exceptions) { if ("$to_name\@$to_address" =~ /$exception/i) { $mail_is_spam=0; $okayreason="To_Exception: $exception"; } } # If they match an ignore, don't generate complaint mail... foreach $exception (@ignores) { if ("$asshole" =~ /$exception/i) { $ignore=1; $okayreason="Ignore: $exception"; } } # If this is a spam mail subject test then make sure you don't ignore the # mail -- you want it to responsd. Furthermore reassert that the mail is spam # in case one of the previous exceptions "cleared" the mail. if ($subject =~ /^$autoreject_subject/i) { $ignore=0; $mail_is_spam=1; } if ($itsokay) { $mail_is_spam=0; $ignore=0; $okayreason="Command line override"; } } # Sends an email message consisting of the rejected letter with a specified # to and from. sub send_via_smtp { local ($to,$from,$smtp_host)=@_; unless ($from && $to && $smtp_host) { &log_entry("\tERROR: Missing from($from), to($to), or smtp_host($smtp_host)\n",1); exit; } # Connect up to the SMTP port and send the response $port=25; $hello_from=$hostname unless ($hello_from); use Socket; ($prototype)=getprotobyname("tcp"); # Thanks to: Markus Baumeister ($in_name, $in_aliases, $in_type, $in_len, $thataddr) = gethostbyname($smtp_host); $serveraddr = pack("Sna4x8",&AF_INET,$port,$thataddr); # CHANGED!! unless (socket (SOCKET,&AF_INET, &SOCK_STREAM, $prototype)) { &log_entry ("ERROR: $smtp_host: No socket: $!",1); return 0; } unless (connect (SOCKET, $serveraddr)) { &log_entry ("\tERROR: $smtp_host: Can't connect: $!",1); return 0; } # Turn off buffering select (SOCKET); $|=1; # Prepare stuff for socket polling $rin=$win=$ein=''; vec($rin,fileno(SOCKET),1)=1; vec($win,fileno(SOCKET),1)=1; # This line isn't used $ein=$rin | $win; # This line isn't used $default_timeout=10; # Used if no parameter is sent to &read_a_line or &read_all_lines # Check to see if we receive a greeting response within 15 seconds... $response=&read_a_line(15); unless ($response =~ /^220/) { $response="[NO DATA RECEIVED]" unless ($response); &log_entry ("\tERROR: $smtp_host: Incorrect greeting message: $response",1); return 0; } # Identify yourself print "helo $hello_from\n"; print STDERR "helo $hello_from\n" if ($debug==3); $response=&read_all_lines(); unless ($response =~ /^250/) { $response="[NO DATA RECEIVED]" unless ($response); &log_entry ("\tERROR: $smtp_host: Incorrect response to helo: $response",1); return 0; } # Your name @ their email address print "mail from:\"$from\"\n"; print STDERR "mail from:\"$from\"\n" if ($debug==3); $response=&read_all_lines(); unless ($response =~ /^250/) { $response="[NO DATA RECEIVED]" unless ($response); &log_entry ("\tERROR: $smtp_host: Incorrect response to mail from: $response",1); return 0; } # Their email address print "rcpt to:\"$to\"\n"; print STDERR "rcpt to:\"$to\"\n" if ($debug==3); $response=&read_all_lines(); unless ($response =~ /^250/) { $response="[NO DATA RECEIVED]" unless ($response); &log_entry ("\tERROR: $smtp_host: Incorrect response to rcpt to: $response",1); return 0; } print "data\n"; print STDERR "data\n" if ($debug==3); $response=&read_all_lines(); unless ($response =~ /^354/) { $response="[NO DATA RECEIVED]" unless ($response); &log_entry ("\tERROR: $smtp_host: Incorrect response to data: $response",1); return 0; } # If this is a rejection letter create a forged rejection letter if ( ($mail_is_spam) && ($forward_mail!=2)) { # print "From: $from\n"; # print scalar localtime; # print "\n"; print "To: $to\n"; print "Reply-to: $from\n"; print "Subject: $rejected_subject\n"; print "X-Mailer: NAGS Mail Filter v$version\n"; print "X-lewp: NAGS\n\n"; # X-lewp helps to protect against infinite recursion. Any mail messages # containing this that are tagged as spam are ignored. print "Your mail has been rejected for the following reason(s):\n\n"; if ($reason{"xmailer"}) { print "Mailer: $reason{\"xmailer\"}\n"; print "\tYou are using a piece of mail software that is commonly\n"; print "\tused by junk emailers. This includes software that is dedicated to\n"; print "\tspamming people as well as legitmate software that is highly abused by\n"; print "\tspammers.\n\n"; } if ($reason{"other"}) { print "Other: $reason{\"other\"}\n"; print "\tYour mail message contains special headers that are normally only\n"; print "\tgenerated by some of the bulk emailers.\n\n"; } if ($reason{"autorejectsubject"}) { print "Autoreject Subject: $autoreject_subject\n"; print "\tYou are testing this mail filter to see what a rejection letter looks like.\n\n"; } if ($reason{"subject"}) { print "Subject: $reason{\"subject\"}\n"; print "\tOne of the phrases used in the subject of your message has flagged\n"; print "\tthis mail filter. If this is the only reason your mail was rejected,\n"; print "\tyou may get mail through by simply changing the subject of the message.\n"; } if ($reason{"site"}) { print "Site match: $reason{\"site\"}\n"; print "\tOne of the sites that your mail to me passed through matched\n"; print "\tmy list of spammers. If this match was intentional, your mail\n"; print "\twas rejected because I receive large amounts of junk email from one\n"; print "\tof the sites your mail originated from or passed through on its way to me.\n"; print "\tSince I receive more junk than legitimate mail from your site, it has been\n"; print "\tadded to my junk list. If you are a legitimate user of this system\n"; print "\tyou should realize that your system administrators either are\n"; print "\tinvolved in a campaign to send out millions of pieces of unwanted\n"; print "\temail to the internet or they are unwilling to put a stop to their\n"; print "\tusers that are engaging in this form of abuse. It is also possible\n"; print "\tthat your site name matches a word or pattern that I have deemed\n"; print "\tsuspicious (i.e. money).\n"; } if ($reason{"FAKEIP"}) { print "Fake IP address found\n"; print "\tThe mail message sent contained an illegally formed IP address.\n\n"; } if ($reason{"IPNAME_MISMATCH"}) { print "IP => NAME Mismatch\n"; print "\tOne of the reverse-DNS entries in your mail headers did not match the\n"; print "\tname it was listed with. This means that either there are multiple names\n"; print "\tmatched to that IP address or the mail sent was a forgery.\n\n"; } if ($reason{"SENDMAIL"}) { print "Sendmail v: $reason{\"SENDMAIL\"}\n"; print "\tWhile a few sites use this unusual configuration, the large majority\n"; print "\tof email with this version/configuration file for sendmail are forgeries\n\n"; } if ($reason{"X-UIDL"}) { print "X-UIDL header found\n"; print "\tThe X-UIDL header is used by many pop servers to store information about\n"; print "\tthe messages left on the server. Many spammers are including this header\n"; print "\tin their mail messages in the hope that this will cause the messages to\n"; print "\tautomatically downloaded.\n\n"; } if ($reason{"LOCALHOST"}) { print "FROM: LOCALHOST\n"; print "\tYour message has a return address of: $asshole. It is possible\n"; print "\tthat you are a local user of this system. However, spammers have been putting\n"; print "\tlocalhost into the from address in order to try and trick responses to their email\n"; print "\tinto going to the local system instead of them.\n\n"; } print "If you have a legitimate reason to contact me, you may get your\n"; print "mail through the filter by using the following subject:\n\n"; print "$okay_subjects[0]\n\n"; print "I will then add you to my 'okay' list so future mail will not be\n"; print "rejected. NOTE: If you reply to this letter, it will *NOT* go to me.\n"; print "You must send a new piece of mail or forward your original piece of\n"; print "mail to my address. This has been to avoid receiving autoresponse messages\n"; print "from these rejection letters.\n\n"; print "If the email you sent to me was a piece of unsolicited commercial email, you should be aware that in addition to being rude, UCE is also illegal: From: http://www.ca-probate.com/faxlaw.htm Under United States law, it is unlawful \"to use any telephone facsimile machine, computer, or other device to send an unsolicited advertisement\" to any \"equipment which has the capacity (A) to transcribe text or images (or both) from an electronic signal received over a regular telephone line onto paper.\" The law allows individuals to sue the sender of such illegal \"junk mail\" for \$500 per copy. Most states will permit such actions to be filed in Small Claims Court. "; print "NAGS Email Filter v$version\n"; print "A service of Netizens Against Gratuitous Spamming\n"; print "http://www.nags.org/\n\n"; print "$your_name [$your_email]\n\n"; print "The text of the rejected email follows:\n"; print "---------------------------------------\n"; } # Send the mail for ($x=0; $x<=$line;$x++) { # if okay mail: # if $forward_mail, add X-Forwarded-By: NAGS header # if spam mail: # if $forward_mail=2, # add X-Forwarded-By: NAGS header # add $forwarded_junk_mail_tag if (($x==$headers_found) && ($forward_mail)){ if ($mail_is_spam) { if ($forward_mail==2) { print "X-Forwarded-By: NAGS v$version\n"; print "$forwarded_junk_mail_tag ($explanation)\n" if ($forwarded_junk_mail_tag); } } print "\n"; } if (($mail_is_spam) && ($forward_mail!=2)) { print "> $out[$x]\n"; } else { print "$out[$x]\n"; } } print "\n.\n"; print "quit\n"; # Read through responses until a 250 accepted response is found... $found=0; while ($next=&read_a_line()) { if ($next =~ /^250/) { $found=1; last; } } unless ($found) { $next="[NO DATA RECEIVED]" unless ($next); &log_entry ("\tERROR: $smtp_host: Last line reads: $next",1); return 0; } close (SOCKET); select (STDOUT); if (($mail_is_spam) && ($forward_mail!=2)) { &log_entry ("\tMail sent to: $to via $smtp_host",1); } else { &log_entry ("\tMail forwarded to: $to via $smtp_host",1); } return 1; } # Lock the mailbox sub lock { flock (OUT,2); # In case someone appended while we were waiting seek (OUT,0,2); } # Unlock the mailbox sub unlock { flock (OUT,8); } # Uncomment this code if you are running under Solaris # Thanks to Markus Baumeister ## Lock the mailbox under Solaris #sub lock { # $lockstr[0]=2; #F_WRLCK # $lockstr[1]=0; # $lockstr[2]=0; # $lockstr[3]=0; # $lockstr[4]=0; # $lockstr[5]=0; # # In case someone appended while we were waiting seek (OUT,0,2); # fcntl (OUT,7,$lockstr); #F_SETLKW #} # # Unlock the mailbox #sub unlock { # $lockstr[0]=3; #F_UNLCK # $lockstr[1]=0; # $lockstr[2]=0; # $lockstr[3]=0; # $lockstr[4]=0; # $lockstr[5]=0; # fcntl (OUT,6,$lockstr); #F_SETLK #} # Create log entry of this transaction sub log_entry { $message=@_[0]; $flag=@_[1]; unless (open (LOG,">>$logfile")) { print "NAGS Email Filter: Serious Error\n"; print "\tUnable to write to log file - please check \$home_dir\n"; print "\t-----------------------------------------------------\n"; print "\t$date: $message\n"; } if ($flag) { print LOG "$message\n"; } else { print LOG "$date $message\n"; } close (LOG); } # Creates a file in the specified directory named after the time of the # received mail and then put the contents of the mail message in it. sub log_junk_mail { unless (open (JUNK,">>$junkdir/$datefile")) { &log_entry ("ERROR: Could not create: $junkdir/$datefile"); } for ($x=0; $x<=$line;$x++) { print JUNK "$out[$x]\n"; } close (JUNK); } # Read just one line from the socket sub read_a_line { local ($data); local ($timeout)=@_; $timeout=$default_timeout unless ($timeout); ($nfound,$timeleft)=select($rout=$rin,undef,undef,$timeout); if ($nfound) { #$count++; $data=; print STDERR "$data" if ($debug==3); } else { #print STDERR "NO DATA after $timeout seconds\n"; return 0; } return $data; } # Read all available lines from the socket sub read_all_lines { local ($data); local ($timeout)=@_; $timeout=$default_timeout unless ($timeout); ($nfound,$timeleft)=select($rout=$rin,undef,undef,$timeout); unless ($nfound) { #print STDERR "NO RESPONSE after $timeout seconds\n"; return 0; } while ($nfound) { $data=; print STDERR "$data" if ($debug==3); ($nfound,$timeleft)=select($rout=$rin,undef,undef,$timeout); } return $data; } # Send a mail message to: $to using the first SMTP host. If this fails, # use the next in line until all available SMTP hosts have failed. sub send_mail_message { local ($to,$from)=@_; # Go through list of SMTP hosts until one of them is sucessful foreach $mailhost (@smtp_hosts) { #&log_entry ("Trying: $mailhost",1); return 1 if (&send_via_smtp($to,$from,$mailhost)); } return 0; } # Load exceptions array sub load_exceptions { unless (open (EXCEPT, $exceptionfile)) { &log_entry ("ERROR: Could not find a list of exceptions: $exceptionfile"); } while () { next if /^#/; chop; s/^[\s]*//; s/[\s]*$//; next unless ($_); push (@exceptions,$_); } close (EXCEPT); } # Load to_exceptions array sub load_to_exceptions { if ($to_exceptionfile) { unless(open (EXCEPT, $to_exceptionfile)) { print "ERROR: Can not find list of TO exceptions: $to_exceptionfile\n"; print "\tPlease check your $home_dir\n"; &log_entry ("ERROR: Could not find a list of TO exceptions: $to_exceptionfile"); } while () { next if /^#/; chop; s/^[\s]*//; s/[\s]*$//; next unless ($_); push (@to_exceptions,$_); } close (EXCEPT); } } # Load spammers array sub load_spammers { unless (open (SPAMMERS, $spammerfile)) { print "ERROR: Can not find list of spammers: $spammerfile\n"; print "\tPlease check your $home_dir\n"; &log_entry ("ERROR: Could not find a list of spamming sites: $spammerfile"); } while () { next if /^#/; next if /^[\s]/; chop; #s/^[\s]*//; s/[\s]*$//; next unless ($_); $spammers{$_}=1; } close (SPAMMERS); } # Load spam_subjects array sub load_spam_subjects { if ($spamsubjectfile) { unless (open (SPAMMERS, $spamsubjectfile)) { print "ERROR: Can not find list of spam subjects: $spammersubjectfile\n"; print "\tPlease check your $home_dir\n"; &log_entry ("ERROR: Could not find a list of spam subjects: $spammersubjectfile"); } while () { next if /^#/; next if /^[\s]/; chop; #s/^[\s]*//; s/[\s]*$//; next unless ($_); $spamsubject{$_}=1; } close (SPAMMERS); } } # Load junkmailers array sub load_junkmailers { unless (open (JUNKMAILERS, $junkmailerfile)) { print "ERROR: Can not find list of junkmailers: $junkmailerfile\n"; &log_entry ("ERROR: Could not find a list of junk mailers sites: $junkmailerfile"); } while () { next if /^#/; chop; s/^[\s]*//; s/[\s]*$//; next unless ($_); push (@junkmailers,$_); } close (JUNKMAILERS); } # See if the users home directory exists sub check_home_dir { unless (-d $home_dir) { print "HOME DIRECTORY IS MISCONFIGURED\n"; print "===============================\n"; print "The NAGS email filter has the home directory improperly\n"; print "configured as: $home_dir\n\n"; print "Please edit \$home_dir in the nags_config.pl script to correct this!\n"; die; } } sub reason_for_rejection { local ($explanation); foreach $site (keys %spamsites_found) { $reason{"site"}.="$site "; } $explanation=""; foreach $ss (keys %reason) { $explanation.="$reason{$ss} "; } $explanation =~ s#[\s]+# #g; chop($explanation); return ($explanation); } # Don't accept mail from @localhost -- it is likely a ploy to confuse the mail rejection # routines. sub check_from_localhost { if (($from_address =~ /^localhost$/i) || ($auth_address =~ /^localhost$/i)){ $mail_is_spam=1; $reason{"LOCALHOST"}="LOCALHOST"; } } # Check if DNS name / IP address is valid -- determine whether the address is an # IP or DNS name and send it to the appropriate routine sub check_valid { local ($address)=$_[0]; local ($lesstabs) = $_[1]; $address=&remove_email($address); if ($address =~ /[a-zA-Z]/) { &check_dns_address($address,$lesstabs); } else { &check_ip_address($address,$lesstabs); } return $invalid; } # Sets $mail_is_spam, %reason and $response=1 if forged IP address sub check_ip_address { local ($ip) = $_[0]; local ($lesstabs) = $_[1]; local ($response)=0; if ($debug==1) { if ($lesstabs) { print "\tChecking IP: [$ip]\n" if ($debug==1); } else { print "\t\t\tChecking IP: [$ip]\n" if ($debug==1); } } if ($ip eq "may be forged") { #$reason{"FORGED"}.="May be forged"; return 1; } @ipparts=split(/\./,$ip); if ($ipparts[0] > 240) { $response=1; $mail_is_spam=1; $reason{"FAKEIP"}.="Multicast IP: $ipparts[0] "; print "\t\t\t\tFake IP address: Multicast - [$ip]\n" if ($debug ==1); } foreach $ippart (@ipparts) { # if ($ippart > 255) { if (($ippart > 255) ||($ippart =~ /[a-zA-Z]/)) { $mail_is_spam=1; $reason{"FAKEIP"}.="Forged IP address [$ip] "; $response=1; print "\t\t\t\tFake IP address: Part [$ippart] - [$ip]\n" if ($debug ==1); } } return $response; } # Check if the DNS name is legit # Blank, localhost, or anything ending in one of the listed domain extensions is valid sub check_dns_address { local ($name) = $_[0]; local ($lesstabs) = $_[1]; $name=~s#[\s]*$##; if ($debug==1) { if ($lesstabs) { print "\tChecking name: $name\n"; } else { print "\t\t\tChecking name: $name\n"; } } @valid_domains=('ARPA', 'COM', 'EDU', 'GOV', 'INT', 'MIL', 'NATO', 'NET', 'ORG', 'AD', 'AE', 'AF', 'AG', 'AI', 'AL', 'AM', 'AN', 'AO', 'AQ', 'AR', 'AS', 'AT', 'AU', 'AW', 'AZ', 'BA', 'BB', 'BD', 'BE', 'BF', 'BG', 'BH', 'BI', 'BJ', 'BM', 'BN', 'BO', 'BR', 'BS', 'BT', 'BV', 'BW', 'BY', 'BZ', 'CA', 'CC', 'CF', 'CG', 'CH', 'CI', 'CK', 'CL', 'CM', 'CN', 'CO', 'CR', 'CS', 'CU', 'CV', 'CX', 'CY', 'CZ', 'DE', 'DJ', 'DK', 'DM', 'DO', 'DZ', 'EC', 'EE', 'EG', 'EH', 'ES', 'ET', 'FI', 'FJ', 'FK', 'FM', 'FO', 'FR', 'FX', 'GA', 'GB', 'GD', 'GE', 'GH', 'GI', 'GL', 'GP', 'GQ', 'GF', 'GM', 'GN', 'GR', 'GT', 'GU', 'GW', 'GY', 'HK', 'HM', 'HN', 'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IN', 'IO', 'IQ', 'IR', 'IS', 'IT', 'JM', 'JO', 'JP', 'KE', 'KG', 'KH', 'KI', 'KM', 'KN', 'KP', 'KR', 'KW', 'KY', 'KZ', 'LA', 'LB', 'LC', 'LI', 'LK', 'LR', 'LS', 'LT', 'LU', 'LV', 'LY', 'MA', 'MC', 'MD', 'MG', 'MH', 'ML', 'MM', 'MN', 'MO', 'MP', 'MQ', 'MR', 'MS', 'MT', 'MU', 'MV', 'MW', 'MX', 'MY', 'MZ', 'NA', 'NC', 'NE', 'NF', 'NG', 'NI', 'NL', 'NO', 'NP', 'NR', 'NT', 'NU', 'NZ', 'OM', 'PA', 'PE', 'PF', 'PG', 'PH', 'PK', 'PL', 'PM', 'PN', 'PT', 'PR', 'PW', 'PY', 'QA', 'RE', 'RO', 'RU', 'RW', 'SA', 'SB', 'SC', 'SD', 'SE', 'SG', 'SH', 'SI', 'SJ', 'SK', 'SL', 'SM', 'SN', 'SO', 'SR', 'ST', 'SU', 'SV', 'SY', 'SZ', 'TC', 'TD', 'TF', 'TG', 'TH', 'TJ', 'TK', 'TM', 'TN', 'TO', 'TP', 'TR', 'TT', 'TV', 'TW', 'TZ', 'UA', 'UG', 'UK', 'UM', 'US', 'UY', 'UZ', 'VA', 'VC', 'VE', 'VG', 'VI', 'VN', 'VU', 'WF', 'WS', 'YE', 'YU', 'ZA', 'ZM', 'ZR', 'ZW','LOCALHOST','ARTS','FIRM','INFO','NOM','REC','STORE','WEB'); $name=&remove_email($name); @ipparts=split(/\./,$name); $found=0; # If it is empty or only one part - skip it if ($#ipparts < 1) { $found=1; print "\t\t\t\tSkipping check ($name)\n" if ($debug == 1); } #$found=1 if (&from_is_email($name)); foreach $domain (@valid_domains) { if ( "\L$ipparts[$#ipparts]" eq "\L$domain" ) { $found=1; last if ($found=1); } } unless ($found) { $mail_is_spam=1; $reason{"FAKEIP"}.="Forged DNS address [$name] "; } } # Turn an email address into a sitename sub remove_email { local ($name)=$_[0]; local ($origname)=$name; $name=~s#(.*)\@##; $name=~s#[\<\>\[\]]##g; $name=~s#;$##g; #print "\t\t\t\t$origname => $name\n" if (($origname ne $name) && ($debug==1)); return ($name); } # Check if the IP addresses is the same as the reverse-DNS for the name sub check_equiv { local ($name) = $_[0]; local ($ip) = $_[1]; return if (&is_private($ip)); $name=&remove_email($name); # If the hostname is an email address -- forget it. $foundname = (gethostbyaddr(pack("C4", split(/\./, $ip)), &AF_INET))[0]; if ($foundname) { print "\t\t\t$ip => $foundname\n" if ($debug ==1); # Only compare the last parts of the domain (i.e. www.thisplace.com => thisplace.com) ($ef_foundname) = $foundname =~ m#([^\.]+\.[^\.]+)$#; ($ef_name) = $name =~ m#([^\.]+\.[^\.]+)$#; unless ($ef_foundname =~ /$ef_name/i) { #$mail_is_spam=1; $reason{"IPNAME_MISMATCH"}.="DNS Mismatch [$ip=>$foundname!=$name]"; print "\t\t\t\t\tSPAM!\n" if ($debug==1); } } else { # Should we make mail be spam if we can't reverse DNS?? # Until I figure out this -- it will just note it in the logs # # Here is a thought if the DNS name was an email address then it is okay # that it can't resolve it, otherwise mark it as spam? print "\t\t\tUnable to resolve $ip\n" if ($debug==1); #$reason{"NO_DNS"}.="Unable to perform DNS on: $ip "; } } # Check if IP address belongs to a private netblock -- which therefore shouldn't # be in a legit email address... sub is_private { local ($ip)=$_[0]; # The reserved private netblocks are: # 10.*.*.* # 172.[16-31].*.* # 192.168.*.* # These are respectively 24, 20, and 16 bits of address space, so if you have # a simple way to turn dotted quads into 32-bit values you can do simple # bitmask logic on them. local (@privatenetworks)=("10\..*","192\.168\..*"); for ($x=16; $x<31; $x++) { #print "adding : 172\.$x\..*\n"; push (@privatenetworks,"172\.$x\..*"); } print "\t\tChecking if $ip => $name\n" if ($debug==1); foreach $pn (@privatenetworks) { if ($ip =~ /$pn/) { print "\t\t\t$ip is in a private network ($pn)\n" if ($debug==1); return 1; } } return 0; } sub check_spam_subject { foreach $cs (keys %spamsubject) { if ($subject =~ /$cs/i) { $mail_is_spam=1; $reason{"subject"}=$cs; } } }