Hi Erik,

Am 19.10.2008 3:13 Uhr, Eric Shubert schrieb:
> Felix Buenemann wrote:
>> Hi Erik,
>>
>> Am 18.10.2008 20:39 Uhr, Eric Shubert schrieb:
>>> Sergio Minini wrote:
>>>> Erik, try:
>>>> # cat /var/log/maillog | ./spamdyke_stats.pl
>> [...]
>>>> % Valid: 29.11%
>>>> % Spam : 69.42%
>>>> % Error: 1.47%
>>>>
>>>> Good luck/
>>>> Sergio
>>> Thanks, but that doesn't work for me. My spamdyke log messages are going 
>>> to the qmail smtp log (log-target=0|stderr). Perhaps that's where the 
>>> problem lies, as the formatting would be slightly different.
>>>
>> Yes, the script currently expects syslog syntax. If you can provide a
>> few sample lines from the log, I can modify the script to work with it.
>>
>> -- Felix
> 
> Thanks, Felix. Here's a sample rejection (each line begins with @40000):
> @4000000048fa5df51149c60c tcpserver: status: 1/100
> @4000000048fa5df51149d5ac tcpserver: pid 22865 from 209.133.101.250
> @4000000048fa5df51149e164 tcpserver: ok 22865 
> doris.shubes:192.168.171.11:25 :209.133.101.250::58673
> @4000000048fa5dfc34b1ebec DENIED_SENDER_NO_MX from: 
> [EMAIL PROTECTED] 
> to: [EMAIL PROTECTED] origin_ip: 
> 209.133.101.250 origin_rdns: broadcaster.eonline.com auth: (unknown)
> @4000000048fa5dfd01593124 tcpserver: end 22865 status 0
> @4000000048fa5dfd015a7d2c tcpserver: status: 0/100
> 
> In case you don't know, the first (@4000...) field is a date/time stamp, 
> and can be converted to local time with the tai64nlocal program. I don't 
> know if that'll be necessary or not.
> 
> Thanks again. I'm eager to see the results.

Please test the attached version. You have to pass the --nosyslog
parameter to the script. Also, please check which of the $linematch
variants beginning at line 33 is the fastest (run "time cat /your/log |
spamdyke-stats.pl --nosyslog" to find out) and report it back to me.

By the way, the new version is about 25% faster than the last one, due to
optimizations of the regular expression matching, so updating might be
useful to others as well.

-- Felix
#!/usr/bin/perl -w
# build 2008101905
use diagnostics;
use strict;
use Getopt::Long;
# Command-line switches and their defaults:
#   --tld=N                print the top N TLDs per status code (0 = off)
#   --detail / --nodetail  print the per-list breakdowns (on by default)
#   --syslog / --nosyslog  expect syslog-formatted input (on by default)
my ($tldtop, $detailed, $syslog) = (0, 1, 1);
my %option_spec = (
        'tld=i'   => \$tldtop,
        'detail!' => \$detailed,
        'syslog!' => \$syslog,
);
# Getopt::Long reports unknown or malformed options itself; just stop then.
GetOptions(%option_spec) or exit 1;

# Usage:  # cat /var/log/qmail/smtpd/current | ./this_file

# Tables filled in while the log is read.
my %status;        # connections per spamdyke status code
my %origin;        # per status code: hits per TLD
my %originsum;     # per status code: number of lines carrying origin_rdns
my %rblstat;       # hits per DNSBL list
my %rhsblstat;     # hits per RHSBL list
my %rdnsblstat;    # hits per IP_IN_RDNS_BLACKLIST keyword

# Connection totals start at zero; the percentages and the remaining sums
# stay undefined until they are computed or first incremented.
my ($allow, $deny, $error) = (0, 0, 0);
my ($allowpercentage, $errorpercentage, $spampercentage);
my ($sum, $rblsum, $rhsblsum, $rdnsblsum);

# Regex fragment for the log prefix that precedes spamdyke's own message.
# The read loop appends ' (.*)' to it to capture that message.
#   --syslog   (default): "... spamdyke[1234]: DENIED_..."
#   --nosyslog          : multilog output, where every line starts with a
#                         TAI64N label ('@' followed by 24 hex digits).
my $linematch = "";
if($syslog) {
        $linematch = 'spamdyke\[\d+\]:';
} else {
        # NOTE(review): the patterns originally published here were destroyed
        # by the list archive's address obfuscation ("[EMAIL PROTECTED]").
        # They are reconstructed from the TAI64N label format; confirm against
        # the author's original attachment if it is available.
        $linematch = '^@[0-9a-f]{24}';
        # Looser alternatives, kept for benchmarking:
        #$linematch = '^@[0-9a-f]+';
        #$linematch = '^@[^ ]+';
}

# Main pass: read the log from STDIN (or from files named on the command
# line) and tally every spamdyke result line.
while(<>){
        # Skip lines that lack the expected log prefix; whatever follows the
        # prefix is spamdyke's own message.
        next unless m/$linematch (.*)/;
        my $line = $1;

        # The message has to start with a spamdyke status code.
        next unless $line =~ m/^(ALLOWED|ERROR|TIMEOUT|(?:DENIED|FILTER)_[^ ]+)/;
        my $sdstatus = $1;

        # FILTER_* lines name the list or keyword that matched.  A line that
        # lacks the expected field is ignored rather than being counted under
        # an undefined capture.
        if( $sdstatus =~ m/FILTER_RBL_MATCH/ ){
                if( $line =~ m/rbl: (\S+)/ ){
                        $rblstat{$1}++;
                        $rblsum++;
                }
        }
        elsif( $sdstatus =~ m/FILTER_RHSBL_MATCH/ ){
                if( $line =~ m/rhsbl: (\S+)/ ){
                        $rhsblstat{$1}++;
                        $rhsblsum++;
                }
        }
        elsif( $sdstatus =~ m/FILTER_IP_IN_RDNS_BLACKLIST/ ){
                if( $line =~ m/keyword: (\S+)/ ){
                        $rdnsblstat{$1}++;
                        $rdnsblsum++;
                }
        }

        # CHKUSER and FILTER_* entries only feed the breakdown tables above;
        # they are kept out of the per-status totals.
        next if $sdstatus =~ m/CHKUSER|FILTER_.*/;
        $status{$sdstatus}++;

        # TLD statistics are collected only when --tld was requested.
        next unless $tldtop and $line =~ m/ origin_rdns: ([^ ]+)/;
        my $rdns = $1;
        $originsum{$sdstatus}++;

        # "(unknown)" and names without a recognisable TLD count towards
        # %originsum only; they get no %origin entry.
        next if $rdns =~ m/^\(unknown\)$/;
        if($rdns =~ m/\.(com|net)$/){
                $origin{$sdstatus}{$1}++;
        } elsif($rdns =~ m/\.([a-z]{2,2}\.[a-z]{2,2})$/){ # co.uk
                $origin{$sdstatus}{$1}++;
        } elsif($rdns =~ m/\.([a-z]{2,})$/){ # de, ru, ...
                $origin{$sdstatus}{$1}++;
        }
}
# Fold the per-status counts into three buckets: allowed, errors (ERROR and
# TIMEOUT) and denied (every other status).
foreach my $code (sort keys %status){
        my $count = $status{$code};
        if( $code =~ m/ALLOWED/ ){
                $allow = $count;
                next;
        }
        if( $code =~ m/TIMEOUT|ERROR/ ){
                $error += $count;
                next;
        }
        $deny += $count;
}

# Each bucket's share of all connections, formatted with two decimals; with
# no connections at all every share is reported as 0.00.
my $aed_sum = $allow+$error+$deny;
($spampercentage, $errorpercentage, $allowpercentage) =
        map { sprintf("%2.2f", $aed_sum > 0 ? $_/$aed_sum*100 : 0) }
        ($deny, $error, $allow);

# Print one "-- Breakdown --" section: every entry of the hash referenced by
# $hits as a percentage of $total, largest first, ties in alphabetical order.
# An empty hash prints just the header and footer.
sub print_breakdown {
        my ($hits, $total) = @_;
        print "-- Breakdown --\n";
        foreach my $name (sort { $hits->{$b} <=> $hits->{$a} || $a cmp $b } keys %$hits){
                # The name is passed as an argument so that a '%' in the data
                # cannot be taken for a format directive.
                printf "%2.2f%%\t%s\n", $hits->{$name}/$total*100, $name;
        }
        print "---------------\n";
}

# Status codes that get a breakdown, with the table and the total to use.
my %breakdown_for = (
        DENIED_RBL_MATCH   => [ \%rblstat,    \$rblsum    ],
        DENIED_RHSBL_MATCH => [ \%rhsblstat,  \$rhsblsum  ],
        DENIED_IP_IN_RDNS  => [ \%rdnsblstat, \$rdnsblsum ],
);

# One line per status code, most frequent first, each optionally followed by
# its breakdown (--detail) and its top TLD list (--tld=N).
foreach my $key (sort { $status{$b} <=> $status{$a} || $a cmp $b; } keys %status){
        print "$status{$key}\t$key\n";
        if($detailed and $breakdown_for{$key}){
                my ($hits, $total) = @{ $breakdown_for{$key} };
                print_breakdown($hits, $$total);
        }
        if($tldtop && $origin{$key}) {
                my $top = $tldtop;
                print "-- Top $top TLD --\n";
                # TLDs with the same hit count share one output line.  Grouping
                # on the integer count avoids comparing floating-point
                # percentages, and sorting the names keeps the output stable.
                my %tlds_with = ();
                foreach my $tld (sort keys %{$origin{$key}}){
                        push(@{ $tlds_with{ $origin{$key}{$tld} } }, $tld);
                }
                # Each line shows the share of a single TLD of the group,
                # relative to all lines of this status that carried an
                # origin_rdns field.  Unknown or unparseable names are part of
                # that total but are not listed.
                foreach my $count (sort { $b <=> $a } keys %tlds_with){
                        printf "%2.2f%%\t%s\n", $count/$originsum{$key}*100,
                                join(', ', @{ $tlds_with{$count} });
                        last unless --$top;
                }
                print "------------\n";
        }
}
# Closing summary: absolute counts first, then each bucket's share of all
# connections.
$sum = $allow + $error + $deny;
print "\n",
      "Allowed: $allow \n",
      "Denied : $deny \n",
      "Errors : $error \n",
      "Total  : $sum \n",
      "% Valid: $allowpercentage% \n",
      "% Spam : $spampercentage% \n",
      "% Error: $errorpercentage% \n";

_______________________________________________
spamdyke-users mailing list
spamdyke-users@spamdyke.org
http://www.spamdyke.org/mailman/listinfo/spamdyke-users

Reply via email to