Hi,

I thought I'd post my updated spamdyke-stats script, which is based on the work of a fellow spamdyke user and has been updated by me to include more detailed statistics, especially when running with log-level=verbose.

I use the script mainly to optimize spamdyke config over time and see what filters are effective and which can be disabled or which blacklists to prefer and their best order.

The script is used by piping mail logs into it and accepts two options:
--tld <num>       enable breakdown of <num> top tlds matched by filter
--nodetail      disable detailed breakdown gathered from FILTER entries

Here's an example output:
---snip---
# bzcat /var/log/mail.info-20080[89]*|cat - /var/log/mail.info|spamdyke-stats
563413  DENIED_RDNS_MISSING
435588  DENIED_IP_IN_CC_RDNS
201553  DENIED_RBL_MATCH
-- Breakdown --
78.51%  zen.spamhaus.org
13.41%  ix.dnsbl.manitu.net
8.08%   bl.spamcop.net
---------------
134583  DENIED_RDNS_RESOLVE
21609   DENIED_IP_IN_RDNS
-- Breakdown --
60.31%  .net
16.96%  .dynamic.
11.73%  .com
8.03%   .dhcp.
2.59%   .t-dialin.net
0.25%   .broadband.
0.13%   .dialup.
---------------
15125   ALLOWED
10037   TIMEOUT
4836    DENIED_BLACKLIST_NAME
1816    DENIED_OTHER
1435    ERROR
505     DENIED_SENDER_NO_MX
11      DENIED_RHSBL_MATCH
-- Breakdown --
100.00% dsn.rfc-ignorant.org
---------------
1       DENIED_EARLYTALKER

Allowed: 15125
Denied : 1363915
Errors : 11472
Total  : 1390512
% Valid: 1.09%
% Spam : 98.09%
% Error: 0.83%
---snip---

I hope you find this script useful. Do with it whatever you like.

Best Regards,
   Felix Buenemann

#!/usr/bin/perl -w
use diagnostics;
use strict;
use Getopt::Long;
# Command-line options:
#   --tld <num>   per status, list the <num> most frequent TLD groups taken
#                 from the origin_rdns field (0, the default, disables this)
#   --nodetail    suppress the breakdown sections built from FILTER_* entries
my $tldtop = 0;
my $detailed = 1;
GetOptions ("tld=i"   => \$tldtop,
            "detail!" => \$detailed)
        or do {
                # GetOptions has already reported the offending option.
                print STDERR "Usage: $0 [--tld <num>] [--nodetail] [logfile ...]\n";
                exit 1;
        };

# Usage:  # cat /var/log/qmail/smtpd/current | ./this_file

my %status = ();        # number of log entries per spamdyke status
my %origin = ();        # per status: number of entries per TLD
my %originsum = ();     # per status: number of entries carrying origin_rdns
my %rblstat = ();       # hits per DNSBL (from FILTER_RBL_MATCH entries)
my %rhsblstat = ();     # hits per RHSBL (from FILTER_RHSBL_MATCH entries)
my %rdnsblstat = ();    # hits per keyword (from FILTER_IP_IN_RDNS_BLACKLIST)

# Values that are computed only once the whole log has been read.
my ($allowpercentage, $errorpercentage, $spampercentage, $sum);

# Counters start at zero so that an empty bucket prints as "0" instead of
# triggering "use of uninitialized value" warnings.
my ($allow, $deny, $error) = (0, 0, 0);
my ($rblsum, $rhsblsum, $rdnsblsum) = (0, 0, 0);

# Read the log (STDIN or the files named on the command line) and tally every
# line that mentions spamdyke.
while (my $line = <>) {
        next unless $line =~ m/spamdyke/;
        chomp $line;

        # The status keyword is the second space-separated word of the fourth
        # colon-separated field of the line.
        # NOTE(review): this presumably targets syslog-style lines whose
        # timestamp contributes two colons -- confirm for other log formats.
        my (undef, undef, undef, $field) = split(/:/, $line);
        next unless defined $field;
        my (undef, $sdstatus) = split(/ /, $field);
        next unless defined $sdstatus and length $sdstatus;

        # FILTER_* entries name the list or keyword that matched. Count only
        # when the name can actually be extracted, so that a stale $1 from an
        # earlier match is never used.
        if ($sdstatus =~ m/FILTER_RBL_MATCH/) {
                if ($line =~ m/rbl: (\S+)/) {
                        $rblstat{$1}++;
                        $rblsum++;
                }
        }
        elsif ($sdstatus =~ m/FILTER_RHSBL_MATCH/) {
                if ($line =~ m/rhsbl: (\S+)/) {
                        $rhsblstat{$1}++;
                        $rhsblsum++;
                }
        }
        elsif ($sdstatus =~ m/FILTER_IP_IN_RDNS_BLACKLIST/) {
                if ($line =~ m/keyword: (\S+)/) {
                        $rdnsblstat{$1}++;
                        $rdnsblsum++;
                }
        }

        # These entries are not counted as a connection status of their own.
        next if $sdstatus =~ m/CHKUSER|(?:FILTER|DEBUG|EXCESSIVE)_/;
        $status{$sdstatus}++;

        # Optional TLD statistics, taken from the origin_rdns field.
        next unless $tldtop and $line =~ m/ origin_rdns: ([^ ]+)/;
        my $rdns = $1;

        # Entries without a usable name still count towards the per-status
        # total, so the TLD percentages are relative to all such entries.
        $originsum{$sdstatus}++;
        next if $rdns =~ m/^\(unknown\)$/;

        if ($rdns =~ m/\.(com|net)$/) {
                $origin{$sdstatus}{$1}++;
        }
        elsif ($rdns =~ m/\.([a-z]{2,2}\.[a-z]{2,2})$/) {      # co.uk
                $origin{$sdstatus}{$1}++;
        }
        elsif ($rdns =~ m/\.([a-z]{2,})$/) {                   # de, ru, ...
                $origin{$sdstatus}{$1}++;
        }
}
# Fold the per-status counts into three buckets: allowed, errors (TIMEOUT and
# ERROR statuses) and denied (everything else). The buckets are reset first so
# that an empty one is 0 rather than undef.
($allow, $deny, $error) = (0, 0, 0);
foreach my $stat (keys %status) {
        if ($stat =~ m/ALLOWED/) {
                $allow += $status{$stat};
        }
        elsif ($stat =~ m/TIMEOUT|ERROR/) {
                $error += $status{$stat};
        }
        else {
                $deny += $status{$stat};
        }
}

# Share of each bucket in percent. With no spamdyke entries at all the total
# is zero; report 0.00 in that case instead of dying on a division by zero.
my $connections = $allow + $error + $deny;
$spampercentage  = sprintf("%2.2f", $connections ? $deny  / $connections * 100 : 0);
$errorpercentage = sprintf("%2.2f", $connections ? $error / $connections * 100 : 0);
$allowpercentage = sprintf("%2.2f", $connections ? $allow / $connections * 100 : 0);

# Print one "-- Breakdown --" section.
#   $counts  hash reference: name => number of hits
#   $total   number of hits overall (the denominator of the percentages)
# Names are listed most frequent first, ties in alphabetical order. Nothing is
# printed when there is no data, e.g. when the log holds no FILTER_* entries.
sub print_breakdown {
        my ($counts, $total) = @_;
        return unless $total and %$counts;

        print "-- Breakdown --\n";
        foreach my $name (sort { $counts->{$b} <=> $counts->{$a} || $a cmp $b }
                          keys %$counts) {
                # The name is passed as an argument, not interpolated into the
                # format, so a '%' inside it cannot corrupt the output.
                printf "%2.2f%%\t%s\n", $counts->{$name} / $total * 100, $name;
        }
        print "---------------\n";
        return;
}

# Statuses that have a detailed breakdown, with the hit table and the hit
# total that belong to each of them.
my %breakdown_for = (
        DENIED_RBL_MATCH   => [ \%rblstat,    \$rblsum    ],
        DENIED_RHSBL_MATCH => [ \%rhsblstat,  \$rhsblsum  ],
        DENIED_IP_IN_RDNS  => [ \%rdnsblstat, \$rdnsblsum ],
);

# One line per status, most frequent first, each optionally followed by its
# breakdown and its TLD ranking.
foreach my $key (sort { $status{$b} <=> $status{$a} || $a cmp $b } keys %status) {
        print "$status{$key}\t$key\n";

        if ($detailed and exists $breakdown_for{$key}) {
                my ($counts, $total) = @{ $breakdown_for{$key} };
                print_breakdown($counts, $$total);
        }

        next unless $tldtop && $origin{$key};

        my $top = $tldtop;
        print "-- Top $top TLD --\n";

        # TLDs with the same hit count share one output line, so group them
        # by count; sorting the names keeps the line stable between runs.
        my %tlds_with = ();
        foreach my $tld (sort keys %{ $origin{$key} }) {
                push @{ $tlds_with{ $origin{$key}{$tld} } }, $tld;
        }

        # Highest count first; each line shows the share of a single TLD of
        # the group, relative to all entries of this status that carried an
        # origin_rdns field. At most $tldtop lines are printed.
        foreach my $count (sort { $b <=> $a } keys %tlds_with) {
                printf "%2.2f%%\t%s\n", $count / $originsum{$key} * 100,
                        join(', ', @{ $tlds_with{$count} });
                last unless --$top;
        }
        print "------------\n";
}
# Closing summary: the three bucket counts, their total, and the share of
# each bucket in percent.
$sum = ($deny + $error + $allow);
print join('',
        "\n",
        "Allowed: $allow \n",
        "Denied : $deny \n",
        "Errors : $error \n",
        "Total  : $sum \n",
        "% Valid: $allowpercentage% \n",
        "% Spam : $spampercentage% \n",
        "% Error: $errorpercentage% \n",
);

_______________________________________________
spamdyke-users mailing list
spamdyke-users@spamdyke.org
http://www.spamdyke.org/mailman/listinfo/spamdyke-users

Reply via email to