Hi,
I thought I'd post my updated spamdyke-stats script which is based on
the work of a fellow spamdyke user and has been updated by me to include
more detailed statistics, especially when running with log-level=verbose.
I use the script mainly to optimize spamdyke config over time and see
what filters are effective and which can be disabled or which blacklists
to prefer and their best order.
The script is used by piping mail logs into it and accepts two options:
--tld <num> enable breakdown of <num> top tlds matched by filter
--nodetail disable detailed breakdown gathered from FILTER entries
Here's an example output:
---snip---
# bzcat /var/log/mail.info-20080[89]*|cat -
/var/log/mail.info|spamdyke-stats
563413 DENIED_RDNS_MISSING
435588 DENIED_IP_IN_CC_RDNS
201553 DENIED_RBL_MATCH
-- Breakdown --
78.51% zen.spamhaus.org
13.41% ix.dnsbl.manitu.net
8.08% bl.spamcop.net
---------------
134583 DENIED_RDNS_RESOLVE
21609 DENIED_IP_IN_RDNS
-- Breakdown --
60.31% .net
16.96% .dynamic.
11.73% .com
8.03% .dhcp.
2.59% .t-dialin.net
0.25% .broadband.
0.13% .dialup.
---------------
15125 ALLOWED
10037 TIMEOUT
4836 DENIED_BLACKLIST_NAME
1816 DENIED_OTHER
1435 ERROR
505 DENIED_SENDER_NO_MX
11 DENIED_RHSBL_MATCH
-- Breakdown --
100.00% dsn.rfc-ignorant.org
---------------
1 DENIED_EARLYTALKER
Allowed: 15125
Denied : 1363915
Errors : 11472
Total : 1390512
% Valid: 1.09%
% Spam : 98.09%
% Error: 0.83%
---snip---
I hope you find this script useful. Do with it whatever you like.
Best Regards,
Felix Buenemann
#!/usr/bin/perl -w
use diagnostics;
use strict;
use Getopt::Long;
# Command-line options:
#   --tld <num>   print the <num> most frequent TLDs for each status code
#   --nodetail    suppress the per-list breakdown gathered from FILTER_* entries
my $tldtop = 0;
my $detailed = 1;
GetOptions ("tld=i" => \$tldtop,
            "detail!" => \$detailed) or exit 1;

# Usage: # cat /var/log/qmail/smtpd/current | ./this_file

my %status = ();     # hash of status connections
my %origin = ();     # hash of tld per status code
my %originsum = ();  # hash of tld per status code sums
my %rblstat = ();    # hash of DNSBL lists matched
my %rhsblstat = ();  # hash of RHSBL lists matched
my %rdnsblstat = (); # hash of patterns in IP_IN_RDNS_BLACKLIST matched
my ($allow, $deny, $error, $allowpercentage, $errorpercentage, $spampercentage,
    $sum, $rblsum, $rhsblsum, $rdnsblsum);

# Start every counter at zero so that empty input (or a log without FILTER_*
# entries) never feeds an undefined value into the arithmetic further down.
$allow = $deny = $error = 0;
$rblsum = $rhsblsum = $rdnsblsum = 0;

# Reduce a reverse-DNS name to the suffix it is tallied under.
#   $rdns - value of the origin_rdns field
# Returns "com"/"net", a two-part suffix of the form xx.yy (e.g. "co.uk"),
# or a plain alphabetic TLD (e.g. "de"). Returns undef for anything else,
# which includes the "(unknown)" placeholder.
sub rdns_tld {
    my ($rdns) = @_;
    return $1 if $rdns =~ m/\.(com|net)$/;
    return $1 if $rdns =~ m/\.([a-z]{2}\.[a-z]{2})$/;  # co.uk
    return $1 if $rdns =~ m/\.([a-z]{2,})$/;           # de, ru, ...
    return;
}

while (my $line = <>) {
    # The status code is the first token after the "spamdyke[pid]:" tag.
    # Anchoring on the tag instead of counting colons keeps this working
    # regardless of how many colons the timestamp in front of it contains.
    next unless $line =~ m/spamdyke(?:\[\d+\])?:\s*([^\s:]+)/;
    my $sdstatus = $1;
    #next if $sdstatus eq "CHKUSER";

    # FILTER_* entries name the list or keyword that matched. Tally only when
    # the detail field is really present; otherwise $1 would still hold the
    # capture of an earlier match and a wrong name would be counted.
    if ($sdstatus =~ m/FILTER_RBL_MATCH/) {
        if ($line =~ m/rbl: (\S+)/) {
            $rblstat{$1}++;
            $rblsum++;
        }
    }
    elsif ($sdstatus =~ m/FILTER_RHSBL_MATCH/) {
        if ($line =~ m/rhsbl: (\S+)/) {
            $rhsblstat{$1}++;
            $rhsblsum++;
        }
    }
    elsif ($sdstatus =~ m/FILTER_IP_IN_RDNS_BLACKLIST/) {
        if ($line =~ m/keyword: (\S+)/) {
            $rdnsblstat{$1}++;
            $rdnsblsum++;
        }
    }

    # Informational entries feed the breakdowns above but are not connection
    # results, so they stay out of %status.
    next if $sdstatus =~ m/CHKUSER|(FILTER|DEBUG|EXCESSIVE)_.*/;
    $status{$sdstatus}++;

    # Per-status TLD statistics are collected only when --tld was given.
    next unless $tldtop and $line =~ m/ origin_rdns: ([^ ]+)/;
    my $rdns = $1;
    $originsum{$sdstatus}++;   # counts every origin, classifiable or not
    my $tld = rdns_tld($rdns);
    $origin{$sdstatus}{$tld}++ if defined $tld;
}
# Fold the per-status counters into three totals: allowed connections,
# errors (TIMEOUT and ERROR statuses) and denials (everything else).
$deny  = 0 unless defined $deny;
$error = 0 unless defined $error;
foreach my $stat (sort keys %status) {
    if ($stat =~ m/ALLOWED/) {
        # accumulate like the other branches, so a second status containing
        # ALLOWED adds to the total instead of replacing it
        $allow += $status{$stat};
    }
    elsif ($stat =~ m/TIMEOUT|ERROR/) {
        $error += $status{$stat};
    }
    else {
        $deny += $status{$stat};
    }
}

# Share of each total in percent. With no spamdyke entries in the input the
# grand total is zero; report 0.00 for every share rather than dying on a
# division by zero.
$sum = $allow + $error + $deny;
$spampercentage  = sprintf("%2.2f", $sum ? $deny  / $sum * 100 : 0);
$errorpercentage = sprintf("%2.2f", $sum ? $error / $sum * 100 : 0);
$allowpercentage = sprintf("%2.2f", $sum ? $allow / $sum * 100 : 0);
# Print one "-- Breakdown --" section.
#   $count_for - hash ref mapping a list or keyword name to its hit count
#   $total     - sum of all hit counts; may be zero or undefined, in which
#                case every share is reported as 0.00
# Entries are listed by descending hit count, ties broken alphabetically.
sub print_breakdown {
    my ($count_for, $total) = @_;
    print "-- Breakdown --\n";
    foreach my $name (sort { $count_for->{$b} <=> $count_for->{$a} || $a cmp $b }
                      keys %$count_for) {
        my $share = $total ? $count_for->{$name} / $total * 100 : 0;
        # the name comes from the log, so pass it as an argument instead of
        # interpolating it into the format string
        printf "%2.2f%%\t%s\n", $share, $name;
    }
    print "---------------\n";
}

# Main report: one line per status code, most frequent first, each followed
# by its optional breakdown and top-TLD sections.
foreach my $key (sort { $status{$b} <=> $status{$a} || $a cmp $b } keys %status) {
    print "$status{$key}\t$key\n";

    if ($detailed) {
        if ($key eq "DENIED_RBL_MATCH") {
            print_breakdown(\%rblstat, $rblsum);
        }
        elsif ($key eq "DENIED_RHSBL_MATCH") {
            print_breakdown(\%rhsblstat, $rhsblsum);
        }
        elsif ($key eq "DENIED_IP_IN_RDNS") {
            print_breakdown(\%rdnsblstat, $rdnsblsum);
        }
    }

    if ($tldtop && $origin{$key}) {
        my $top = $tldtop;
        print "-- Top $top TLD --\n";

        # TLDs with the same hit count share one output line. Grouping by
        # the integer count avoids comparing computed percentages.
        my %tlds_with = ();
        foreach my $tld (sort keys %{ $origin{$key} }) {
            push @{ $tlds_with{ $origin{$key}{$tld} } }, $tld;
        }

        # The percentage shown is the share of each single TLD in the group,
        # relative to all origins seen for this status code.
        foreach my $count (sort { $b <=> $a } keys %tlds_with) {
            printf "%2.2f%%\t%s\n",
                $count / $originsum{$key} * 100,
                join(', ', @{ $tlds_with{$count} });
            last unless --$top;
        }
        print "------------\n";
    }
}
# Closing summary: absolute totals first, then each total's share in percent.
$sum = $deny + $error + $allow;
print "\n",
      "Allowed: $allow \n",
      "Denied : $deny \n",
      "Errors : $error \n",
      "Total : $sum \n",
      "% Valid: $allowpercentage% \n",
      "% Spam : $spampercentage% \n",
      "% Error: $errorpercentage% \n";
_______________________________________________
spamdyke-users mailing list
spamdyke-users@spamdyke.org
http://www.spamdyke.org/mailman/listinfo/spamdyke-users