#!/usr/bin/perl

# Returns the usage/help text of saf.pl as one string. Despite its name this
# sub prints nothing; the caller decides what to do with the text.
sub DisplayUsage
{
# The here-document is single-quoted so the sample code it contains ('@cf_files',
# '@_', '$/', backslashes) is returned literally instead of being interpolated.
# NOTE(review): the find(...) lines in the middle of the text look like a stray
# code paste; they are kept verbatim - confirm before removing them.
return <<'USAGE' ;

usage:
	# display all the rules and some statistics
	saf.pl

	# display information on the specified rules and some statistics
	saf.pl rule_name rule_name ...

saf.pl parses spamassassin configuration files and reports some data on the rules.

rule 'URGENT_BIZ' :
|- body = /usr/share/spamassassin/20_phrases.cf:418
|- description = Contains urgent matter
|- score = 2.0
`- score_definition = /usr/share/spamassassin/50_scores.cf:850

saf will also display all the overrides it found in the warnings sections.
find(
{
preprocess => sub 
	{
	print DumpTree(\@_) ;
	push @cf_files, grep{/\.cf$/} @_ ;
	}
, wanted => sub{}
}, "/usr/share/spamassassin", "/etc/mail/spamassassin");

Finally, saf will display a list of the rules with the number of rules in the file
as well as the age of the file in days.

(C) Nadim Khemir - 2004
This software is Public Domain.

USAGE
}

use strict;
use File::Find ;
use Data::TreeDumper ;

#----------------------------------------------------------------------------------------------------

# Keys of a rule shown in the full listing. The parser below creates these keys:
# 'header', 'header_definition', 'body', 'body_definition', 'describe',
# 'describe_address', 'score', 'score_address', 'uri', 'uri_definition'.
my @display = ('score') ;

# Every redefinition message, in the order they were found.
my @warnings ;

# Full path of every *.cf entry found below the spamassassin directories.
my @cf_files ;
find(sub {push @cf_files, $File::Find::name if(/\.cf$/)}, "/usr/share/spamassassin", "/etc/mail/spamassassin/");

# Total number of lines read from all the cf files.
my $cf_file_size_in_lines = 0 ;

my %rules ;             # rule name => {key => value, ..., warnings => [messages]}
my %rules_per_cf_file ; # cf file => {rule name => number of matching lines}
my ($lowest_score, $highest_score) ;
my $rules_without_score = 0 ;
my $rules_with_score_zero = 0 ;

#--------------------
# parse the cf files
#--------------------

# One entry per recognized directive:
#   [directive, key receiving the 'file:line' location, key receiving the directive text]
# 'header', 'body' and 'uri' keep the location under the directive name and the
# text under '<name>_definition'; 'describe' and 'score' keep the text under the
# directive name and the location under '<name>_address'.
# The table is built once instead of once per input line.
my @field_specs =
	(
	  ['header',   'header',           'header_definition']
	, ['body',     'body',             'body_definition']
	, ['describe', 'describe_address', 'describe']
	, ['score',    'score_address',    'score']
	, ['uri',      'uri',              'uri_definition']
	) ;

for my $cf_file (@cf_files)
	{
	# A lexical handle starts with a fresh line counter ($.) for every file.
	open my $cf_handle, "<", $cf_file or die "Couldn't open '$cf_file': $!\n" ;

	while(my $line = <$cf_handle>)
		{
		# Count each line as it is read so the last file is included in the total.
		$cf_file_size_in_lines++ ;

		for my $field_spec (@field_specs)
			{
			my ($regex, $address_field, $field) = @{$field_spec} ;

			# "<directive> <rule name> <rest of the line>"
			next unless $line =~ /^\s*$regex\s+([_a-zA-Z0-9]+)\s+(.*)/ ;

			my ($rule_name, $value) = ($1, $2) ;

			if(exists $rules{$rule_name}{$field})
				{
				# The same attribute was already set for this rule; report where.
				my $warning = "Redefinition of '$rule_name:$field' at '$cf_file:$.' was defined at $rules{$rule_name}{$address_field}\n" ;
				push @warnings, $warning ;
				push @{$rules{$rule_name}{warnings}}, $warning ;
				}

			# The latest definition wins.
			$rules{$rule_name}{$field} = $value ;
			$rules{$rule_name}{$address_field} = "$cf_file:$." ;
			$rules_per_cf_file{$cf_file}{$rule_name}++ ;

			# A line starts with a single directive; no other entry can match.
			last ;
			}

		# check if we miss any field
		#~ if(/^\s[#\s]+\s+([_a-zA-Z0-9]+)\s+(.*)/)
			#~ {
			#~ warn $_ ;
			#~ $rules{$2}{$1} = $3 ;
			#~ $rules_per_cf_file{$cf_file}{$2}++ ;
			#~ }
		}

	close $cf_handle ;
	}

#--------------------
# display the rules
#--------------------
if(@ARGV)
	{
	# Rule names were given on the command line: dump only those rules.
	for my $rule (@ARGV)
		{
		if(exists $rules{$rule})
			{
			print DumpTree($rules{$rule}, "rule '$rule' :", DISPLAY_ADDRESS => 0) ;
			}
		else
			{
			print "Couldn't find rule '$rule'.\n" ;
			}
		}
	}
else
	{
	# Gather the score statistics in one pass over the rules. The scores are the
	# raw text of the 'score' directive; numeric operators use their leading number.
	for my $rule_data (values %rules)
		{
		my $score = $rule_data->{score} ;

		unless(defined $score)
			{
			$rules_without_score++ ;
			next ;
			}

		# Numeric test so that '0', '0.0' and '0 # comment' all count as disabled.
		$rules_with_score_zero++ if($score == 0) ;

		# The first score seen initializes both extremes; an undefined extreme
		# must not be compared as if it were 0.
		$highest_score = $score if(!defined $highest_score || $score > $highest_score) ;
		$lowest_score = $score if(!defined $lowest_score || $score < $lowest_score) ;
		}

	# No rule names given: dump every rule, restricted to the selected fields.
	print DumpTree(
			\%rules
			, 'Spamassassin rules [' . keys(%rules) . ']'
			, FILTER => sub
							{
							my ($s, $level) = @_;
							if($level == 1)
								{
								# allow only the selected fields to be displayed,
								# plus the warnings when the rule has any
								my @display_warnings ;
								push @display_warnings, 'warnings' if exists ($s->{warnings}) ;

								return('HASH', $s, @display, @display_warnings)
								}

							return(Data::TreeDumper::DefaultNodesToDisplay($s)) ;
							}
			, DISPLAY_ADDRESS => 0
			) ;

	# Counters that were never incremented are reported as 0, and extremes that
	# were never set (no rule has a score) as 'n/a', rather than as empty strings.
	$rules_without_score ||= 0 ;
	$rules_with_score_zero ||= 0 ;
	my $lowest_shown = defined $lowest_score ? $lowest_score : 'n/a' ;
	my $highest_shown = defined $highest_score ? $highest_score : 'n/a' ;

	print "\n" ;
	print keys(%rules) . " rules in " . @cf_files . " files [$cf_file_size_in_lines lines]:\n" ;
	print "lowest score: $lowest_shown.\n" ;
	print "Highest score: $highest_shown.\n" ;
	print "$rules_without_score rules without score.\n" ;
	print "$rules_with_score_zero rules with score '0' (disabled!).\n" ;
	}

#--------------------
# warnings
#--------------------
# Print the collected redefinition messages between a heading and a blank line;
# nothing at all is printed when there are none.
print "\nWarnings:\n", @warnings, "\n" if(@warnings) ;
#--------------------
# file statistics
#--------------------
# Summary line, then one line per cf file: path, number of rules, age in days.
print keys(%rules) . " rules in " . @cf_files . " files [$cf_file_size_in_lines lines]:\n" ;

for my $cf_file (@cf_files)
	{
	my $rule_count = keys %{$rules_per_cf_file{$cf_file}} ;

	# -M gives the file age in (fractional) days; anything up to one whole day
	# is reported as '[1 day]'.
	my $days = int(-M $cf_file) ;
	my $age = $days > 1 ? "[$days days]" : '[1 day]' ;

	unless($rule_count)
		{
		printf("%-59s no rules $age\n", $cf_file) ;
		next ;
		}

	# singular or plural label
	my $noun = $rule_count > 1 ? 'rules' : 'rule' ;
	printf("%-57s %4d %-5s $age\n", $cf_file, $rule_count, $noun) ;
	}

