Re: [dspam-users] retraining DSPAM on server with cyrus-imap

Peter Santiago Mon, 12 Nov 2007 10:13:28 -0800

Chris Ryland wrote:

I'm thinking of switching from a manual retraining scheme (forwarding junk folder messages under Mac OS X using an AppleScript to pick out the DSPAM signature and forward a message to spam@<myhost>.com) to a server-based auto-retraining scheme that would take all mail in all users' Junk folders (again, these are mostly Mail.app and Entourage users logging into my Mac OS X Server 10.4 system with cyrus as the IMAP server).
I can see how to do the message forwarding on the server (once the message is old enough for the user to have reviewed his spam folder) but don't know enough about cyrus to know how to delete a message from its database once it's been forwarded to the spam server.
Does anyone have experience with this kind of setup?

Thanks &
Cheers!
--Chris Ryland / Em Software, Inc. / www.emsoftware.com

Hello, this might help you with what you want to happen.... I sort of got it from Columbia university (search for cal-dspam).... just create a Spam and Ham IMAP folder for each user, run the attached script as a server side job... I modified the script a bit to work for virtual domain users (usernames like [EMAIL PROTECTED]). You might be able to improve on it... ^_^


--
Peter Santiago         [EMAIL PROTECTED]
My website:            www.psinergybbs.com
My spamtrap address:   [EMAIL PROTECTED]

#!/usr/bin/perl -w 
 
use Mail::IMAPClient; 
#use IPC::Open3; 
use IO::Socket::UNIX; 
use IO::Socket; 
use Socket; 
use Data::Dumper; 
 
## cal-dspam-process: pull messages from a cyrus imap server and pass
## them off to dspam for retraining.
##
## Usage: cal-dspam-process (report|retrain|sweep) [--verbose]
##   retrain  - invoke dspamc for every message carrying a DSPAM signature
##              and move the successfully retrained messages away
##   report   - with --verbose, dump messages that have no DSPAM signature
##   sweep    - with --verbose, only show the dspamc commands that would run

#  suggested invocation (on sedna, at any rate):
## k5start -U -f ~/dspam.keytab -S imap -I sedna.astro.columbia.edu cal-dspam-process retrain

## The action is mandatory; check that it was given at all before matching
## so a missing argument yields the usage error rather than an
## "uninitialized value" warning.
my $action = $ARGV[0];
(defined($action) && $action =~ m/^(report|retrain|sweep)$/)
  or die "Must choose a legitimate action (report, retrain, or sweep)";

my $debug = (defined($ARGV[1]) && ('--verbose' eq $ARGV[1]));

## read, write, error file handles (only used by the disabled imtest
## code below):
my ($wfh,$rfh,$efh);

my ($server,$socket) = ('localhost', '/var/lib/imap/socket/imap');

## --- disabled: original approach of talking to cyrus through imtest and
## --- its unix socket; kept for reference.
#my $imt = 'imtest -x '.$socket.' '.$server;

#my $pid = open3($wfh,$rfh,$efh,$imt);

#warn "$imt opened process $pid\n" if $debug;

#my $line = '';

#until ($line =~ /^Security strength factor:/i ) {
#        defined($line = <$rfh>) or die "EOF\n";
#        print STDERR "Prolog: $line" if $debug;
#}

## why should we need to sleep?  in case the socket is not set up yet.
## FIXME: better than a default sleep would be a poll on the socket as it
## exists.
#sleep 1;
#my $sock = IO::Socket::UNIX->new("$socket")
#  or die "No socket: $!\n";

#print STDERR "<<<END OF PROLOG>>>\n" if $debug;

## Connect and log in over TCP.  new() returns undef when the connection
## or login fails, so stop here instead of calling methods on undef.
## NOTE(review): the account credentials are embedded in the script;
## consider reading them from a protected configuration file.
my $imap = Mail::IMAPClient->new(Server => $server, User => 'dspam',
                                 Password => 'novirus')
  or die "Could not connect/login to IMAP server $server: $@\n";
$imap->Prewritemethod(\&Mail::IMAPClient::Strip_cr);
$imap->Debug($debug);
$imap->Debug_fh(\*STDERR);
## NOTE(review): presumably a holdover from the socket-based approach
## above; confirm whether resetting the state is still needed.
$imap->State($imap->Connected);
#$imap->Socket($socket);
 
 
## folderscan($imap, $foldername, $dspamclass)
##
## Walk every mailbox reported by the IMAP server, pick the per-user
## folders named $foldername (e.g. "user/<name>/SPAM@<domain>") and, for each
## message in them that carries an X-DSPAM-Signature header, retrain dspam
## with class $dspamclass.
##
##   $imap        - a logged-in Mail::IMAPClient (or compatible) object
##   $foldername  - leaf name of the per-user training folder ('HAM', 'SPAM')
##   $dspamclass  - value handed to dspamc --class= ('innocent', 'spam')
##
## Behaviour depends on the file-level $action and $debug settings:
##   retrain - run dspamc per signed message; successfully retrained messages
##             are moved to user/dspam/processed/<user>/<foldername>
##   report  - with $debug, dump messages that have no signature
##   other   - with $debug, only show the command that would be run
##
## Dies if the ACL on a matching folder cannot be set.  Returns nothing
## useful.
sub folderscan {
  my ($imap, $foldername, $dspamclass) = @_;

  #my @folders = $imap->list("",'user/*/'.$foldername.'@*');
  my @folders = $imap->list();

  for my $line (@folders) {
    next unless defined $line;

    ## Raw LIST response lines look like
    ##   * LIST (<flags>) "<sep>" "user/<name>/<foldername>@<domain>"
    ## NOTE(review): the archived copy of this pattern was destroyed by
    ## e-mail obfuscation; it is reconstructed here from the commented-out
    ## list() call above.  The @<domain> part is optional so plain
    ## (non-virtual-domain) mailboxes are handled as well.
    next unless $line =~
      m{^\* LIST .* "(user/(.*)/\Q$foldername\E(\@[^"]*)?)"[[:space:]]*$};
    my $folder = $1;
    ## user/<name>/<foldername>@<domain>  ->  <name>@<domain>
    my $user = $2 . (defined($3) ? $3 : '');

    warn "Getting $folder";
    warn "User: $user";
    my $msgcount = $imap->message_count($folder);

    $imap->setacl($folder,'dspam','write')
      or die "Could not set acl: " . ($imap->LastError || $@ || 'unknown error');

    next unless defined($msgcount) && ($msgcount > 0);

    warn "Checking on $folder (for user $user) with $msgcount messages\n";
    ## Use the documented select() method: it quotes the folder name and
    ## records the selected folder in the client object.
    unless ($imap->select($folder)) {
      warn "Could not select $folder: " . ($imap->LastError || 'unknown error') . "\n";
      next;
    }

    my $fieldspec = "BODY[HEADER.FIELDS (X-DSPAM-Signature)]";
    my $hash = $imap->fetch_hash($fieldspec);
    unless ($hash) {
      warn "Could not fetch signatures from $folder: "
         . ($imap->LastError || 'unknown error') . "\n";
      $imap->close() or warn "failed to close folder $folder\n";
      next;
    }

    my @dealtwith = ();
    my @nosigs = ();

    for my $msgid (sort { $a <=> $b } keys %$hash) {
      my $data = $hash->{$msgid} || {};
      ## Servers may echo the field name in a different case than requested,
      ## so look the key up case-insensitively.
      my ($key) = grep { lc($_) eq lc($fieldspec) } keys %$data;
      my $sig = defined($key) ? $data->{$key} : undef;
      $sig = '' unless defined $sig;
      $sig =~ s/^X-DSPAM-Signature:[ \t]*//i;
      ## The fetched header block ends in CRLF(s); without trimming, a
      ## message lacking the header would look like it had a signature.
      $sig =~ s/\s+\z//;
      warn "$msgid: signature is $sig\n" if $debug;

      if ($sig eq '') {
        push(@nosigs, $msgid);
        if ($action eq 'report') {
          ## otherwise, what should we do?  just feed it as a corpus
          ## message?  How should we extract the full text?
          warn "Dealing with message $msgid which does not have a DSPAM signature:\n" if $debug;
          warn $imap->message_string($msgid) if $debug;
        }
        next;
      }

      ## if the signature is good, invoke an external dspam process to
      ## retrain the learner.  The signature comes from a mail header and
      ## is therefore untrusted: run dspamc with an argument list so no
      ## shell ever interprets it.
      my @dspamcmd = ('dspamc', '--client', '--user', $user,
                      '--class='.$dspamclass, '--source=error',
                      '--signature='.$sig);
      my $dspaminvocation = join(' ', @dspamcmd);

      if ($action eq 'retrain') {
        my $dspamval = system(@dspamcmd);
        warn "$dspaminvocation returned $dspamval\n" if ($dspamval);
        ## and mark it for transfer to the processed messages if
        ## we were successful in invoking dspam; failed messages stay
        ## where they are.
        push(@dealtwith, $msgid) if ($dspamval == 0);
      } else {
        warn "Would invoke \"$dspaminvocation\"\n" if $debug;
      }
    }

    # if we've dealt with at least one item:
    for my $done (@dealtwith) { print "$done \n" }
    if (scalar @dealtwith) {
      ## Archive folder is user/dspam/processed/<user>/<foldername>, with
      ## any '@' in the user name replaced by '.'; create each level that
      ## does not exist yet.
      (my $archiveuser = $user) =~ s/\@/\./g;
      my $newf = 'user/dspam/processed';
      for my $suffix ('', '/'.$archiveuser, '/'.$foldername) {
        $newf .= $suffix;
        $imap->exists($newf) or $imap->create($newf)
          or warn "Could not create $newf\n";
      }
      ## it's either this, or delete each message.  this strategy is less
      ## vicious:
      my $moveval = $imap->move($newf, \@dealtwith);
      if (defined $moveval) {
        warn "move returned $moveval\n";
      } else {
        warn "move to $newf failed: " . ($imap->LastError || 'unknown error') . "\n";
      }
    }
    warn "TROUBLE: ".(scalar @nosigs)." messages in $folder without a dspam signature\n" if (scalar @nosigs);
    # close() does an implicit expunge
    $imap->close() or warn "failed to close folder $folder\n";

    ## print Data::Dumper->Dumpxs([$hash],['$hash']) if $debug;
  }
}
 
## Run one pass per training folder: first the false positives (HAM ->
## innocent), then the missed spam (SPAM -> spam).
for my $pass (['HAM', 'innocent'], ['SPAM', 'spam']) {
  folderscan($imap, @$pass);
}

## All done: end the IMAP session.
$imap->logout;

if ($debug) {
  print STDERR "<<<END>>>\n";
}

exit;

Re: [dspam-users] retraining DSPAM on server with cyrus-imap

Reply via email to