Chris Ryland wrote:
I'm thinking of switching from a manual retraining scheme (forwarding
junk folder messages under Mac OS X, using an AppleScript to pick out
the DSPAM signature and forward a message to spam@<myhost>.com) to a
server-based auto-retraining scheme that would take all mail in all
users' Junk folders (again, these are mostly Mail.app and Entourage
users logging into my Mac OS X Server 10.4 system with Cyrus as the
IMAP server).
I can see how to do the message forwarding on the server (once the
message is old enough for the user to have reviewed his spam folder)
but don't know enough about cyrus to know how to delete a message from
its database once it's been forwarded to the spam server.
Does anyone have experience with this kind of setup?
Thanks &
Cheers!
--Chris Ryland / Em Software, Inc. / www.emsoftware.com
Hello, this might help you with what you want to do. I more or less
got it from Columbia University (search for cal-dspam). Just create
a SPAM and a HAM IMAP folder for each user (those are the folder names
the script scans for) and run the attached script as a server-side
job. I modified the script a bit to work for virtual-domain users
(usernames like [EMAIL PROTECTED]). You might be able to
improve on it... ^_^
--
Peter Santiago [EMAIL PROTECTED]
My website: www.psinergybbs.com
My spamtrap address: [EMAIL PROTECTED]
#!/usr/bin/perl -w
use Mail::IMAPClient;
#use IPC::Open3;
use IO::Socket::UNIX;
use IO::Socket;
use Socket;
use Data::Dumper;
## cal-dspam-process: pull messages from a cyrus imap server and pass
## them off to dspam for retraining.
##
## Usage: cal-dspam-process (report|retrain|sweep) [--verbose]
##
# suggested invocation (on sedna, at any rate):
## k5start -U -f ~/dspam.keytab -S imap -I sedna.astro.columbia.edu cal-dspam-process retrain

## The first argument selects the action.  Check that it was given at all
## before matching it, so a bare invocation produces the usage error
## rather than an "uninitialized value" warning followed by the error.
my $action = $ARGV[0];
(defined($action) && $action =~ m/^(report|retrain|sweep)$/)
  or die "Must choose a legitimate action (report, retrain, or sweep)";

## An optional second argument of --verbose turns on debug output.
my $debug = (defined($ARGV[1]) && ('--verbose' eq $ARGV[1]));

## read, write, error file handles:
my ($wfh,$rfh,$efh);
my ($server,$socket) = ('localhost', '/var/lib/imap/socket/imap');

## The commented-out code below is the earlier approach of talking to the
## server through imtest / a unix socket; it is kept for reference only.
#my $imt = 'imtest -x '.$socket.' '.$server;
#my $pid = open3($wfh,$rfh,$efh,$imt);
#warn "$imt opened process $pid\n" if $debug;
#my $line = '';
#until ($line =~ /^Security strength factor:/i ) {
# defined($line = <$rfh>) or die "EOF\n";
# print STDERR "Prolog: $line" if $debug;
#}
## why should we need to sleep? in case the socket is not set up yet.
## FIXME: better than a default sleep would be a poll on the socket as it exists.
#sleep 1;
#my $sock = IO::Socket::UNIX->new("$socket")
# or die "No socket: $!\n";
#print STDERR "<<<END OF PROLOG>>>\n" if $debug;

## FIXME: the account name and password are hard-coded here; anyone who can
## read this script can read the password.
## new() returns undef (and sets $@) when the connection or login fails, so
## stop here with a useful message instead of calling methods on undef below.
my $imap = Mail::IMAPClient->new (Server=>'localhost',User => 'dspam', Password => 'novirus')
  or die "Could not connect or log in to IMAP server: $@\n";
$imap->Prewritemethod(\&Mail::IMAPClient::Strip_cr);
$imap->Debug($debug);
$imap->Debug_fh(\*STDERR);
## NOTE(review): this forces the client state to "Connected" even though
## new() above was given credentials -- presumably a leftover from the
## socket-based approach commented out above; confirm it is still wanted.
$imap->State($imap->Connected);
#$imap->Socket($socket);
## folderscan($imap, $foldername, $dspamclass)
##
## Scan every mailbox the server lists, pick out the per-user mailboxes
## named "user/<name>/<foldername>@<domain>", and for each message in them
## pass its X-DSPAM-Signature to dspamc so the message is re-learned as
## $dspamclass.  Messages that were retrained successfully are moved to
## user/dspam/processed/<user>/<foldername> (with "@" in <user> replaced by
## ".").
##
##   $imap       - the IMAP client object to work through
##   $foldername - leaf name of the mailbox to look for (e.g. 'SPAM')
##   $dspamclass - value handed to dspamc's --class option (e.g. 'spam')
##
## Reads the file-level $action and $debug: dspamc is only run, and messages
## are only moved, when $action is 'retrain'; with 'report' and $debug set,
## messages lacking a signature are dumped to STDERR.
##
## Dies if the ACL on a matching mailbox cannot be set.  Returns nothing.
sub folderscan {
  my ($imap, $foldername, $dspamclass) = @_;

  #my @folders = $imap->list("",'user/*/'.$foldername.'@*');
  my @folders = $imap->list();
  #foreach my $folder (@folders) {warn "Scanning: . $folder"};

  foreach my $listline (@folders) {
    next unless defined $listline;

    ## NOTE(review): the original pattern was destroyed by e-mail address
    ## obfuscation; this is reconstructed from the surrounding code as
    ## "user/<name>/<foldername>@<domain>" (virtual-domain mailboxes) --
    ## confirm against the server's actual LIST output.
    ## Capturing the name and domain here, instead of deleting the first
    ## "/<foldername>" from the path afterwards, keeps user names that
    ## merely start with the folder name (e.g. "SPAMMY") intact.
    next unless $listline =~
      /^\* LIST .* "(user\/(.*)\/\Q$foldername\E(\@.*))"[[:space:]]*$/;
    my $folder = $1;
    my $user   = $2.$3;

    warn "Getting $folder";
    warn "User: $user";

    my $msgcount = $imap->message_count($folder);
    $imap->setacl($folder,'dspam','write')
      or die "Could not set acl on $folder: $@\n";
    next unless (defined($msgcount) && ($msgcount > 0));

    warn "Checking on $folder (for user $user) with $msgcount messages\n";
    $imap->Select($folder)
      or do { warn "Could not select $folder: $@\n"; next; };

    my $fieldspec = "BODY[HEADER.FIELDS (X-DSPAM-Signature)]";
    my $hash = $imap->fetch_hash($fieldspec);
    unless (ref($hash) eq 'HASH') {
      warn "Could not fetch signatures from $folder: $@\n";
      $imap->close() or warn "failed to close folder $folder\n";
      next;
    }

    my @dealtwith = ();
    my @nosigs = ();
    my @failures = ();

    while (my ($msgid, $data) = each (%$hash)) {
      ## Take the first whitespace-free token after the header name, so
      ## the line ending that comes back with the header is not mistaken
      ## for (or glued onto) a signature.
      my $raw = $data->{$fieldspec};
      my $sig = '';
      if (defined($raw) && $raw =~ /^X-DSPAM-Signature:[ \t]*(\S+)/im) {
        $sig = $1;
      }
      warn "$msgid: signature is $sig\n" if $debug;

      if ($sig ne '') {
        ## if the signature is good, invoke an external dspam process to
        ## retrain the learner.  The command is kept as a list and run
        ## without a shell, so nothing in the user name or signature can
        ## be interpreted as shell syntax.
        my @dspamcmd = ('dspamc', '--client', '--user', $user,
                        '--class='.$dspamclass, '--source=error',
                        '--signature='.$sig);
        my $dspaminvocation = join(' ', @dspamcmd);
        if ($action eq 'retrain') {
          my $dspamval = system(@dspamcmd);
          warn "$dspaminvocation returned $dspamval\n" if ($dspamval);
          ## and mark it for transfer to the processed messages if
          ## we were successful in invoking dspam:
          if ($dspamval == 0) {
            push(@dealtwith, $msgid);
          } else {
            push(@failures, $msgid);
          }
        } else {
          warn "Would invoke \"$dspaminvocation\"\n" if $debug;
        }
      } else {
        push(@nosigs, $msgid);
        if ($action eq 'report') {
          ## otherwise, what should we do? just feed it as a corpus
          ## message? How should we extract the full text?
          warn "Dealing with message $msgid which does not have a DSPAM signature:\n" if $debug;
          warn $imap->message_string($msgid) if $debug;
        }
      }
    }

    # if we've dealt with at least one item:
    print "$_ \n" foreach (@dealtwith);
    if (scalar @dealtwith) {
      ## Build user/dspam/processed/<user>/<foldername> one level at a
      ## time, creating each level that does not exist yet.
      (my $safeuser = $user) =~ s/\@/\./g;
      my $newf = 'user/dspam/processed';
      foreach my $part ('', '/'.$safeuser, '/'.$foldername) {
        $newf .= $part;
        $imap->exists($newf) or $imap->create($newf)
          or warn "Could not create $newf\n";
      }
      ## it's either this, or delete each message. this strategy is less
      ## vicious:
      my $moveval = $imap->move($newf, \@dealtwith);
      if (defined $moveval) {
        warn "move returned $moveval\n";
      } else {
        warn "move from $folder to $newf failed: $@\n";
      }
    }

    warn "TROUBLE: ".(scalar @failures)." messages in $folder could not be retrained\n" if (scalar @failures);
    warn "TROUBLE: ".(scalar @nosigs)." messages in $folder without a dspam signature\n" if (scalar @nosigs);
    # close() does an implicit expunge
    $imap->close() or warn "failed to close folder $folder\n";
    ## print Data::Dumper->Dumpxs([$hash],['$hash']) if $debug;
  }
  return;
}
## Process each user's HAM folder (retrained as 'innocent') first, then the
## SPAM folder (retrained as 'spam'), and finally drop the IMAP session.
foreach my $job ( [ 'HAM', 'innocent' ], [ 'SPAM', 'spam' ] ) {
  my ($mailbox, $class) = @{$job};
  folderscan($imap, $mailbox, $class);
}
$imap->logout();
if ($debug) {
  print STDERR "<<<END>>>\n";
}
exit;