Henry

Make sure the spam/ham folders are IMAP folders. Make sure users drag the 
messages into those folders rather than forwarding them by email, as 
forwarding will muck up the headers.

Then grab a Perl script (heck, here's one below) to fetch the messages from 
those folders and feed them into the Bayes database.

Make sure you run this script as the user MailScanner runs as 
(mailnull, postfix, etc.) and not as root.

#!/usr/bin/perl -w
use strict;
use Mail::IMAPClient;
use Shell;
use Env qw(HOME);
use Getopt::Long;

use File::Temp qw/ tempfile tempdir /;

# IMAP server that holds the spam/ham training folders
my $imapserver = "myserver.domain.com";

# set to 1 to enable imapclient debugging
my $debug = 0;

# set to 1 if running under cron (disables output)
my $cron = 1;

# file-scope scratch variables for the temp file being written
my $filename;
my $fh;

# command line options: IMAP account name and password
my %options =
(
 uid => undef,
 pwd => undef
);

# GetOptions returns false when the command line could not be parsed
# (it has already printed the reason to STDERR by then)
my $cmdsts = GetOptions ("uid=s" => \$options{uid},
                         "pwd=s" => \$options{pwd});
if (!$cmdsts) { die "[SPAMASSASSIN] invalid command line (-uid=username -pwd=password)\n"; }

# test for defined/non-empty rather than truth so that a value of "0" is
# still accepted
if (!defined $options{uid} || $options{uid} eq "")
{
  die "[SPAMASSASSIN] uid not set (-uid=username)\n";
}
if (!defined $options{pwd} || $options{pwd} eq "")
{
  die "[SPAMASSASSIN] pwd not set (-pwd=password)\n";
}

my $uid = $options{uid};
my $pwd = $options{pwd};

# login to imap server
# new() connects and logs in because Server, User and Password are all
# supplied; it returns undef on failure and leaves the reason in $@, so a
# separate "did we get an object" check afterwards is not needed.
my $imap = Mail::IMAPClient->new (Server=>$imapserver, User=>$uid,
                                  Password=>$pwd, Debug=>$debug)
        or die "[SPAMASSASSIN] Can't connect to $imapserver as $uid: $@\n";

# Deal with spam first
learn_mail ($HOME."/spam/", ".spam", "spam", 0, "--spam --showdots");

# Now deal with ham
learn_mail ($HOME."/ham/", ".ham", "ham", 0, "--ham --showdots");

# close the IMAP session cleanly instead of just dropping the connection
$imap->logout;

exit;

#
# fetch the messages of one imap folder and feed them to sa-learn
#
# The local directory is emptied of old message files first. sa-learn is
# only run (and the directory only emptied again) when at least one
# message was retrieved.
#
# arguments
#  $dir         directory to place retrieved messages in
#  $ext         file extension to use on retrieved messages
#  $folder      imap folder name on server
#  $shared      0 if imap folder is in users mailbox
#               1 if imap folder is in the shared name space
#  $sa_args     additional arguments to specify to sa-learn
#               (e.g. --spam or --ham)
#
sub learn_mail {
  my ($dir, $ext, $folder, $shared, $sa_args) = @_;

  # leftovers from an earlier run must not be learnt again
  clear_directory ($dir, $ext);

  # pull the folder contents down; read_mail reports how many it saved
  if (read_mail ($dir, $ext, $folder, $shared) > 0)
  {
    # train the bayes database, then drop the local copies
    sa_learn ($dir, $ext, $sa_args);
    clear_directory ($dir, $ext);
  }
}


#
# reads mail from an imap folder and saves in a local directory
#
# Each message found in the folder is written to its own temp file in
# $dir. Once all of them are saved they are flagged as deleted and
# expunged from the folder, so that the next run does not fetch them again.
#
# arguments
#  $dir         directory to place retrieved messages in
#  $ext         file extension to use on retrieved messages
#  $folder      imap folder name on server
#  $shared      0 if imap folder is in users mailbox
#               1 if imap folder is in the shared name space
#
# returns the number of messages saved
#
# dies if the shared namespace cannot be determined, the folder cannot be
# selected, or a message cannot be written to disk; in all of those cases
# nothing has been deleted from the server
#
sub read_mail {
  my ($dir, $ext, $folder, $shared) = @_;
  my $count = 0;
  my $target = $folder;

  if ($shared)
  {
    # use a shared public folder instead: entry [2] of the namespace data
    # is the shared name space, a list of [prefix, separator] pairs
    my $namespace = $imap->namespace;
    my $shared_ns = $namespace ? $namespace->[2] : undef;
    my ($prefix, $sep) = ($shared_ns && $shared_ns->[0])
                       ? @{$shared_ns->[0]} : ();
    defined $prefix
       or die "Can't get shared folder namespace or seperator: $@";

    # a missing separator is treated as "no separator to insert"
    $sep = "" unless defined $sep;

    # join prefix and folder with exactly one separator between them
    $target = $prefix.
       ($prefix =~ /\Q$sep\E$/ || $folder =~ /^\Q$sep/ ? "" : $sep).
       $folder;
  }

  $imap->select ($target) or die "Cannot select $target: $@";

  # read through all messages
  my @msgs = grep { defined } $imap->search("ALL");
  foreach my $msg (@msgs)
  {
    ($fh, $filename) = tempfile (SUFFIX => $ext, DIR => $dir);

    # stop before anything is deleted if a message could not be saved
    $imap->message_to_file ($fh, $msg)
       or die "Cannot save message $msg from $target to $filename: $@";
    close $fh or die "Cannot close $filename: $!";
    $count++;
  }

  if (@msgs)
  {
    # delete_message only sets the \Deleted flag; the expunge is what
    # actually removes the messages from the folder. Failures here are
    # reported but not fatal: the mail is already on disk.
    $imap->delete_message (@msgs)
       or warn "Cannot flag messages in $target as deleted: $@\n";
    $imap->expunge
       or warn "Cannot expunge $target: $@\n";
  }

  if ($cron == 0) { print "Retrieved $count messages from $target\n"; }

  return $count;
}

#
# Removes files in directory $dir with extension $ext
#
# arguments
#  $dir         directory to clean, with or without a trailing slash
#  $ext         file name suffix to match, taken literally (e.g. ".spam")
#
# dies if the directory cannot be opened; a file that cannot be removed
# only produces a warning
#
sub clear_directory{
  my ($dir, $ext) = @_;

  opendir (my $dh, $dir) or die "Couldn't open dir: $dir\n";
  my @files = readdir ($dh);
  closedir ($dh);

  # callers pass $dir with a trailing slash, but do not depend on it
  my $base = ($dir =~ m{/\z}) ? $dir : $dir."/";

  foreach my $file (@files)
  {
    # \Q..\E: the "." in $ext is a literal dot, not a regex wildcard
    next unless $file =~ /\Q$ext\E\z/;

    my $path = $base.$file;
    next unless -f $path;
    unlink ($path) or warn "Couldn't remove $path: $!\n";
  }
}


#
# execute sa-learn command
#
# arguments
#  $dir         directory holding the retrieved messages
#  $ext         file extension of the retrieved messages (not used here:
#               sa-learn is pointed at the whole directory)
#  $type        arguments to pass to sa-learn (e.g. --spam or --ham)
#
# returns 1 on success
#
# dies if sa-learn cannot be started or finishes with a non-zero status
#
sub sa_learn {
  my ($dir, $ext, $type) = @_;

  # The command is run through the shell (needed for the redirect below),
  # so single-quote the directory to keep spaces and shell metacharacters
  # in the path from being interpreted.
  (my $quoted_dir = $dir) =~ s/'/'\\''/g;
  my $learncmd = "/usr/local/bin/sa-learn ".$type." --dir '".$quoted_dir."'";

  if ($cron == 0) { $learncmd .= " --showdots"; }
  else { $learncmd .= " > /dev/null 2>&1"; }

  #
  # Run sa-learn script on the message directory
  #
  return 1 if system ($learncmd) == 0;

  # $? is -1 when the command could not be started at all; otherwise it
  # packs the exit status (high byte) and the terminating signal (low bits)
  if ($? == -1) { die "system $learncmd failed to execute: $!\n"; }
  die "system $learncmd failed: exit status ".($? >> 8).
      ", signal ".($? & 127)."\n";
}

--
Martin Hepworth
Snr Systems Administrator
Solid State Logic
Tel: +44 (0)1865 842300

> -----Original Message-----
> From: news [mailto:[EMAIL PROTECTED] On Behalf Of Henry Kwan
> Sent: 19 June 2008 03:10
> To: users@spamassassin.apache.org
> Subject: Moving ham/spam from Exchange folders to sa-learn?
>
>
> Hi,
>
> Currently running SA 3.25 via MailScanner frontend (CentOS5
> box in the DMZ) to Exchange2K7.  Have setup two public
> folders for users to dump spam/ham in.
> What's the usual way of moving these messages back to SA for
> learning?  The volume isn't that high so if there was a way
> to convert .MSG to a format that sa-learn understands, I
> could then just sftp it back onto the CentOS box.
>
> Any links or tips would be appreciated.
>
> Thanks.
>
>
>
>




**********************************************************************
Confidentiality : This e-mail and any attachments are intended for the 
addressee only and may be confidential. If they come to you in error 
you must take no action based on them, nor must you copy or show them 
to anyone. Please advise the sender by replying to this e-mail 
immediately and then delete the original from your computer.
Opinion : Any opinions expressed in this e-mail are entirely those of 
the author and unless specifically stated to the contrary, are not 
necessarily those of the author's employer.
Security Warning : Internet e-mail is not necessarily a secure 
communications medium and can be subject to data corruption. We advise 
that you consider this fact when e-mailing us. 
Viruses : We have taken steps to ensure that this e-mail and any 
attachments are free from known viruses but in keeping with good 
computing practice, you should ensure that they are virus free.

Red Lion 49 Ltd T/A Solid State Logic
Registered as a limited company in England and Wales 
(Company No:5362730)
Registered Office: 25 Spring Hill Road, Begbroke, Oxford OX5 1RU, 
United Kingdom
**********************************************************************

Reply via email to