On Sun, May 08, 2005 at 03:00:29PM -0500, Michael Parker wrote:
> 
> Ok, I'm going to start working on this.  We can decide on the verb and
> update later.
> 

Ok, here is the result.  The spamc portion isn't done yet, the C
language and I don't always get along.

This adds the TELL option to spamd which is really pretty flexible.
It allows you to tell spamd the type of message and what to do with
that message, for instance to learn a spam message you would:

TELL SPAMC/1.3
Message-class: spam
Set: local

to forget a learned message:

TELL SPAMC/1.3
Remove: local

To report a spam:

TELL SPAMC/1.3
Message-class: spam
Set: local, remove

to revoke:

TELL SPAMC/1.3
Message-class: spam
Set: local
Remove: remote


The return value basically echos back what it did, so if you specified
to Set: local, and it was successful it would return DidSet: local,
same thing for Remove: (ie DidRemove: local).  This lets the client
decide how successful things were, for instance when reporting or
learning is it really an error if it was unable to do the bayes
learning portion?

I'm not quite happy with what spamd is logging, in fact I know I want
to fix that, so any suggestions are welcome.

Also, anyone feel like taking on the spamc portion?  I've already lost
enough hair, I would hate to pull the rest of it out.

Michael

Index: lib/Mail/SpamAssassin/Client.pm
===================================================================
--- lib/Mail/SpamAssassin/Client.pm     (revision 169549)
+++ lib/Mail/SpamAssassin/Client.pm     (working copy)
@@ -219,10 +219,27 @@
 
   my $msgsize = length($msg.$EOL);
 
-  print $remote "LEARN $PROTOVERSION$EOL";
+  print $remote "TELL $PROTOVERSION$EOL";
   print $remote "Content-length: $msgsize$EOL";
   print $remote "User: $self->{username}$EOL" if ($self->{username});
-  print $remote "Learn-type: $learntype$EOL";
+
+  if ($learntype == 0) {
+    print $remote "Message-class: spam$EOL";
+    print $remote "Set: local$EOL";
+  }
+  elsif ($learntype == 1) {
+    print $remote "Message-class: ham$EOL";
+    print $remote "Set: local$EOL";
+  }
+  elsif ($learntype == 2) {
+    print $remote "Remove: local$EOL";
+  }
+  else { # bad learntype
+    $self->{resp_code} = 00;
+    $self->{resp_msg} = 'do not know';
+    return undef;
+  }
+
   print $remote "$EOL";
   print $remote $msg;
   print $remote "$EOL";
@@ -236,17 +253,19 @@
 
   return undef unless ($resp_code == 0);
 
-  my $learned_p = 0;
   my $found_blank_line_p = 0;
 
+  my $did_set;
+  my $did_remove;
+
   while (!$found_blank_line_p) {
     $line = <$remote>;
 
-    if ($line =~ /Learned: yes/i) {
-      $learned_p = 1;
+    if ($line =~ /DidSet: (.*)/i) {
+      $did_set = $1;
     }
-    elsif ($line =~ /Learned: no/i) {
-      $learned_p = 0;
+    elsif ($line =~ /DidRemove: (.*)/i) {
+      $did_remove = $1;
     }
     elsif ($line =~ /$EOL/) {
       $found_blank_line_p = 1;
@@ -255,7 +274,12 @@
 
   close $remote;
 
-  return $learned_p;
+  if ($learntype == 0 || $learntype == 1) {
+    return $did_set =~ /local/;
+  }
+  else { #safe since we've already checked the $learntype values
+    return $did_remove =~ /local/;
+  }
 }
 
 =head2 report
@@ -270,7 +294,49 @@
 sub report {
   my ($self, $msg) = @_;
 
-  return $self->_report_or_revoke($msg, 0);
+  $self->_clear_errors();
+
+  my $remote = $self->_create_connection();
+
+  return undef unless ($remote);
+
+  my $msgsize = length($msg.$EOL);
+
+  print $remote "TELL $PROTOVERSION$EOL";
+  print $remote "Content-length: $msgsize$EOL";
+  print $remote "User: $self->{username}$EOL" if ($self->{username});
+  print $remote "Message-class: spam$EOL";
+  print $remote "Set: local,remote$EOL";
+  print $remote "$EOL";
+  print $remote $msg;
+  print $remote "$EOL";
+
+  my $line = <$remote>;
+
+  my ($version, $resp_code, $resp_msg) = $self->_parse_response_line($line);
+
+  $self->{resp_code} = $resp_code;
+  $self->{resp_msg} = $resp_msg;
+
+  return undef unless ($resp_code == 0);
+
+  my $reported_p = 0;
+  my $found_blank_line_p = 0;
+
+  while (!$reported_p && !$found_blank_line_p) {
+    $line = <$remote>;
+
+    if ($line =~ /DidSet:\s+.*remote/i) {
+      $reported_p = 1;
+    }
+    elsif ($line =~ /^$EOL$/) {
+      $found_blank_line_p = 1;
+    }
+  }
+
+  close $remote;
+
+  return $reported_p;
 }
 
 =head2 revoke
@@ -285,7 +351,50 @@
 sub revoke {
   my ($self, $msg) = @_;
 
-  return $self->_report_or_revoke($msg, 1);
+  $self->_clear_errors();
+
+  my $remote = $self->_create_connection();
+
+  return undef unless ($remote);
+
+  my $msgsize = length($msg.$EOL);
+
+  print $remote "TELL $PROTOVERSION$EOL";
+  print $remote "Content-length: $msgsize$EOL";
+  print $remote "User: $self->{username}$EOL" if ($self->{username});
+  print $remote "Message-class: ham$EOL";
+  print $remote "Set: local$EOL";
+  print $remote "Remove: remote$EOL";
+  print $remote "$EOL";
+  print $remote $msg;
+  print $remote "$EOL";
+
+  my $line = <$remote>;
+
+  my ($version, $resp_code, $resp_msg) = $self->_parse_response_line($line);
+
+  $self->{resp_code} = $resp_code;
+  $self->{resp_msg} = $resp_msg;
+
+  return undef unless ($resp_code == 0);
+
+  my $revoked_p = 0;
+  my $found_blank_line_p = 0;
+
+  while (!$revoked_p && !$found_blank_line_p) {
+    $line = <$remote>;
+
+    if ($line =~ /DidRemove:\s+remote/i) {
+      $revoked_p = 1;
+    }
+    elsif ($line =~ /^$EOL$/) {
+      $found_blank_line_p = 1;
+    }
+  }
+
+  close $remote;
+
+  return $revoked_p;
 }
 
 
@@ -393,72 +502,5 @@
   $self->{resp_msg} = undef;
 }
 
-
-=head2 _report_or_revoke
-
-public instance (Boolean) report_or_revoke (String $msg, Integer $reporttype)
-
-Description:
-This method implements the report or revoke call.  C<$learntype> should
-be an integer, 0 for report or 1 for revoke.  The return value is a
-boolean indicating if the message was learned or not.
-
-An undef return value indicates that there was an error and you
-should check the resp_code/resp_error values to determine what
-the error was.
-
-=cut
-
-sub _report_or_revoke {
-  my ($self, $msg, $reporttype) = @_;
-
-  $self->_clear_errors();
-
-  my $remote = $self->_create_connection();
-
-  return undef unless ($remote);
-
-  my $msgsize = length($msg.$EOL);
-
-  print $remote "COLLABREPORT $PROTOVERSION$EOL";
-  print $remote "Content-length: $msgsize$EOL";
-  print $remote "User: $self->{username}$EOL" if ($self->{username});
-  print $remote "CollabReport-type: $reporttype$EOL";
-  print $remote "$EOL";
-  print $remote $msg;
-  print $remote "$EOL";
-
-  my $line = <$remote>;
-
-  my ($version, $resp_code, $resp_msg) = $self->_parse_response_line($line);
-
-  $self->{resp_code} = $resp_code;
-  $self->{resp_msg} = $resp_msg;
-
-  return undef unless ($resp_code == 0);
-
-  my $reported_p = 0;
-  my $found_blank_line_p = 0;
-
-  while (!$found_blank_line_p) {
-    $line = <$remote>;
-
-    if ($line =~ /Reported: yes/i) {
-      $reported_p = 1;
-    }
-    elsif ($line =~ /Reported: no/i) {
-      $reported_p = 0;
-    }
-    elsif ($line =~ /$EOL/) {
-      $found_blank_line_p = 1;
-    }
-  }
-
-  close $remote;
-
-  return $reported_p;
-}
-
-
 1;
 
Index: spamd/spamd.raw
===================================================================
--- spamd/spamd.raw     (revision 169549)
+++ spamd/spamd.raw     (working copy)
@@ -1047,30 +1047,6 @@
     info(sprintf("spamd: skipped large message in %3d seconds", time - 
$start));
   }
 
-  # COLLABREPORT must come before REPORT, since the regex is overgenerous
-  elsif (/(COLLABREPORT) SPAMC\/(.*)/) {
-    my $method = $1;
-    my $version = $2;
-    eval {
-      Mail::SpamAssassin::Util::trap_sigalrm_fully(sub {
-                                                    die "child processing 
timeout";
-                                                  });
-      alarm $timeout_child if ($timeout_child);
-      report($method, $version, $start, $remote_hostname, $remote_hostaddr);
-    };
-    alarm 0;
-
-    if ($@) {
-      if ($@ =~ /child processing timeout/) {
-        service_timeout("($timeout_child second timeout while trying to 
$method)");
-      } else {
-       warn "spamd: $@";
-      }
-      $client->close();
-      return 0;
-    }
-  }
-
   # It might be a CHECK message, meaning that we should just check
   # if it's spam or not, then return the appropriate response.
   # If we get the PROCESS command, the client is going to send a
@@ -1098,7 +1074,7 @@
     }
   }
 
-  elsif (/(LEARN) SPAMC\/(.*)/) {
+  elsif (/(TELL) SPAMC\/(.*)/) {
     my $method = $1;
     my $version = $2;
     eval {
@@ -1106,7 +1082,7 @@
                                                     die "child processing 
timeout";
                                                   });
       alarm $timeout_child if ($timeout_child);
-      learn($method, $version, $start, $remote_hostname, $remote_hostaddr);
+      dotell($method, $version, $start, $remote_hostname, $remote_hostaddr);
     };
     alarm 0;
 
@@ -1368,21 +1344,27 @@
   return 1;
 }
 
-sub learn {
+sub dotell {
   my ($method, $version, $start_time, $remote_hostname, $remote_hostaddr) = @_;
   local ($_);
   my $expected_length;
-  my $learn_type;
-  my $forget = 0;
-  my $isspam = 1;
 
   my $hdrs = {};
 
   return 0 unless (parse_headers($hdrs, $client));
 
   $expected_length = $hdrs->{expected_length};
-  $learn_type = $hdrs->{learn_type};
 
+  if ($hdrs->{set_local} && $hdrs->{remove_local}) {
+    protocol_error("Unable to set local and remove local in the same 
operation.");
+    return 0;
+  }
+
+  if ($hdrs->{set_remote} && $hdrs->{remove_remote}) {
+    protocol_error("Unable to set remote and remove remote in the same 
operation.");
+    return 0;
+  }
+
   &handle_setuid_to_user if ($setuid_to_user && $> == 0);
 
   if ($opt{'sql-config'} && !defined($current_user)) {
@@ -1413,30 +1395,6 @@
   $msgid        ||= "(unknown)";
   $current_user ||= "(unknown)";
 
-  my $learn_type_desc;
-  my $learn_type_desc_past;
-
-  if ($learn_type == 0) {
-    $learn_type_desc = "learning spam";
-    $learn_type_desc_past = "learned spam";
-    $isspam = 1;
-  }
-  elsif ($learn_type == 1) {
-    $learn_type_desc = "learning ham";
-    $learn_type_desc_past = "learned ham";
-    $isspam = 0;
-  }
-  elsif ($learn_type == 2) {
-    $learn_type_desc = "forgetting";
-    $learn_type_desc_past = "forgot";
-    $forget = 1;
-  }
-
-  info("spamd: $learn_type_desc"
-      . " message $msgid"
-      . ( $rmsgid ? " aka $rmsgid" : "" )
-      . " for ${current_user}:$>");
-
   # Check length if we're supposed to.
   if (defined $expected_length && $actual_length != $expected_length) {
     protocol_error("(Content-Length mismatch: Expected $expected_length bytes, 
got $actual_length bytes)");
@@ -1444,118 +1402,57 @@
     return 0;
   }
 
-  my $status = $spamtest->learn($mail, undef, $isspam, $forget);
-  my $hdr;
+  my @did_set;
+  my @did_remove;
 
-  if ($status->did_learn()) {
-    $hdr .= "Learned: Yes";
+  if ($hdrs->{set_local}) {
+    my $status = $spamtest->learn($mail, undef, ($hdrs->{message_class} eq 
'spam' ? 1 : 0), 0);
+
+    push(@did_set, 'local') if ($status->did_learn());
+    $status->finish();
   }
-  else {
-    $hdr .= "Learned: No";
-  }
 
-  print $client "SPAMD/1.1 $resphash{$resp} $resp\r\n",
-    $hdr . "\r\n\r\n";
+  if ($hdrs->{remove_local}) {
+    my $status = $spamtest->learn($mail, undef, undef, 1);
 
-  my $scantime = sprintf( "%.1f", time - $start_time );
-
-  info("spamd: $learn_type_desc_past message for $current_user:$> in"
-       . " $scantime seconds, $actual_length bytes");
-  $status->finish();    # added by jm to allow GC'ing
-  $mail->finish();
-  return 1;
-}
-
-sub report {
-  my ($method, $version, $start_time, $remote_hostname, $remote_hostaddr) = @_;
-  local ($_);
-  my $expected_length;
-  my $report_type;
-
-  my $hdrs = {};
-
-  return 0 unless (parse_headers($hdrs, $client));
-
-  $expected_length = $hdrs->{expected_length};
-  $report_type = $hdrs->{collabreport_type};
-
-  &handle_setuid_to_user if ($setuid_to_user && $> == 0);
-
-  if ($opt{'sql-config'} && !defined($current_user)) {
-    unless (handle_user_sql('nobody')) {
-      service_unavailable_error("Error fetching user preferences via SQL");
-      return 0;
-    }
+    push(@did_remove, 'local') if ($status->did_learn());
+    $status->finish();
   }
 
-  if ($opt{'ldap-config'} && !defined($current_user)) {
-    handle_user_ldap('nobody');
-  }
+  if ($hdrs->{set_remote}) {
+    require Mail::SpamAssassin::Reporter;
+    my $msgrpt = Mail::SpamAssassin::Reporter->new($spamtest, $mail);
 
-  my $resp = "EX_OK";
-
-  # generate mail object from input
-  my ($mail, $actual_length) = parse_body($client, $expected_length);
-
-  # Check length if we're supposed to.
-  if (defined $expected_length && $actual_length != $expected_length) {
-    protocol_error("(Content-Length mismatch: Expected $expected_length bytes, 
got $actual_length bytes)");
-    $mail->finish();
-    return 0;
+    push(@did_set, 'remote') if ($msgrpt->report());
   }
 
-  if ( $mail->get_header("X-Spam-Checker-Version") ) {
-    my $new_mail = 
$spamtest->parse($spamtest->remove_spamassassin_markup($mail), 1);
-    $mail->finish();
-    $mail = $new_mail;
-  }
+  if ($hdrs->{remove_remote}) {
+    require Mail::SpamAssassin::Reporter;
+    my $msgrpt = Mail::SpamAssassin::Reporter->new($spamtest, $mail);
 
-  # attempt to fetch the message ids
-  my ($msgid, $rmsgid) = parse_msgids($mail);
-
-  $msgid        ||= "(unknown)";
-  $current_user ||= "(unknown)";
-
-  my $report_type_desc;
-  my $report_type_desc_past;
-
-  if ($report_type == 0) {
-    $report_type_desc = "reporting spam";
-    $report_type_desc_past = "reported spam";
+    push(@did_remove, 'remote') if ($msgrpt->revoke());
   }
-  elsif ($report_type == 1) {
-    $report_type_desc = "revoking ham";
-    $report_type_desc_past = "revoked ham";
-  }
 
-  info("spamd: $report_type_desc"
-      . " message $msgid"
-      . ( $rmsgid ? " aka $rmsgid" : "" )
-      . " for ${current_user}:$>");
-
   my $hdr;
-  my $status = 1;
+  my $info_set_str;
+  my $info_remove_str;
 
-  if ($report_type) {
-    $status = $spamtest->revoke_as_spam($mail);
+  if (scalar(@did_set)) {
+    $hdr .= "DidSet: " . join(',', @did_set) . "\r\n";
+    $info_set_str = " Setting " . join(',', @did_set) . " ";
   }
-  else {
-    $status = $spamtest->report_as_spam($mail);
-  }
 
-  if ($status) {
-    $hdr .= "Reported: Yes";
+  if (scalar(@did_remove)) {
+    $hdr .= "DidRemove: " . join(',', @did_remove) . "\r\n";
+    $info_remove_str = " Removing " . join(',', @did_remove) . " ";
   }
-  else {
-    $hdr .= "Reported: No";
-  }
 
   print $client "SPAMD/1.1 $resphash{$resp} $resp\r\n",
     $hdr . "\r\n\r\n";
 
   my $scantime = sprintf( "%.1f", time - $start_time );
 
-  info("spamd: $report_type_desc_past message for $current_user:$> in"
+  info("spamd:$info_set_str$info_remove_str for $current_user:$> in"
        . " $scantime seconds, $actual_length bytes");
 
   $mail->finish();
@@ -1571,6 +1468,7 @@
   # max 255 headers
   for my $hcount ( 0 .. 255 ) {
     my $line = $client->getline;
+
     unless (defined $line) {
       protocol_error("(EOF during headers)");
       return 0;
@@ -1590,12 +1488,15 @@
     elsif ($header eq 'User') {
       return 0 unless &got_user_header($hdrs, $header, $value);
     }
-    elsif ($header eq 'Learn-type') {
-      return 0 unless &got_learn_type_header($hdrs, $header, $value);
+    elsif ($header eq 'Message-class') {
+      return 0 unless &got_message_class_header($hdrs, $header, $value);
     }
-    elsif ($header eq 'CollabReport-type') {
-      return 0 unless &got_collabreport_type_header($hdrs, $header, $value);
+    elsif ($header eq 'Set') {
+      return 0 unless &got_set_header($hdrs, $header, $value);
     }
+    elsif ($header eq 'Remove') {
+      return 0 unless &got_remove_header($hdrs, $header, $value);
+    }
   }
 
   # avoid too-many-headers DOS attack
@@ -1672,33 +1573,49 @@
   return 1;
 }
 
-sub got_learn_type_header {
+sub got_message_class_header {
   my ($hdrs, $header, $value) = @_;
-  if ($value !~ /^(\d*)$/) {
-    protocol_error("(Learn-type contains non-numeric bytes)");
+
+  unless (lc($value) ne 'spam' || lc($value) ne 'ham') {
+    protocol_error("(Message-class header contains invalid class)");
     return 0;
   }
-  my $type = $1;
-  if ($type != 0 && $type != 1 && $type != 2) {
-    protocol_error("(Learn-type contains invalid type)");
-    return 0;
+  $hdrs->{message_class} = $value;
+
+  return 1;
+}
+
+sub got_set_header {
+  my ($hdrs, $header, $value) = @_;
+
+  $hdrs->{set_local} = 0;
+  $hdrs->{set_remote} = 0;
+
+  if ($value =~ /local/i) {
+    $hdrs->{set_local} = 1;
   }
-  $hdrs->{learn_type} = $type;
+
+  if ($value =~ /remote/i) {
+    $hdrs->{set_remote} = 1;
+  }
+
   return 1;
 }
 
-sub got_collabreport_type_header {
+sub got_remove_header {
   my ($hdrs, $header, $value) = @_;
-  if ($value !~ /^(\d*)$/) {
-    protocol_error("(CollabReport-type contains non-numeric bytes)");
-    return 0;
+
+  $hdrs->{remove_local} = 0;
+  $hdrs->{remove_remote} = 0;
+
+  if ($value =~ /local/i) {
+    $hdrs->{remove_local} = 1;
   }
-  my $type = $1;
-  if ($type != 0 && $type != 1) {
-    protocol_error("(CollabReport-type contains invalid type)");
-    return 0;
+
+  if ($value =~ /remote/i) {
+    $hdrs->{remove_remote} = 1;
   }
-  $hdrs->{collabreport_type} = $type;
+
   return 1;
 }
 

Attachment: pgpwT7I0PFLEF.pgp
Description: PGP signature

Reply via email to