(note: not subscribed, please CC on responses)

SQLite does not support the RPAD function. Instead, we do it manually with 
sprintf.

Note that I do not know very much Perl, so this is probably not the ideal way to do it, but this does seem to work quite well in my own testing.

--- SQL_orig.pm 2021-09-22 19:41:01.724517821 +0000
+++ /usr/share/perl5/Mail/SpamAssassin/BayesStore/SQL.pm        2021-09-22 19:46:54.061166896 +0000
@@ -629,9 +629,7 @@
   return unless (defined($self->{_dbh}));

   # 0/0 tokens don't count, but in theory we shouldn't have any
-  my $token_select = $self->_token_select_string();
-
-  my $sql = "SELECT $token_select, spam_count, ham_count, atime
+  my $sql = "SELECT token, spam_count, ham_count, atime
                FROM bayes_token
               WHERE id = ?
                 AND (spam_count > 0 OR ham_count > 0)";
@@ -650,7 +648,8 @@
     return;
   }

-  while (my ($token, $spam_count, $ham_count, $atime) = 
$sth->fetchrow_array()) {
+  while (my ($token_empty, $spam_count, $ham_count, $atime) = 
$sth->fetchrow_array()) {
+    my $token = sprintf "%-5s", $token_empty;
     my $prob = $self->{bayes}->_compute_prob_for_token($token, $vars[1], 
$vars[2],
                                                      $spam_count, $ham_count);
     $prob ||= 0.5;
@@ -863,9 +862,7 @@
   my $results_index = 0;
   my $bunch_end;

-  my $token_select = $self->_token_select_string();
-
-  my $multi_sql = "SELECT $token_select, spam_count, ham_count, atime
+  my $multi_sql = "SELECT token, spam_count, ham_count, atime
                      FROM bayes_token
                     WHERE id = ?
                       AND token IN ";
@@ -914,6 +911,7 @@

       foreach my $result (@{$results}) {
        # Make sure that spam_count and ham_count are not negative
+       $result->[0] = sprintf "%-5s", $result->[0];
        $result->[1] = 0 if (!$result->[1] || $result->[1] < 0);
        $result->[2] = 0 if (!$result->[2] || $result->[2] < 0);
        # Make sure that atime has a value
@@ -1341,9 +1339,7 @@
   print "v\t$num_spam\tnum_spam\n"    or die "Error writing: $!";
   print "v\t$num_ham\tnum_nonspam\n"  or die "Error writing: $!";

-  my $token_select = $self->_token_select_string();
-
-  my $token_sql = "SELECT spam_count, ham_count, atime, $token_select
+  my $token_sql = "SELECT spam_count, ham_count, atime, token
                      FROM bayes_token
                     WHERE id = ?
                       AND (spam_count > 0 OR ham_count > 0)";
@@ -1367,7 +1363,8 @@
   }

   while (my @values = $sth->fetchrow_array()) {
-    $values[3] = unpack("H*", $values[3]);
+    my $token = sprintf "%-5s", $values[3];
+    $values[3] = unpack("H*", $token);
     print "t\t" . join("\t", @values) . "\n"
       or die "Error writing: $!";
   }
@@ -2340,22 +2337,6 @@
   return $num_lowfreq;
 }

-=head2 _token_select_string
-
-private instance (String) _token_select_string
-
-Description:
-This method returns the string to be used in SELECT statements to represent
-the token column.
-
-The default is to use the RPAD function to pad the token out to 5 characters.
-
-=cut
-
-sub _token_select_string {
-  return "RPAD(token, 5, ' ')";
-}
-
 sub sa_die { Mail::SpamAssassin::sa_die(@_); }

 1;

Reply via email to