Hi!

I'm new to this list :). I decided to install dspam on my Unix systems
about two months ago. I chose the hash_drv and enable-homedir options.

I have found some deficiencies in the hash_drv code, so I decided to make
my own patch. My patch adds some features to hash_drv and corrects
some mistakes:

1. I added file write checks to avoid creating truncated css files when
dspam can't write (quota or disk limit). Dspam dies on truncated css files, so these checks are necessary.

2. I added a counter (a kind of timestamp) to hash_drv. This is very useful with cssclean. Original css records in the hash have an 8-byte hashcode and two 4-byte counters for spam and nonspam. I decided to take the top (most significant) 4 bits from each of these counters and use them to count cssclean operations.
So whenever dspam increases the spam/nonspam counter, it zeroes my counter.
Every cssclean run increases my counter - so if you clean css files
every day, you get a kind of timestamp for css records.
I modified the cssclean code so that tokens are removed from
css when:
- nonspam*2+spam<5 and mycounter>60, or
- nonspam+spam<=1 and mycounter>15, or
- mycounter>120, or
- nonspam ~= spam and mycounter>15
    ("~=" means nearly equal :) i.e. abs(nonspam-spam)/(nonspam+spam)<.2)

I added an extra option for cssclean - "heavy". For example:
"/usr/bin/cssclean bogdan.css heavy" - with it the rules are stricter:
- nonspam+spam<=1, or
- abs(nonspam-spam)/(nonspam+spam)<.1

This feature works with normal css files without any .css conversion,
so it's safe to use on a working system - but if you want to remove
this patch, you will need to restore the old .css files from backup.

3. I changed the path of the temporary new css file:
-  snprintf(newfile, sizeof(newfile), "/tmp/%u.css", (unsigned int) getpid());
+  snprintf(newfile, sizeof(newfile), "%s.%d",filename,(unsigned int) getpid());
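It works much better: the temporary file is now on the same filesystem as the original, which the final rename() requires.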

4. I added a 300-second timeout for the hash driver lock. If dspam can't get the lock for
a css file, it ends with an error and delivers the mail without modifications.
When I created a classification group for all new users, it sometimes had to
wait for the lock for over 1000 seconds (lots of mails for the same css file). Postfix drops these mails if dspam does not finish within 1000 seconds - so an internal timeout in
the hash driver works much better.

5. This is from another patch posted on this list - copying the css header
(statistics) in cssclean.

Have fun using it :)

                                      Boguslaw Juza

------------------------------------------------------------------------

diff -Naur dspam-3.8.0.orig/src/hash_drv.c dspam-3.8.0/src/hash_drv.c
--- dspam-3.8.0.orig/src/hash_drv.c     2006-05-27 23:00:36.000000000 +0200
+++ dspam-3.8.0/src/hash_drv.c  2007-11-20 11:19:26.000000000 +0100
@@ -328,19 +328,29 @@

     f = fopen(filename, "w");
     if (!f) {
-      LOG(LOG_ERR, ERR_IO_FILE_OPEN, filename, strerror(errno));
+      LOG(LOG_ERR, ERR_IO_FILE_WRITE, filename, strerror(errno));
       return EFILE;
     }

-    fwrite(&header, sizeof(struct _hash_drv_header), 1, f);
+ if(fwrite(&header, sizeof(struct _hash_drv_header), 1, f)!=1){ + fclose(f); + unlink(filename); + LOG(LOG_ERR, ERR_IO_FILE_WRITE, filename, strerror(errno)); + return EFILE;
+    }
     for(i=0;i<header.hash_rec_max;i++)
-      fwrite(&rec, sizeof(struct _hash_drv_spam_record), 1, f);
+      if(fwrite(&rec, sizeof(struct _hash_drv_spam_record), 1, f)!=1) {
+ fclose(f); + unlink(filename); + LOG(LOG_ERR, ERR_IO_FILE_WRITE, filename, strerror(errno)); + return EFILE; + }
     fclose(f);
     map->fd = open(filename, open_flags);
   }

   if (map->fd < 0) {
-    LOG(LOG_ERR, ERR_IO_FILE_OPEN, filename, strerror(errno));
+    LOG(LOG_ERR, ERR_IO_FILE_WRITE, filename, strerror(errno));
     return EFILE;
   }

@@ -694,8 +704,8 @@

   stat->probability   = 0.00000;
   stat->status        = 0;
-  stat->innocent_hits = rec.nonspam;
-  stat->spam_hits     = rec.spam;
+  stat->innocent_hits = rec.nonspam & 0x0fffffff;
+  stat->spam_hits     = rec.spam & 0x0fffffff;

   return 0;
 }
@@ -710,9 +720,13 @@
   struct _hash_drv_storage *s = (struct _hash_drv_storage *) CTX->storage;

   rec.hashcode = token;
+
   rec.nonspam = (stat->innocent_hits > 0) ? stat->innocent_hits : 0;
   rec.spam = (stat->spam_hits > 0) ? stat->spam_hits : 0;

+  if(rec.nonspam>0x0fffffff)rec.nonspam=0x0fffffff;
+  if(rec.spam>0x0fffffff)rec.spam=0x0fffffff;
+
   return _hash_drv_set_spamrecord(s->map, &rec, stat->offset);
 }

@@ -738,7 +752,12 @@
     LOG(LOG_ERR, ERR_IO_FILE_WRITE, filename, strerror(errno));
     return EFAILURE;
   }
-  fwrite(SIG->data, SIG->length, 1, file);
+  if(fwrite(SIG->data, SIG->length, 1, file)!=1) {
+ fclose(file); + unlink(filename); + LOG(LOG_ERR, ERR_IO_FILE_WRITE, filename, strerror(errno)); + return(EFAILURE); + }
   fclose(file);

   return 0;
@@ -1041,7 +1060,7 @@
 {
   struct _hash_drv_header header;
   struct _hash_drv_spam_record rec;
-  int i;
+  int i, lastsize;

   _hash_drv_close(map);

@@ -1062,10 +1081,20 @@

   LOGDEBUG("adding extent last: %d(%ld) new: %d(%ld) pctincrease: %1.2f", 
extents, last_extent_size, extents+1, header.hash_rec_max, (map->pctincrease/100.0));

-  lseek (map->fd, 0, SEEK_END);
-  write (map->fd, &header, sizeof(struct _hash_drv_header));
+  lastsize=lseek (map->fd, 0, SEEK_END);
+  if(write (map->fd, &header, sizeof(struct _hash_drv_header))!=sizeof(struct 
_hash_drv_header)) {
+    ftruncate(map->fd,lastsize);
+    close(map->fd);
+    LOG(LOG_WARNING, "unable to resize hash. open failed: %s", 
strerror(errno));
+    return EFAILURE;
+  }
for(i=0;i<header.hash_rec_max;i++) - write (map->fd, &rec, sizeof(struct _hash_drv_spam_record));
+    if(write (map->fd, &rec, sizeof(struct 
_hash_drv_spam_record))!=sizeof(struct _hash_drv_spam_record)) {
+      ftruncate(map->fd,lastsize);
+      close(map->fd);
+      LOG(LOG_WARNING, "unable to resize hash. open failed: %s", 
strerror(errno));
+      return EFAILURE;
+    }
   close(map->fd);

_hash_drv_open(map->filename, map, 0, map->max_seek, diff -Naur dspam-3.8.0.orig/src/hash_drv.h dspam-3.8.0/src/hash_drv.h
--- dspam-3.8.0.orig/src/hash_drv.h     2006-05-27 23:00:36.000000000 +0200
+++ dspam-3.8.0/src/hash_drv.h  2007-11-18 14:02:48.000000000 +0100
@@ -77,8 +77,8 @@
 typedef struct _hash_drv_spam_record
 {
   unsigned long long hashcode;
-  long nonspam;
-  long spam;
+  unsigned long nonspam;
+  unsigned long spam;
 } *hash_drv_spam_record_t;

int _hash_drv_get_spamtotals diff -Naur dspam-3.8.0.orig/src/tools.hash_drv/cssclean.c dspam-3.8.0/src/tools.hash_drv/cssclean.c
--- dspam-3.8.0.orig/src/tools.hash_drv/cssclean.c      2006-05-27 
23:00:36.000000000 +0200
+++ dspam-3.8.0/src/tools.hash_drv/cssclean.c   2007-11-20 13:29:09.000000000 
+0100
@@ -68,23 +68,26 @@

 #define SYNTAX "syntax: cssclean [filename]"

-int cssclean(const char *filename);
+int cssclean(const char *filename, int heavy);

 int main(int argc, char *argv[]) {
   int r;
+  int heavy=0;

   if (argc<2) {
     fprintf(stderr, "%s\n", SYNTAX);
     exit(EXIT_FAILURE);
   }

+  if((argc>=3)&&(!strcmp(argv[2],"heavy")))heavy=1;
+
   agent_config = read_config(NULL);
   if (!agent_config) {
     LOG(LOG_ERR, ERR_AGENT_READ_CONFIG);
     exit(EXIT_FAILURE);
   }

-  r = cssclean(argv[1]);
+  r = cssclean(argv[1],heavy);

   if (r) {
     fprintf(stderr, "cssclean failed on error %d\n", r);
@@ -93,7 +96,7 @@
   exit(EXIT_SUCCESS);
 }

-int cssclean(const char *filename) {
+int cssclean(const char *filename, int heavy) {
   int i;
   hash_drv_header_t header;
   void *offset;
@@ -101,6 +104,8 @@
   hash_drv_spam_record_t rec;
   unsigned long filepos;
   char newfile[128];
+  unsigned long spam, nonspam, cntr;
+  int drop, prb;

   unsigned long hash_rec_max = HASH_REC_MAX;
   unsigned long max_seek     = HASH_SEEK_MAX;
@@ -132,7 +137,7 @@
   if (READ_ATTRIB("HashMaxSeek"))
      max_seek = strtol(READ_ATTRIB("HashMaxSeek"), NULL, 0);

-  snprintf(newfile, sizeof(newfile), "/tmp/%u.css", (unsigned int) getpid());
+  snprintf(newfile, sizeof(newfile), "%s.%d",filename,(unsigned int) getpid());

   if (_hash_drv_open(filename, &old, 0, max_seek,
                      max_extents, extent_size, pctincrease, flags))
@@ -151,8 +156,39 @@
   header = old.addr;
   while(filepos < old.file_len) {
     for(i=0;i<header->hash_rec_max;i++) {
+
       rec = old.addr+filepos;
-      if (rec->hashcode && rec->nonspam + rec->spam > 1) {
+
+      nonspam = rec->nonspam & 0x0fffffff;
+      spam = rec->spam & 0x0fffffff;
+      cntr = ((rec->nonspam>>28) & 0x0f) |
+             ((rec->spam>>24) & 0xf0);
+
+      if(cntr<255)cntr++;
+      rec->nonspam=nonspam|((cntr&0x0f)<<28);
+      rec->spam=spam|((cntr&0xf0)<<24);
+
+      if(nonspam+spam>0)
+        prb=(abs(nonspam-spam)*1000)/(nonspam+spam);
+      else
+        prb=1000;
+
+      drop=0;
+
+      if(heavy) {
+        if( (nonspam+spam<=1) ||
+            (prb<100)
+          )drop=1;
+      }
+      else {
+        if( ((nonspam*2+spam<5)&&(cntr>60)) ||
+            ((nonspam+spam<=1)&&(cntr>15))  ||
+            ((prb<200)&&(cntr>15)) ||
+ (cntr>120) + ) drop=1;
+      }
+
+      if (rec->hashcode && !drop) {
         if (_hash_drv_set_spamrecord(&new, rec, 0)) {
           LOG(LOG_WARNING, "aborting on error");
           _hash_drv_close(&new);
@@ -168,6 +204,7 @@
     filepos += sizeof(struct _hash_drv_header);
   }

+  bcopy (old.header, new.header, sizeof(struct _hash_drv_header));
   _hash_drv_close(&new);
   _hash_drv_close(&old);
   rename(newfile, filename);
diff -Naur dspam-3.8.0.orig/src/util.c dspam-3.8.0/src/util.c
--- dspam-3.8.0.orig/src/util.c 2006-05-13 03:12:59.000000000 +0200
+++ dspam-3.8.0/src/util.c      2007-11-20 11:15:09.000000000 +0100
@@ -40,6 +40,7 @@
 #include <stdio.h>
 #include <math.h>
 #include <fcntl.h>
+#include <signal.h>

 #ifdef TIME_WITH_SYS_TIME
 #   include <sys/time.h>
@@ -629,18 +630,27 @@
   return MIN(s, 1.0);
 }

+void timeout(){}
+
 int _ds_get_fcntl_lock(int fd) {
 #ifdef _WIN32
   return 0;
 #else
   struct flock f;
+  int r;

   f.l_type = F_WRLCK;
   f.l_whence = SEEK_SET;
   f.l_start = 0;
   f.l_len = 0;

-  return fcntl(fd, F_SETLKW, &f);
+  signal(SIGALRM,timeout);
+  alarm(300);
+  r=fcntl(fd, F_SETLKW, &f);
+  alarm(0);
+  signal(SIGALRM,SIG_DFL);
+
+  return r;
 #endif
 }

Reply via email to