As I mentioned earlier on the train, here is the patch I wrote last night
to avoid segfaults when cyr_expire hits broken cyrus.cache files.

We had maybe 30 of them spread amongst our machines, and each one
would cause a segfault in cyr_expire.  Unfortunately, there wasn't
enough information being logged to see which mailbox caused the issue.

I could use the last "cyr_expire.*expunged $n messages from $mailbox"
message in the syslog to get a rough idea, then use the -p option to
run over the same area and strace the process.  Rather crap though,
and still segfaulting.

So I wrote the other bit to log the error and keep going instead.
Along with our monitoring this means that the weekly cyr_expire
runs will continue to work fine (Sunday was low-IO day, not so much
now we run cyr_expire there!) and on Monday I can read my emails and
go clean up the affected mailboxes.

Later, once I have a bit more data, I can try to figure out what caused
the corruption - but I suspect it's mainly from crashes in the past.
Hopefully this is a once-off job anyway.

I think the "stop segfaults" code is worth inclusion:

            for (cache_ent = 0; cache_ent < NUM_CACHE_FIELDS; cache_ent++) {
                cacheitem = CACHE_ITEM_NEXT(cacheitem);
+               if ((cacheitem < (mailbox->cache_base + cache_offset)) ||
+                   (cacheitem > (mailbox->cache_base + mailbox->cache_len))) {
+                   syslog(LOG_ERR, "IOERROR: reading cache record for %s: got bogus offset %d for %u/%lu; try reconstruct",
+                                mailbox->name, cacheitem - (mailbox->cache_base + cache_offset), msgno, mailbox->exists);
+                   return IMAP_IOERROR;
+               }
            }


The -p option is handy for cleaning up just one user's
expunged files if they have something sensitive in there,
or are wasting a particularly large amount of disk space
for whatever reason, and you don't want the IO hit and
run time of a complete cyr_expire run.  You can also run
it with options different from our usual ones
(-X 7 -D 7 -E 1) to clean that mailbox up early.

http://cyrus.brong.fastmail.fm is updated with this patch too.

Bron.
Index: cyrus-imapd-2.3.9/imap/cyr_expire.c
===================================================================
--- cyrus-imapd-2.3.9.orig/imap/cyr_expire.c	2007-09-02 08:05:41.000000000 -0400
+++ cyrus-imapd-2.3.9/imap/cyr_expire.c	2007-09-02 08:06:39.000000000 -0400
@@ -75,7 +75,7 @@
 void usage(void)
 {
     fprintf(stderr,
-	    "cyr_expire [-C <altconfig>] -E <days> [-X <expunge-days>] [-a] [-v]\n");
+	    "cyr_expire [-C <altconfig>] -E <days> [-X <expunge-days>] [-p prefix] [-a] [-v]\n");
     exit(-1);
 }
 
@@ -350,6 +350,7 @@
     extern char *optarg;
     int opt, r = 0, expire_days = 0, expunge_days = 0, delete_days = 0;
     char *alt_config = NULL;
+    char *find_prefix = NULL;
     char buf[100];
     struct hash_table expire_table;
     struct expire_rock erock;
@@ -364,7 +365,7 @@
     memset(&erock, 0, sizeof(erock));
     memset(&drock, 0, sizeof(drock));
 
-    while ((opt = getopt(argc, argv, "C:D:E:X:va")) != EOF) {
+    while ((opt = getopt(argc, argv, "C:D:E:X:p:va")) != EOF) {
 	switch (opt) {
 	case 'C': /* alt config file */
 	    alt_config = optarg;
@@ -385,6 +386,10 @@
 	    expunge_days = atoi(optarg);
 	    break;
 
+	case 'p':
+	    find_prefix = optarg;
+	    break;
+
 	case 'v':
 	    erock.verbose++;
 	    drock.verbose++;
@@ -438,7 +443,11 @@
 		expunge_days);
     }
 
-    strlcpy(buf, "*", sizeof(buf));
+    if (find_prefix) {
+	strlcpy(buf, find_prefix, sizeof(buf));
+    } else {
+	strlcpy(buf, "*", sizeof(buf));
+    }
     mboxlist_findall(NULL, buf, 1, 0, 0, &expire, &erock);
 
     syslog(LOG_NOTICE, "expunged %lu out of %lu messages from %lu mailboxes",
Index: cyrus-imapd-2.3.9/imap/mailbox.c
===================================================================
--- cyrus-imapd-2.3.9.orig/imap/mailbox.c	2007-09-02 08:05:40.000000000 -0400
+++ cyrus-imapd-2.3.9/imap/mailbox.c	2007-09-02 09:27:50.000000000 -0400
@@ -1938,6 +1938,12 @@
 	    cacheitembegin = cacheitem = mailbox->cache_base + cache_offset;
 	    for (cache_ent = 0; cache_ent < NUM_CACHE_FIELDS; cache_ent++) {
 		cacheitem = CACHE_ITEM_NEXT(cacheitem);
+		if ((cacheitem < (mailbox->cache_base + cache_offset)) || 
+		    (cacheitem > (mailbox->cache_base + mailbox->cache_len))) {
+		    syslog(LOG_ERR, "IOERROR: reading cache record for %s: got bogus offset %d for %u/%lu; try reconstruct",
+				 mailbox->name, cacheitem - (mailbox->cache_base + cache_offset), msgno, mailbox->exists);
+		    return IMAP_IOERROR;
+		}
 	    }
 	    cache_record_size = (cacheitem - cacheitembegin);
 	    *new_cache_total_size += cache_record_size;
Index: cyrus-imapd-2.3.9/man/cyr_expire.8
===================================================================
--- cyrus-imapd-2.3.9.orig/man/cyr_expire.8	2007-09-02 08:05:40.000000000 -0400
+++ cyrus-imapd-2.3.9/man/cyr_expire.8	2007-09-02 08:06:39.000000000 -0400
@@ -56,6 +56,9 @@
 .BI \-X " expunge-days"
 ]
 [
+.BI \-p " mailbox-prefix"
+]
+[
 .B \-v
 ]
 .SH DESCRIPTION
@@ -102,6 +105,10 @@
 (when using the "delayed" expunge mode).  The default is 0 (zero)
 days, which will expunge \fBall\fR previously deleted messages.
 .TP
+\fB\-p \fImailbox-prefix\fR
+Only find mailboxes starting with this prefix.  e.g.
+"user.justgotspammedlots"
+.TP
 .B \-v
 Enable verbose output.
 .TP

Reply via email to