Some time ago I complained about very slow access to compressed mboxes. Unfortunately, it looks like there is very little interest in it, so I had to investigate some things by myself.
Firstly, some rationale: why do I prefer to use mbox/maildir over mdbox? Short answer: the "bus factor" for mdbox support (not only in dovecot). Longer answer: if something goes wrong with maildir/mbox I can use other tools (mutt, formail, or even a text editor), and with mdbox ... I am not an ISP; I use dovecot as a "gateway" to my (rather huge) mail archive. Most of these mails are rather valuable to me, so I prefer to use something "well-known-and-tested". (I can't do what most ISPs do: write in the "Terms of Service" that mail can be lost or damaged and that we give no warranty :) ) So then: my patch is below. It contains 2 changes: 1. When the buffer is compressed, we try to keep the buffered data from the last marked offset onward. 2. Increase the temporary buffer used for decompression. Without these changes, a 1.5 GB bzip-compressed mbox with ~20K messages can't be opened in 1.5 days. After applying change 1 it can be opened in ~1.5 h. With both changes it takes a few minutes. Maybe it would be a good idea to add a config parameter to specify the size of the decompression buffer? The patch is against v2.0.18.
diff -x '*.o' -x '*.lo' -x '*.la' -u -r ../dovecot-2.0.18/src/lib/istream.c ./src/lib/istream.c --- ../dovecot-2.0.18/src/lib/istream.c 2011-12-13 12:38:27.000000000 +0100 +++ ./src/lib/istream.c 2012-04-14 10:27:23.790724625 +0200 @@ -452,6 +452,22 @@ stream->pos -= stream->skip; stream->skip = 0; + +} + +void i_stream_compress1(struct istream_private *stream, size_t bytes ) +{ + + size_t lskip ; + + lskip = (stream->skip > bytes ? bytes : stream->skip ); + + memmove(stream->w_buffer, stream->w_buffer + lskip , + stream->pos - lskip); + stream->pos -= lskip; + stream->skip -= lskip; + + } void i_stream_grow_buffer(struct istream_private *stream, size_t bytes) diff -x '*.o' -x '*.lo' -x '*.la' -u -r ../dovecot-2.0.18/src/lib/istream-internal.h ./src/lib/istream-internal.h --- ../dovecot-2.0.18/src/lib/istream-internal.h 2011-12-13 12:38:27.000000000 +0100 +++ ./src/lib/istream-internal.h 2012-04-13 00:06:27.700298378 +0200 @@ -51,6 +51,7 @@ i_stream_create(struct istream_private *stream, struct istream *parent, int fd); void i_stream_compress(struct istream_private *stream); +void i_stream_compress1(struct istream_private *stream, size_t bytes ); void i_stream_grow_buffer(struct istream_private *stream, size_t bytes); bool i_stream_get_buffer_space(struct istream_private *stream, size_t wanted_size, size_t *size_r); diff -x '*.o' -x '*.lo' -x '*.la' -u -r ../dovecot-2.0.18/src/plugins/zlib/istream-bzlib.c ./src/plugins/zlib/istream-bzlib.c --- ../dovecot-2.0.18/src/plugins/zlib/istream-bzlib.c 2012-02-09 18:32:48.000000000 +0100 +++ ./src/plugins/zlib/istream-bzlib.c 2012-04-14 10:35:04.349800777 +0200 @@ -9,12 +9,14 @@ #include <bzlib.h> #define CHUNK_SIZE (1024*64) +#define BUFF_SIZE (1024*1024*16) struct bzlib_istream { struct istream_private istream; - + bz_stream zs; uoff_t eof_offset, stream_size; + uoff_t marked_offset; size_t prev_size, high_pos; struct stat last_parent_statbuf; @@ -48,7 +50,6 @@ uoff_t high_offset; size_t size; int ret; - high_offset = 
stream->istream.v_offset + (stream->pos - stream->skip); if (zstream->eof_offset == high_offset) { i_assert(zstream->high_pos == 0 || @@ -87,7 +88,14 @@ if (stream->pos == stream->buffer_size) { if (stream->skip > 0) { /* lose our buffer cache */ - i_stream_compress(stream); + /* try to save our buffer cache as much as possible */ + + if (zstream->marked && (stream-> skip - (stream->istream.v_offset - zstream->marked_offset)) >0 ){ + + i_stream_compress1(stream, stream-> skip - (stream->istream.v_offset - zstream->marked_offset)); + } else { + i_stream_compress(stream); + } } if (stream->pos == stream->buffer_size) @@ -215,8 +223,12 @@ struct bzlib_istream *zstream = (struct bzlib_istream *) stream; uoff_t start_offset = stream->istream.v_offset - stream->skip; + if (mark) + zstream->marked_offset = v_offset; if (v_offset < start_offset) { /* have to seek backwards */ + + i_stream_bzlib_reset(zstream); start_offset = 0; } else if (zstream->high_pos != 0) { @@ -243,6 +255,7 @@ } i_stream_skip(&stream->istream, avail); + } while (i_stream_read(&stream->istream) >= 0); if (stream->istream.v_offset != v_offset) { @@ -260,8 +273,11 @@ } } - if (mark) + if (mark){ zstream->marked = TRUE; + zstream->marked_offset = v_offset; + } + } static const struct stat * @@ -329,7 +345,9 @@ i_stream_bzlib_init(zstream); zstream->istream.iostream.close = i_stream_bzlib_close; - zstream->istream.max_buffer_size = input->real_stream->max_buffer_size; + // zstream->istream.max_buffer_size = (input->real_stream->max_buffer_size); + zstream->istream.max_buffer_size = BUFF_SIZE; + zstream->istream.read = i_stream_bzlib_read; zstream->istream.seek = i_stream_bzlib_seek; zstream->istream.stat = i_stream_bzlib_stat;
-- Gdyby ktoś miał zbędny Toshiba G450 - to chętnie przejmę ;) < asuffield> a workstation is anything you can stick on somebodies desk and con them into using -- in #debian-devel