Hello,
I have done my planned speedup for _ic_fetch() for 2.0.1. It is
attached. Note that this is a patch against 2.0, NOT cvs-head. (Whether
dbmysql.patch is applied or not doesn't matter).
I have switched to a more correct way of creating patches; apply with
patch -p1, inside the dbmail directory.
The code is extensively commented (search for "/*MR" to read my
comments), and has sanity checks in place that fall back to the old way.
After some testing and some "peer review" I think it could go into
2.0.1.
Unfortunately, while it is exactly what I was planning, and there is
only one query per message now, the speedup turned out to be way smaller
than I expected. Previously it fetched 20 messages per second on my
machine; now it fetches about 30 messages per second.
I have a further idea on fixing that, but it's a "HACK HACK HACK" thing.
I'll try it and then tell you more ;)
Yours, Mikhail Ramendik
diff -ur dbmail-orig/imapcommands.c dbmail-new/imapcommands.c
--- dbmail-orig/imapcommands.c 2004-10-26 14:39:45.000000000 +0400
+++ dbmail-new/imapcommands.c 2004-10-26 16:47:05.000000000 +0400
@@ -1,6 +1,8 @@
/*
Copyright (C) 1999-2004 IC & S [EMAIL PROTECTED]
+ Modified by Mikhail Ramendik [EMAIL PROTECTED] (MR)
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either
@@ -2146,6 +2148,11 @@
int no_parsing_at_all = 1, getrfcsize = 0, getinternaldate =
0, getflags = 0;
char *lastchar = NULL;
+/*MR added this*/
+ int we_have_prefetched_data;
+ u64_t index_in_msginfo, index_in_msginfo_next;
+/*MR end of addition*/
+
memset(&fetch_list, 0, sizeof(fetch_list));
memset(&headermsg, 0, sizeof(headermsg));
@@ -2198,17 +2205,30 @@
return 1;
}
- if (fetchitem.msgparse_needed) {
- no_parsing_at_all = 0;
- } else if (no_parsing_at_all) {
- if (fetchitem.getFlags)
- getflags = 1;
- if (fetchitem.getSize)
- getrfcsize = 1;
- if (fetchitem.getInternalDate)
- getinternaldate = 1;
- /* the msgUID will be retrieved anyway so if it is requested, it will be fetched */
- }
+/*MR here the changes start.
+
+ The idea is simple. The code that gets the items that don't require parsing -
+ flags, rfcsize, internaldate - in one quesry was there before, for the
+ no_parsing_at_all case. Now we use it for all cases, instead of getting this data
+ with separate queries for every message. I'll call this code "prefetch".
+
+ These messages theoretically should not change between the prefetch and the message
+ fetch. We alreasy have a set of UIDs in ud->mailbox.seq_list, and it SHOULD be the same.
+ But just to make sure, we will do a sanity check anyway, and so the system will survive
+ an inserted or appended message. (And log the fact).
+*/
+
+ if (fetchitem.msgparse_needed) no_parsing_at_all = 0;
+
+ /*MR as we use the prefetch code in all cases, we need the values for it in all cases as well*/
+ if (fetchitem.getFlags)
+ getflags = 1;
+ if (fetchitem.getSize)
+ getrfcsize = 1;
+ if (fetchitem.getInternalDate)
+ getinternaldate = 1;
+ /* the msgUID will be retrieved anyway so if it is requested, it will be fetched */
+
if (fetchitem.msgparse_needed && only_main_header_parsing) {
/* check to see wether all the information can be retrieved from the
@@ -2334,43 +2354,49 @@
fetch_end--;
}
- if (no_parsing_at_all) {
- trace(TRACE_DEBUG,
- "ic_fetch(): no parsing at all");
- /* all the info we need can be retrieved by a single
- * call to db_get_msginfo_range()
- */
- if (!imapcommands_use_uid) {
- /* find the msgUID's to use */
- lo = ud->mailbox.seq_list[fetch_start];
- hi = ud->mailbox.seq_list[fetch_end];
+/*MR here is the prefetch code. It was under if (no_parsing_at_all) ; it's not under it now.*/
- } else {
- lo = fetch_start;
- hi = fetch_end;
- }
- /* (always retrieve uid) */
- result =
- db_get_msginfo_range(lo, hi, ud->mailbox.uid,
- getflags, getinternaldate,
- getrfcsize, 1, &msginfo,
- &nmatching);
+ /* all the info we need can be retrieved by a single
+ * call to db_get_msginfo_range()
+ MR: we need this info anyway so let's retrieve it*/
+ if (!imapcommands_use_uid) {
+ /* find the msgUID's to use */
+ lo = ud->mailbox.seq_list[fetch_start];
+ hi = ud->mailbox.seq_list[fetch_end];
+ } else {
+ lo = fetch_start;
+ hi = fetch_end;
+ }
- if (result == -1) {
- list_freelist(&fetch_list.start);
- ci_write(ci->tx,
- "* BYE internal dbase error\r\n");
- return -1;
- }
+ /* (always retrieve uid) */
+ /*MR IMPORTANT WARNING. My code will depend on the msginfo data being ordered by UID,
+ ascending. This is what db_get_msginfo_range currently does. If it's not su, stuff
+ won't break but will fall back to the old method*/
+ result =
+ db_get_msginfo_range(lo, hi, ud->mailbox.uid,
+ getflags, getinternaldate,
+ getrfcsize, 1, &msginfo,
+ &nmatching);
+ if (result == -1) {
+ list_freelist(&fetch_list.start);
+ ci_write(ci->tx,
+ "* BYE internal dbase error\r\n");
+ return -1;
+ }
- if (result == -2) {
- list_freelist(&fetch_list.start);
- ci_write(ci->tx, "* BYE out of memory\r\n");
- return -1;
- }
+ if (result == -2) {
+ list_freelist(&fetch_list.start);
+ ci_write(ci->tx, "* BYE out of memory\r\n");
+ return -1;
+ }
+/*MR The following code is indeed for the non-parsed case*/
+ if (no_parsing_at_all) {
+ trace(TRACE_DEBUG,
+ "ic_fetch(): no parsing at all");
+
for (i = 0; i < nmatching; i++) {
if (getrfcsize && msginfo[i].rfcsize == 0) {
/* parse the message to calc the size */
@@ -2519,11 +2545,31 @@
}
/* if there is no parsing at all, this loop is not needed */
+/*MR this is where my main changes lie. We have msginfo already, with the data for
+ all these messages. But, we introduce a sanity check here; if the uid in the
+ msginfo is wrong, we get the data the old way.
+
+ Note: we depend on two things. First, that the data in msginfo is arranged by
+ UID ascending - as the current code in db_get_msginfo_range() does. And second,
+ if we are fetching by sequential number not UID, then for a larger sequential
+ number the UID will also be larger. This is what RFC3501 says.
+
+ If any of these dependencies break, the code is likely to work the old slow way.
+ But it should not break anyway.
+*/
+ index_in_msginfo_next = 0;
+/*MR index_in_msginfo will hold the current index in msginfo
+ index_in_msginfo_next will be the value for the next loop run. We could determine it at the
+ end of the previous loop run - but I really can't be sure where the loop ends in this code.
+ I'd rather waste some RAM bytes than risk a hidden bug*/
+
for (i = fetch_start; i <= fetch_end && !no_parsing_at_all;
i++) {
thisnum =
(imapcommands_use_uid ? i : ud->mailbox.
seq_list[i]);
+ /*MR the code was present before but is very useful for my purpose.
+ thisnum now holds the uid, so we'll use it for the sanity check*/
insert_rfcsize = 0;
if (imapcommands_use_uid) {
@@ -2557,8 +2603,34 @@
bad_response_send = 0;
isfirstfetchout = 1;
-
- while (curr && !bad_response_send) {
+
+ /*MR index_in_msginfo becomes what it should be for this loop run*/
+ index_in_msginfo = index_in_msginfo_next;
+
+ /*MR here is the sanity check*/
+ if (index_in_msginfo < nmatching) {
+ /*MR at least the index is valid, let's check the UID*/
+ if (thisnum == msginfo[index_in_msginfo].uid) {
+ /*MR all OK*/
+ we_have_prefetched_data = 1;
+ } else {
+ /*MR sanity check failed!*/
+ trace(TRACE_DEBUG,
+ "Prefetched UID not the same as real UID, retrieving data separately for this message");
+ we_have_prefetched_data = 0;
+ };
+ } else {
+ /*MR the index is invalid */
+ trace(TRACE_DEBUG,
+ "There are more real messages than prefetched data, retrieving data separately for this message");
+ we_have_prefetched_data = 0;
+ };
+
+ /*MR if the current index_in_msginfo is used, then use the next one for the next loop run,
+ otherwise keep the same */
+ index_in_msginfo_next = (we_have_prefetched_data ? index_in_msginfo+1 : index_in_msginfo);
+
+ while (curr && !bad_response_send) {
fi = (fetch_items_t *) curr->data;
fflush(ci->tx);
@@ -2566,18 +2638,23 @@
/* check RFC822.SIZE request */
if (fi->getSize) {
- /* ok, try to fetch size from dbase */
- if (db_get_rfcsize
- (thisnum, ud->mailbox.uid,
- &rfcsize) == -1) {
- ci_write(ci->tx,
- "\r\n* BYE internal dbase error\r\n");
- list_freelist(&fetch_list.
- start);
- return -1;
- }
-
- if (rfcsize == 0) {
+ /*MR if we do have prefetched data, let's use it. If not, let's query*/
+
+ if (we_have_prefetched_data) {
+ rfcsize = msginfo[index_in_msginfo].rfcsize;
+ } else {
+ /* ok, try to fetch size from dbase */
+ if (db_get_rfcsize
+ (thisnum, ud->mailbox.uid,
+ &rfcsize) == -1) {
+ ci_write(ci->tx,
+ "\r\n* BYE internal dbase error\r\n");
+ list_freelist(&fetch_list.start);
+ return -1;
+ };
+ };
+
+ if (rfcsize == 0) {
/* field is empty in dbase, message needs
to be parsed */
fi->msgparse_needed = 1;
@@ -2701,17 +2778,21 @@
}
if (fi->getInternalDate) {
- result =
- db_get_msgdate(ud->mailbox.uid,
+ /*MR if we do have prefetched data, let's use it. If not, let's query*/
+ if (we_have_prefetched_data) {
+ strncpy(date,msginfo[index_in_msginfo].internaldate,IMAP_INTERNALDATE_LEN);
+ } else {
+ result =
+ db_get_msgdate(ud->mailbox.uid,
thisnum, date);
- if (result == -1) {
- ci_write(ci->tx,
- "\r\n* BYE internal dbase error\r\n");
- list_freelist(&fetch_list.
- start);
- db_free_msg(&headermsg);
- return -1;
- }
+ if (result == -1) {
+ ci_write(ci->tx,
+ "\r\n* BYE internal dbase error\r\n");
+ list_freelist(&fetch_list.start);
+ db_free_msg(&headermsg);
+ return -1;
+ };
+ };
if (isfirstfetchout)
isfirstfetchout = 0;
@@ -3768,19 +3849,27 @@
isfirstout = 1;
- result =
- db_get_msgflag_all(thisnum,
+ /*MR if we do have prefetched data, let's use it. If not, let's query*/
+
+ if (we_have_prefetched_data) {
+ for (j = 0; j < IMAP_NFLAGS; j++) {
+ msgflags[j]=msginfo[index_in_msginfo].flags[j];
+ };
+ } else {
+ result =
+ db_get_msgflag_all(thisnum,
ud->mailbox.
uid,
msgflags);
- if (result == -1) {
- ci_write(ci->tx,
- "\r\n* BYE internal dbase error\r\n");
- list_freelist(&fetch_list.
+ if (result == -1) {
+ ci_write(ci->tx,
+ "\r\n* BYE internal dbase error\r\n");
+ list_freelist(&fetch_list.
start);
- db_free_msg(&headermsg);
- return -1;
- }
+ db_free_msg(&headermsg);
+ return -1;
+ };
+ };
for (j = 0; j < IMAP_NFLAGS; j++) {
if (msgflags[j]) {