--On Sunday, August 1, 2004 11:25 AM -0400 Bill Stoddard <[EMAIL PROTECTED]> wrote:

Too many changes in one patch. Break this up into multiple consumable in 15
minute patches and I'll review them.

Some more work, analysis, and tests yielded apr_file_gets() and MD5 as two more bottlenecks.


I've already committed a fix to APR HEAD for apr_file_gets() - what it would do was constantly call apr_file_read() for one byte - but there was an enormous overhead implied in that. And, mod_disk_cache was relying upon that to load the headers from the disk. That was a super-big bottleneck and is responsible for the big jump here in performance. Modules like mod_negotiation also go *a lot* faster after that change.

The other bottleneck I looked at was MD5 as the on-disk naming scheme. I think MD5 is a poor choice here because it's not very fast. However, we were calling MD5 *twice* - one for the header and one for the data file - and each had the same MD5 input! So, I implemented a lazy caching routine there. Ideally, switching to a variant of the times-33 hash might work out better. *shrug*

Here's some completely unscientific benchmarks on my Mac (flood/localhost):

No cache:       Requests: 3500 Time: 27.02 Req/Sec: 129.66
mod_mem_cache:  Requests: 3500 Time: 25.18 Req/Sec: 139.23
mod_disk_cache: Requests: 3500 Time: 13.58 Req/Sec: 257.83

(My test case is using mod_autoindex, mod_negotiation, and small files.)

* modules/experimental/mod_disk_cache.c: Don't call MD5 twice for the same value.

Index: modules/experimental/mod_disk_cache.c
===================================================================
RCS file: /home/cvs/httpd-2.0/modules/experimental/mod_disk_cache.c,v
retrieving revision 1.52
diff -u -r1.52 mod_disk_cache.c
--- modules/experimental/mod_disk_cache.c       18 Mar 2004 21:40:12 -0000      1.52
+++ modules/experimental/mod_disk_cache.c       2 Aug 2004 10:03:23 -0000
@@ -36,6 +36,7 @@
#endif
    char *datafile;          /* name of file where the data will go */
    char *hdrsfile;          /* name of file where the hdrs will go */
+    char *hashfile;          /* Computed hash key for this URI */
    char *name;
    apr_time_t version;      /* update count of the file */
    apr_file_t *fd;          /* data file */
@@ -88,20 +89,26 @@
 */
#define CACHE_HEADER_SUFFIX ".header"
#define CACHE_DATA_SUFFIX   ".data"
-static char *header_file(apr_pool_t *p, int dirlevels, int dirlength,
-                         const char *root, const char *name)
+static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
+                         disk_cache_object_t *dobj, const char *name)
{
-    char *hashfile;
-    hashfile = generate_name(p, dirlevels, dirlength, name);
-    return apr_pstrcat(p, root, "/", hashfile, CACHE_HEADER_SUFFIX, NULL);
+    if (!dobj->hashfile) {
+        dobj->hashfile = generate_name(p, conf->dirlevels, conf->dirlength,
+                                       name);
+    }
+    return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
+                       CACHE_HEADER_SUFFIX, NULL);
}

-static char *data_file(apr_pool_t *p, int dirlevels, int dirlength,
-                       const char *root, const char *name)
+static char *data_file(apr_pool_t *p, disk_cache_conf *conf,
+                       disk_cache_object_t *dobj, const char *name)
{
-    char *hashfile;
-    hashfile = generate_name(p, dirlevels, dirlength, name);
-    return apr_pstrcat(p, root, "/", hashfile, CACHE_DATA_SUFFIX, NULL);
+    if (!dobj->hashfile) {
+        dobj->hashfile = generate_name(p, conf->dirlevels, conf->dirlength,
+                                       name);
+    }
+    return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
+                       CACHE_DATA_SUFFIX, NULL);
}

static void mkdir_structure(disk_cache_conf *conf, char *file, apr_pool_t *pool)
@@ -136,8 +143,7 @@
if (dobj->fd) {
apr_file_flush(dobj->fd);
if (!dobj->datafile) {
- dobj->datafile = data_file(r->pool, conf->dirlevels, conf->dirlength,
- conf->cache_root, h->cache_obj->key);
+ dobj->datafile = data_file(r->pool, conf, dobj, h->cache_obj->key);
}
/* Remove old file with the same name. If remove fails, then
* perhaps we need to create the directory tree where we are
@@ -362,8 +368,6 @@
static int error_logged = 0;
disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
&disk_cache_module);
- char *data;
- char *headers;
apr_file_t *fd;
apr_file_t *hfd;
apr_finfo_t finfo;
@@ -387,44 +391,39 @@
return DECLINED;
}


-    data = data_file(r->pool, conf->dirlevels, conf->dirlength,
-                     conf->cache_root, key);
-    headers = header_file(r->pool, conf->dirlevels, conf->dirlength,
-                          conf->cache_root, key);
+    /* Create and init the cache object */
+    h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
+    obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
+
+    info = &(obj->info);
+    obj->key = (char *) key;
+    dobj->name = (char *) key;
+    dobj->datafile = data_file(r->pool, conf, dobj, key);
+    dobj->hdrsfile = header_file(r->pool, conf, dobj, key);

    /* Open the data file */
-    rc = apr_file_open(&fd, data, APR_READ|APR_BINARY, 0, r->pool);
+    rc = apr_file_open(&dobj->fd, dobj->datafile, APR_READ|APR_BINARY, 0,
+                       r->pool);
    if (rc != APR_SUCCESS) {
        /* XXX: Log message */
        return DECLINED;
    }

    /* Open the headers file */
-    rc = apr_file_open(&hfd, headers, APR_READ|APR_BINARY, 0, r->pool);
+    rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, APR_READ|APR_BINARY, 0,
+                       r->pool);
    if (rc != APR_SUCCESS) {
        /* XXX: Log message */
        return DECLINED;
    }

-    /* Create and init the cache object */
-    h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
-    obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
-
-    info = &(obj->info);
-    obj->key = (char *) key;
-    dobj->name = (char *) key;
-    dobj->fd = fd;
-    dobj->hfd = hfd;
-    dobj->datafile = data;
-    dobj->hdrsfile = headers;
-
-    rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, fd);
+    rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, dobj->fd);
    if (rc == APR_SUCCESS) {
        dobj->file_size = finfo.size;
    }

    /* Read the bytes to setup the cache_info fields */
-    rc = file_cache_read_mydata(hfd, info, dobj);
+    rc = file_cache_read_mydata(dobj->hfd, info, dobj);
    if (rc != APR_SUCCESS) {
        /* XXX log message */
        return DECLINED;
@@ -544,10 +543,7 @@

    if (!hfd)  {
        if (!dobj->hdrsfile) {
-            dobj->hdrsfile = header_file(r->pool,
-                                         conf->dirlevels,
-                                         conf->dirlength,
-                                         conf->cache_root,
+            dobj->hdrsfile = header_file(r->pool, conf, dobj,
                                         h->cache_obj->key);
        }

Reply via email to