Too many changes in one patch. Break this up into multiple consumable in 15 minute patches and I'll review them.
Some more work, analysis, and tests yielded apr_file_gets() and MD5 as two more bottlenecks.
I've already committed a fix to APR HEAD for apr_file_gets() - what it would do was constantly call apr_file_read() for one byte - but there was an enormous overhead implied in that. And, mod_disk_cache was relying upon that to load the headers from the disk. That was a super-big bottleneck and is responsible for the big jump here in performance. Modules like mod_negotiation also go *a lot* faster after that change.
The other bottleneck I looked at was MD5 as the on-disk naming scheme. I think MD5 is a poor choice here because it's not very fast. However, we were calling MD5 *twice* - one for the header and one for the data file - and each had the same MD5 input! So, I implemented a lazy caching routine there. Ideally, switching to a variant of the times-33 hash might work out better. *shrug*
Here's some completely unscientific benchmarks on my Mac (flood/localhost):
No cache: Requests: 3500 Time: 27.02 Req/Sec: 129.66 mod_mem_cache: Requests: 3500 Time: 25.18 Req/Sec: 139.23 mod_disk_cache: Requests: 3500 Time: 13.58 Req/Sec: 257.83
(My test case is using mod_autoindex, mod_negotiation, and small files.)
* modules/experimental/mod_disk_cache.c: Don't call MD5 twice for the same value.
Index: modules/experimental/mod_disk_cache.c =================================================================== RCS file: /home/cvs/httpd-2.0/modules/experimental/mod_disk_cache.c,v retrieving revision 1.52 diff -u -r1.52 mod_disk_cache.c --- modules/experimental/mod_disk_cache.c 18 Mar 2004 21:40:12 -0000 1.52 +++ modules/experimental/mod_disk_cache.c 2 Aug 2004 10:03:23 -0000 @@ -36,6 +36,7 @@ #endif char *datafile; /* name of file where the data will go */ char *hdrsfile; /* name of file where the hdrs will go */ + char *hashfile; /* Computed hash key for this URI */ char *name; apr_time_t version; /* update count of the file */ apr_file_t *fd; /* data file */ @@ -88,20 +89,26 @@ */ #define CACHE_HEADER_SUFFIX ".header" #define CACHE_DATA_SUFFIX ".data" -static char *header_file(apr_pool_t *p, int dirlevels, int dirlength, - const char *root, const char *name) +static char *header_file(apr_pool_t *p, disk_cache_conf *conf, + disk_cache_object_t *dobj, const char *name) { - char *hashfile; - hashfile = generate_name(p, dirlevels, dirlength, name); - return apr_pstrcat(p, root, "/", hashfile, CACHE_HEADER_SUFFIX, NULL); + if (!dobj->hashfile) { + dobj->hashfile = generate_name(p, conf->dirlevels, conf->dirlength, + name); + } + return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile, + CACHE_HEADER_SUFFIX, NULL); }
-static char *data_file(apr_pool_t *p, int dirlevels, int dirlength, - const char *root, const char *name) +static char *data_file(apr_pool_t *p, disk_cache_conf *conf, + disk_cache_object_t *dobj, const char *name) { - char *hashfile; - hashfile = generate_name(p, dirlevels, dirlength, name); - return apr_pstrcat(p, root, "/", hashfile, CACHE_DATA_SUFFIX, NULL); + if (!dobj->hashfile) { + dobj->hashfile = generate_name(p, conf->dirlevels, conf->dirlength, + name); + } + return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile, + CACHE_DATA_SUFFIX, NULL); }
static void mkdir_structure(disk_cache_conf *conf, char *file, apr_pool_t *pool)
@@ -136,8 +143,7 @@
if (dobj->fd) {
apr_file_flush(dobj->fd);
if (!dobj->datafile) {
- dobj->datafile = data_file(r->pool, conf->dirlevels, conf->dirlength,
- conf->cache_root, h->cache_obj->key);
+ dobj->datafile = data_file(r->pool, conf, dobj, h->cache_obj->key);
}
/* Remove old file with the same name. If remove fails, then
* perhaps we need to create the directory tree where we are
@@ -362,8 +368,6 @@
static int error_logged = 0;
disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
&disk_cache_module);
- char *data;
- char *headers;
apr_file_t *fd;
apr_file_t *hfd;
apr_finfo_t finfo;
@@ -387,44 +391,39 @@
return DECLINED;
}
- data = data_file(r->pool, conf->dirlevels, conf->dirlength, - conf->cache_root, key); - headers = header_file(r->pool, conf->dirlevels, conf->dirlength, - conf->cache_root, key); + /* Create and init the cache object */ + h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t)); + obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t)); + + info = &(obj->info); + obj->key = (char *) key; + dobj->name = (char *) key; + dobj->datafile = data_file(r->pool, conf, dobj, key); + dobj->hdrsfile = header_file(r->pool, conf, dobj, key);
/* Open the data file */ - rc = apr_file_open(&fd, data, APR_READ|APR_BINARY, 0, r->pool); + rc = apr_file_open(&dobj->fd, dobj->datafile, APR_READ|APR_BINARY, 0, + r->pool); if (rc != APR_SUCCESS) { /* XXX: Log message */ return DECLINED; }
/* Open the headers file */ - rc = apr_file_open(&hfd, headers, APR_READ|APR_BINARY, 0, r->pool); + rc = apr_file_open(&dobj->hfd, dobj->hdrsfile, APR_READ|APR_BINARY, 0, + r->pool); if (rc != APR_SUCCESS) { /* XXX: Log message */ return DECLINED; }
- /* Create and init the cache object */ - h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(cache_object_t)); - obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t)); - - info = &(obj->info); - obj->key = (char *) key; - dobj->name = (char *) key; - dobj->fd = fd; - dobj->hfd = hfd; - dobj->datafile = data; - dobj->hdrsfile = headers; - - rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, fd); + rc = apr_file_info_get(&finfo, APR_FINFO_SIZE, dobj->fd); if (rc == APR_SUCCESS) { dobj->file_size = finfo.size; }
/* Read the bytes to setup the cache_info fields */ - rc = file_cache_read_mydata(hfd, info, dobj); + rc = file_cache_read_mydata(dobj->hfd, info, dobj); if (rc != APR_SUCCESS) { /* XXX log message */ return DECLINED; @@ -544,10 +543,7 @@
if (!hfd) { if (!dobj->hdrsfile) { - dobj->hdrsfile = header_file(r->pool, - conf->dirlevels, - conf->dirlength, - conf->cache_root, + dobj->hdrsfile = header_file(r->pool, conf, dobj, h->cache_obj->key); }