Niklas Edmundsson wrote:
> On Sat, 30 Sep 2006, Davi Arnaut wrote:
>
>> Hi,
>>
>> Wouldn't you avoid a lot of complexity in this patch
>> if you just deleted from the brigade the implicitly
>> created heap buckets while reading file buckets ?
>>
>> Something like:
>>
>> store_body:
>> .. if (is_file_bucket(bucket))
>> copy_file_bucket(bucket, bb);
>
> Probably, but that doesn't allow for creating a thread/process that
> does the copying in the background, which is my long term goal.
>
> Also, simply doing bucket_delete like that means that the file will
> never be sent to the client, which is a bad thing IMO ;)
Shame on me, but I did say "something like"... :)
I guess the attached patch does the same (plus mmap, et cetera) and is
much simpler. Comments?
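
In short, the new copy_file_bucket() drains a copy of each file bucket into
the already-open cache file descriptor, so the original brigade is left
untouched for delivery to the client. Roughly the same idea, restated as a
standalone sketch (the drain_file_bucket name, the temporary brigade and the
pool argument are only for illustration, not what the patch uses verbatim):

#include "apr_buckets.h"
#include "apr_file_io.h"

/* Sketch only: drain a copy of a file bucket into an open cache file.
 * Mirrors the intent of copy_file_bucket() in the patch, but uses a
 * private temporary brigade instead of a detached bucket ring.
 */
static apr_status_t drain_file_bucket(apr_bucket *b, apr_file_t *fd,
                                      apr_off_t *file_size, apr_pool_t *p)
{
    apr_bucket_brigade *tmp = apr_brigade_create(p, b->list);
    apr_bucket *copy;
    apr_status_t rv;

    /* Work on a copy so the original bucket still reaches the client. */
    rv = apr_bucket_copy(b, &copy);
    if (rv != APR_SUCCESS) {
        return rv;
    }
    APR_BRIGADE_INSERT_TAIL(tmp, copy);

    while (!APR_BRIGADE_EMPTY(tmp) && rv == APR_SUCCESS) {
        const char *str;
        apr_size_t len, written;
        apr_bucket *e = APR_BRIGADE_FIRST(tmp);

        /* Reading a large file bucket morphs it into its first chunk
         * (heap or mmap) and inserts a bucket for the rest after it. */
        rv = apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
        if (rv == APR_SUCCESS) {
            rv = apr_file_write_full(fd, str, len, &written);
        }
        if (rv == APR_SUCCESS) {
            *file_size += written;
        }
        apr_bucket_delete(e);   /* chunk consumed, free it */
    }

    apr_brigade_destroy(tmp);
    return rv;
}

The call site is what the new store_body() loop below does: call it for each
APR_BUCKET_IS_FILE(e) bucket with dobj->fd and &dobj->file_size, and fall back
to plain apr_bucket_read()/apr_file_write_full() for everything else.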
--
Davi Arnaut
Index: trunk/modules/cache/mod_disk_cache.c
===================================================================
--- trunk.orig/modules/cache/mod_disk_cache.c	2006-10-01 12:56:55.000000000 -0300
+++ trunk/modules/cache/mod_disk_cache.c	2006-10-01 12:57:34.000000000 -0300
@@ -984,126 +984,51 @@
return APR_SUCCESS;
}
+apr_status_t copy_file_bucket(apr_bucket *b, apr_file_t *fd, apr_off_t *file_size)
+{
+ apr_size_t len;
+ const char *str;
+ apr_size_t written;
+ apr_status_t rv, rc;
+ apr_bucket *ref, *e, *s;
-static apr_status_t copy_body(apr_pool_t *p,
- apr_file_t *srcfd, apr_off_t srcoff,
- apr_file_t *destfd, apr_off_t destoff,
- apr_off_t len)
-{
- apr_status_t rc;
- apr_size_t size;
- apr_finfo_t finfo;
- apr_time_t starttime = apr_time_now();
-
- char *buf = apr_palloc(p, CACHE_BUF_SIZE);
- if (!buf) {
- return APR_ENOMEM;
- }
-
- if(srcoff != 0) {
- rc = apr_file_seek(srcfd, APR_SET, &srcoff);
- if(rc != APR_SUCCESS) {
- return rc;
- }
- }
+ rv = apr_bucket_copy(b, &e);
- if(destoff != 0) {
- rc = apr_file_seek(destfd, APR_SET, &destoff);
- if(rc != APR_SUCCESS) {
- return rc;
- }
+ if (rv != APR_SUCCESS) {
+ return rv;
}
- /* Tried doing this with mmap, but sendfile on Linux got confused when
- sending a file while it was being written to from an mmapped area.
- The traditional way seems to be good enough, and less complex.
- */
- while(len > 0) {
- size=MIN(len, CACHE_BUF_SIZE);
+ APR_BUCKET_INIT(e);
+ s = APR_BUCKET_NEXT(e);
- rc = apr_file_read_full (srcfd, buf, size, NULL);
- if(rc != APR_SUCCESS) {
- return rc;
- }
+ do {
+ rv = apr_bucket_read(e, &str, &len, APR_BLOCK_READ);
- rc = apr_file_write_full(destfd, buf, size, NULL);
- if(rc != APR_SUCCESS) {
- return rc;
+ if (rv != APR_SUCCESS && rv != APR_EOF) {
+ break;
}
- len -= size;
- }
-
- /* Check if file has changed during copying. This is not 100% foolproof
- due to NFS attribute caching when on NFS etc. */
- /* FIXME: Can we assume that we're always copying an entire file? In that
- case we can check if the current filesize matches the length
- we think it is */
- rc = apr_file_info_get(&finfo, APR_FINFO_MTIME, srcfd);
- if(rc != APR_SUCCESS) {
- return rc;
- }
- if(starttime < finfo.mtime) {
- return APR_EGENERAL;
- }
-
- return APR_SUCCESS;
-}
-
-
-static apr_status_t replace_brigade_with_cache(cache_handle_t *h,
- request_rec *r,
- apr_bucket_brigade *bb)
-{
- apr_status_t rv;
- int flags;
- apr_bucket *e;
- core_dir_config *pdcfg = ap_get_module_config(r->per_dir_config,
- &core_module);
- disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
- flags = APR_READ|APR_BINARY;
-#if APR_HAS_SENDFILE
- flags |= ((pdcfg->enable_sendfile == ENABLE_SENDFILE_OFF)
- ? 0 : APR_SENDFILE_ENABLED);
-#endif
+ rc = apr_file_write_full(fd, str, len, &written);
- rv = apr_file_open(&dobj->fd, dobj->datafile, flags, 0, r->pool);
- if (rv != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
- "disk_cache: Error opening datafile %s for URL %s",
- dobj->datafile, dobj->name);
- return rv;
- }
+ if (rc != APR_SUCCESS) {
+ rv = rc;
+ break;
+ }
- /* First, empty the brigade */
- e = APR_BRIGADE_FIRST(bb);
- while (e != APR_BRIGADE_SENTINEL(bb)) {
- apr_bucket *d;
- d = e;
+ ref = e;
+ *file_size += written;
e = APR_BUCKET_NEXT(e);
- apr_bucket_delete(d);
- }
+ apr_bucket_delete(ref);
+ } while (e != s && rv == APR_SUCCESS);
- /* Then, populate it with our cached instance */
- rv = recall_body(h, r->pool, bb);
- if (rv != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server,
-                     "disk_cache: Error serving URL %s from cache", dobj->name);
- return rv;
- }
- ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
- "disk_cache: Serving cached body for URL %s", dobj->name);
-
- return APR_SUCCESS;
+ return (rv == APR_EOF) ? APR_SUCCESS : rv;
}
-
static apr_status_t store_body(cache_handle_t *h, request_rec *r,
apr_bucket_brigade *bb)
{
apr_bucket *e;
apr_status_t rv;
- int copy_file = FALSE;
disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
&disk_cache_module);
@@ -1129,129 +1054,62 @@
dobj->file_size = 0;
}
- /* Check if this is a complete single sequential file, eligable for
- * file copy.
- */
- if(dobj->file_size == 0 && APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb)))
- {
- apr_off_t begin = -1;
- apr_off_t pos = -1;
- apr_file_t *fd = NULL;
- apr_bucket_file *a;
-
- copy_file = TRUE;
-
- for (e = APR_BRIGADE_FIRST(bb);
- e != APR_BRIGADE_SENTINEL(bb);
- e = APR_BUCKET_NEXT(e))
- {
- if(APR_BUCKET_IS_EOS(e)) {
- break;
- }
- if(!APR_BUCKET_IS_FILE(e)) {
- copy_file = FALSE;
- break;
- }
-
- a = e->data;
+ ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server,
+ "disk_cache: Caching body for URL %s", dobj->name);
- if(begin < 0) {
- begin = pos = e->start;
- fd = a->fd;
- }
+ for (e = APR_BRIGADE_FIRST(bb);
+ e != APR_BRIGADE_SENTINEL(bb);
+ e = APR_BUCKET_NEXT(e))
+ {
+ const char *str;
+ apr_size_t length, written;
- if(fd != a->fd || pos != e->start) {
- copy_file = FALSE;
- break;
+ if (APR_BUCKET_IS_FILE(e)) {
+ rv = copy_file_bucket(e, dobj->fd, &dobj->file_size);
+ if (rv != APR_SUCCESS) {
+ ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
+ "disk_cache: Error when copying file bucket for "
+ "URL %s", dobj->name);
+ file_cache_errorcleanup(dobj, r);
+ return rv;
}
-
- pos += e->length;
+ continue;
}
- if(copy_file) {
- dobj->file_size = pos;
+ rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
+ if (rv != APR_SUCCESS) {
+ ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
+ "disk_cache: Error when reading bucket for URL %s",
+ dobj->name);
+ file_cache_errorcleanup(dobj, r);
+ return rv;
}
- }
-
- if(copy_file) {
- apr_bucket_file *a;
- ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server,
- "disk_cache: Copying body for URL %s, len %"
- APR_OFF_T_FMT, dobj->name, dobj->file_size);
-
- e = APR_BRIGADE_FIRST(bb);
- a = e->data;
-
- rv = copy_body(r->pool, a->fd, e->start, dobj->tfd, 0,
- dobj->file_size);
- if(rv != APR_SUCCESS) {
+ rv = apr_file_write_full(dobj->fd, str, length, &written);
+ if (rv != APR_SUCCESS) {
ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
- "disk_cache: Copying body failed, "
- "URL %s", dobj->name);
+ "disk_cache: Error when writing cache file for "
+ "URL %s", dobj->name);
file_cache_errorcleanup(dobj, r);
return rv;
}
- }
- else {
- ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server,
- "disk_cache: Caching body for URL %s", dobj->name);
-
- for (e = APR_BRIGADE_FIRST(bb);
- e != APR_BRIGADE_SENTINEL(bb);
- e = APR_BUCKET_NEXT(e))
- {
- const char *str;
- apr_size_t length, written;
-
- /* Ignore the non-data-buckets */
- if(APR_BUCKET_IS_METADATA(e)) {
- continue;
- }
-
- rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
- if (rv != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
-                         "disk_cache: Error when reading bucket for URL %s",
- dobj->name);
- file_cache_errorcleanup(dobj, r);
- return rv;
- }
- rv = apr_file_write_full(dobj->fd, str, length, &written);
- if (rv != APR_SUCCESS) {
- ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server,
- "disk_cache: Error when writing cache file for "
- "URL %s", dobj->name);
- file_cache_errorcleanup(dobj, r);
- return rv;
- }
- dobj->file_size += written;
- if (dobj->file_size > conf->maxfs) {
- ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
- "disk_cache: URL %s failed the size check "
- "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")",
- dobj->name, dobj->file_size, conf->maxfs);
- file_cache_errorcleanup(dobj, r);
- return APR_EGENERAL;
- }
+ dobj->file_size += written;
+ if (dobj->file_size > conf->maxfs) {
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+ "disk_cache: URL %s failed the size check "
+ "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")",
+ dobj->name, dobj->file_size, conf->maxfs);
+ file_cache_errorcleanup(dobj, r);
+ return APR_EGENERAL;
}
}
-
- /* Drop out here if this wasn't the end */
- if (!APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) {
- return APR_SUCCESS;
- }
-
- if(!copy_file) {
- ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
- "disk_cache: Done caching URL %s, len %" APR_OFF_T_FMT,
- dobj->name, dobj->file_size);
-
- /* FIXME: Do we really need to check r->no_cache here since we checked
- it in the beginning? */
- if (r->no_cache || r->connection->aborted) {
+ /* Was this the final bucket? If yes, close the temp file and perform
+ * sanity checks.
+ */
+ if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) {
+ if (r->connection->aborted || r->no_cache) {
ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server,
"disk_cache: Discarding body for URL %s "
"because connection has been aborted.",
@@ -1269,31 +1127,17 @@
file_cache_errorcleanup(dobj, r);
return APR_EGENERAL;
}
- }
-
- /* All checks were fine. Move tempfile to final destination */
- /* Link to the perm file, and close the descriptor */
- rv = file_cache_el_final(dobj, r);
- if(rv != APR_SUCCESS) {
- file_cache_errorcleanup(dobj, r);
- return rv;
- }
- ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
- "disk_cache: Body for URL %s cached.", dobj->name);
-
- /* Redirect to cachefile if we copied a plain file */
- if(copy_file) {
- rv = replace_brigade_with_cache(h, r, bb);
- if(rv != APR_SUCCESS) {
- return rv;
- }
+ /* All checks were fine. Move tempfile to final destination */
+ /* Link to the perm file, and close the descriptor */
+ file_cache_el_final(dobj, r);
+ ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+ "disk_cache: Body for URL %s cached.", dobj->name);
}
return APR_SUCCESS;
}
-
static void *create_config(apr_pool_t *p, server_rec *s)
{
disk_cache_conf *conf = apr_pcalloc(p, sizeof(disk_cache_conf));
Index: trunk/modules/cache/mod_disk_cache.h
===================================================================
--- trunk.orig/modules/cache/mod_disk_cache.h	2006-10-01 12:56:55.000000000 -0300
+++ trunk/modules/cache/mod_disk_cache.h	2006-10-01 12:57:46.000000000 -0300
@@ -28,8 +28,6 @@
#define CACHE_DATA_SUFFIX ".data"
#define CACHE_VDIR_SUFFIX ".vary"
-#define CACHE_BUF_SIZE 65536
-
#define AP_TEMPFILE_PREFIX "/"
#define AP_TEMPFILE_BASE "aptmp"
#define AP_TEMPFILE_SUFFIX "XXXXXX"