Niklas Edmundsson wrote: > On Sat, 30 Sep 2006, Davi Arnaut wrote: > >> Hi, >> >> Wouldn't you avoid a lot of complexity in this patch >> if you just deleted from the brigade the implicitly >> created heap buckets while reading file buckets ? >> >> Something like: >> >> store_body: >> .. if (is_file_bucket(bucket)) >> copy_file_bucket(bucket, bb); > > Probably, but that doesn't allow for creating a thread/process that > does the copying in the background, which is my long term goal. > > Also, simply doing bucket_delete like that means that the file will > never be sent to the client, which is a bad thing IMO ;)
Shame on me, but I said "something like"... :) I guess the attached patch does the same (plus mmap, et cetera) and is much simpler. Comments ? -- Davi Arnaut
Index: trunk/modules/cache/mod_disk_cache.c =================================================================== --- trunk.orig/modules/cache/mod_disk_cache.c 2006-10-01 12:56:55.000000000 -0300 +++ trunk/modules/cache/mod_disk_cache.c 2006-10-01 12:57:34.000000000 -0300 @@ -984,126 +984,51 @@ return APR_SUCCESS; } +apr_status_t copy_file_bucket(apr_bucket *b, apr_file_t *fd, apr_off_t *file_size) +{ + apr_size_t len; + const char *str; + apr_size_t written; + apr_status_t rv, rc; + apr_bucket *ref, *e, *s; -static apr_status_t copy_body(apr_pool_t *p, - apr_file_t *srcfd, apr_off_t srcoff, - apr_file_t *destfd, apr_off_t destoff, - apr_off_t len) -{ - apr_status_t rc; - apr_size_t size; - apr_finfo_t finfo; - apr_time_t starttime = apr_time_now(); - - char *buf = apr_palloc(p, CACHE_BUF_SIZE); - if (!buf) { - return APR_ENOMEM; - } - - if(srcoff != 0) { - rc = apr_file_seek(srcfd, APR_SET, &srcoff); - if(rc != APR_SUCCESS) { - return rc; - } - } + rv = apr_bucket_copy(b, &e); - if(destoff != 0) { - rc = apr_file_seek(destfd, APR_SET, &destoff); - if(rc != APR_SUCCESS) { - return rc; - } + if (rv != APR_SUCCESS) { + return rv; } - /* Tried doing this with mmap, but sendfile on Linux got confused when - sending a file while it was being written to from an mmapped area. - The traditional way seems to be good enough, and less complex. - */ - while(len > 0) { - size=MIN(len, CACHE_BUF_SIZE); + APR_BUCKET_INIT(e); + s = APR_BUCKET_NEXT(e); - rc = apr_file_read_full (srcfd, buf, size, NULL); - if(rc != APR_SUCCESS) { - return rc; - } + do { + rv = apr_bucket_read(e, &str, &len, APR_BLOCK_READ); - rc = apr_file_write_full(destfd, buf, size, NULL); - if(rc != APR_SUCCESS) { - return rc; + if (rv != APR_SUCCESS && rv != APR_EOF) { + break; } - len -= size; - } - - /* Check if file has changed during copying. This is not 100% foolproof - due to NFS attribute caching when on NFS etc. */ - /* FIXME: Can we assume that we're always copying an entire file? 
In that - case we can check if the current filesize matches the length - we think it is */ - rc = apr_file_info_get(&finfo, APR_FINFO_MTIME, srcfd); - if(rc != APR_SUCCESS) { - return rc; - } - if(starttime < finfo.mtime) { - return APR_EGENERAL; - } - - return APR_SUCCESS; -} - - -static apr_status_t replace_brigade_with_cache(cache_handle_t *h, - request_rec *r, - apr_bucket_brigade *bb) -{ - apr_status_t rv; - int flags; - apr_bucket *e; - core_dir_config *pdcfg = ap_get_module_config(r->per_dir_config, - &core_module); - disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj; - flags = APR_READ|APR_BINARY; -#if APR_HAS_SENDFILE - flags |= ((pdcfg->enable_sendfile == ENABLE_SENDFILE_OFF) - ? 0 : APR_SENDFILE_ENABLED); -#endif + rc = apr_file_write_full(fd, str, len, &written); - rv = apr_file_open(&dobj->fd, dobj->datafile, flags, 0, r->pool); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, - "disk_cache: Error opening datafile %s for URL %s", - dobj->datafile, dobj->name); - return rv; - } + if (rc != APR_SUCCESS) { + rv = rc; + break; + } - /* First, empty the brigade */ - e = APR_BRIGADE_FIRST(bb); - while (e != APR_BRIGADE_SENTINEL(bb)) { - apr_bucket *d; - d = e; + ref = e; + *file_size += written; e = APR_BUCKET_NEXT(e); - apr_bucket_delete(d); - } + apr_bucket_delete(ref); + } while (e != s && rv == APR_SUCCESS); - /* Then, populate it with our cached instance */ - rv = recall_body(h, r->pool, bb); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_ERR, 0, r->server, - "disk_cache: Error serving URL %s from cache", dobj->name); - return rv; - } - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "disk_cache: Serving cached body for URL %s", dobj->name); - - return APR_SUCCESS; + return (rv == APR_EOF) ? 
APR_SUCCESS : rv; } - static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *bb) { apr_bucket *e; apr_status_t rv; - int copy_file = FALSE; disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj; disk_cache_conf *conf = ap_get_module_config(r->server->module_config, &disk_cache_module); @@ -1129,129 +1054,62 @@ dobj->file_size = 0; } - /* Check if this is a complete single sequential file, eligable for - * file copy. - */ - if(dobj->file_size == 0 && APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) - { - apr_off_t begin = -1; - apr_off_t pos = -1; - apr_file_t *fd = NULL; - apr_bucket_file *a; - - copy_file = TRUE; - - for (e = APR_BRIGADE_FIRST(bb); - e != APR_BRIGADE_SENTINEL(bb); - e = APR_BUCKET_NEXT(e)) - { - if(APR_BUCKET_IS_EOS(e)) { - break; - } - if(!APR_BUCKET_IS_FILE(e)) { - copy_file = FALSE; - break; - } - - a = e->data; + ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, + "disk_cache: Caching body for URL %s", dobj->name); - if(begin < 0) { - begin = pos = e->start; - fd = a->fd; - } + for (e = APR_BRIGADE_FIRST(bb); + e != APR_BRIGADE_SENTINEL(bb); + e = APR_BUCKET_NEXT(e)) + { + const char *str; + apr_size_t length, written; - if(fd != a->fd || pos != e->start) { - copy_file = FALSE; - break; + if (APR_BUCKET_IS_FILE(e)) { + rv = copy_file_bucket(e, dobj->fd, &dobj->file_size); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Error when copying file bucket for " + "URL %s", dobj->name); + file_cache_errorcleanup(dobj, r); + return rv; } - - pos += e->length; + continue; } - if(copy_file) { - dobj->file_size = pos; + rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ); + if (rv != APR_SUCCESS) { + ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, + "disk_cache: Error when reading bucket for URL %s", + dobj->name); + file_cache_errorcleanup(dobj, r); + return rv; } - } - - if(copy_file) { - apr_bucket_file *a; - ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, 
- "disk_cache: Copying body for URL %s, len %" - APR_OFF_T_FMT, dobj->name, dobj->file_size); - - e = APR_BRIGADE_FIRST(bb); - a = e->data; - - rv = copy_body(r->pool, a->fd, e->start, dobj->tfd, 0, - dobj->file_size); - if(rv != APR_SUCCESS) { + rv = apr_file_write_full(dobj->fd, str, length, &written); + if (rv != APR_SUCCESS) { ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, - "disk_cache: Copying body failed, " - "URL %s", dobj->name); + "disk_cache: Error when writing cache file for " + "URL %s", dobj->name); file_cache_errorcleanup(dobj, r); return rv; } - } - else { - ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, - "disk_cache: Caching body for URL %s", dobj->name); - - for (e = APR_BRIGADE_FIRST(bb); - e != APR_BRIGADE_SENTINEL(bb); - e = APR_BUCKET_NEXT(e)) - { - const char *str; - apr_size_t length, written; - - /* Ignore the non-data-buckets */ - if(APR_BUCKET_IS_METADATA(e)) { - continue; - } - - rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, - "disk_cache: Error when reading bucket for URL %s", - dobj->name); - file_cache_errorcleanup(dobj, r); - return rv; - } - rv = apr_file_write_full(dobj->fd, str, length, &written); - if (rv != APR_SUCCESS) { - ap_log_error(APLOG_MARK, APLOG_ERR, rv, r->server, - "disk_cache: Error when writing cache file for " - "URL %s", dobj->name); - file_cache_errorcleanup(dobj, r); - return rv; - } - dobj->file_size += written; - if (dobj->file_size > conf->maxfs) { - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "disk_cache: URL %s failed the size check " - "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")", - dobj->name, dobj->file_size, conf->maxfs); - file_cache_errorcleanup(dobj, r); - return APR_EGENERAL; - } + dobj->file_size += written; + if (dobj->file_size > conf->maxfs) { + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: URL %s failed the size check " + "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")", + 
dobj->name, dobj->file_size, conf->maxfs); + file_cache_errorcleanup(dobj, r); + return APR_EGENERAL; } } - - /* Drop out here if this wasn't the end */ - if (!APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) { - return APR_SUCCESS; - } - - if(!copy_file) { - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "disk_cache: Done caching URL %s, len %" APR_OFF_T_FMT, - dobj->name, dobj->file_size); - - /* FIXME: Do we really need to check r->no_cache here since we checked - it in the beginning? */ - if (r->no_cache || r->connection->aborted) { + /* Was this the final bucket? If yes, close the temp file and perform + * sanity checks. + */ + if (APR_BUCKET_IS_EOS(APR_BRIGADE_LAST(bb))) { + if (r->connection->aborted || r->no_cache) { ap_log_error(APLOG_MARK, APLOG_INFO, 0, r->server, "disk_cache: Discarding body for URL %s " "because connection has been aborted.", @@ -1269,31 +1127,17 @@ file_cache_errorcleanup(dobj, r); return APR_EGENERAL; } - } - - /* All checks were fine. Move tempfile to final destination */ - /* Link to the perm file, and close the descriptor */ - rv = file_cache_el_final(dobj, r); - if(rv != APR_SUCCESS) { - file_cache_errorcleanup(dobj, r); - return rv; - } - ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, - "disk_cache: Body for URL %s cached.", dobj->name); - - /* Redirect to cachefile if we copied a plain file */ - if(copy_file) { - rv = replace_brigade_with_cache(h, r, bb); - if(rv != APR_SUCCESS) { - return rv; - } + /* All checks were fine. 
Move tempfile to final destination */ + /* Link to the perm file, and close the descriptor */ + file_cache_el_final(dobj, r); + ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server, + "disk_cache: Body for URL %s cached.", dobj->name); } return APR_SUCCESS; } - static void *create_config(apr_pool_t *p, server_rec *s) { disk_cache_conf *conf = apr_pcalloc(p, sizeof(disk_cache_conf)); Index: trunk/modules/cache/mod_disk_cache.h =================================================================== --- trunk.orig/modules/cache/mod_disk_cache.h 2006-10-01 12:56:55.000000000 -0300 +++ trunk/modules/cache/mod_disk_cache.h 2006-10-01 12:57:46.000000000 -0300 @@ -28,8 +28,6 @@ #define CACHE_DATA_SUFFIX ".data" #define CACHE_VDIR_SUFFIX ".vary" -#define CACHE_BUF_SIZE 65536 - #define AP_TEMPFILE_PREFIX "/" #define AP_TEMPFILE_BASE "aptmp" #define AP_TEMPFILE_SUFFIX "XXXXXX"