On 09/24/2009 04:25 PM, poir...@apache.org wrote:
> Author: poirier
> Date: Thu Sep 24 14:25:19 2009
> New Revision: 818492
> 
> URL: http://svn.apache.org/viewvc?rev=818492&view=rev
> Log:
> mod_cache: don't cache incomplete responses, per RFC 2616, 13.8.
> 
> PR: 15866
> 
> Modified:
>     httpd/httpd/trunk/CHANGES
>     httpd/httpd/trunk/modules/cache/mod_cache.c
>     httpd/httpd/trunk/modules/cache/mod_cache.h
> 

> Modified: httpd/httpd/trunk/modules/cache/mod_cache.c
> URL: 
> http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cache/mod_cache.c?rev=818492&r1=818491&r2=818492&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/modules/cache/mod_cache.c (original)
> +++ httpd/httpd/trunk/modules/cache/mod_cache.c Thu Sep 24 14:25:19 2009

> @@ -312,6 +313,100 @@
>      return ap_pass_brigade(f->next, bb);
>  }
>  
> +/* Find out the size of the cached response body.
> +   Returns -1 if unable to do so.
> + */
> +static apr_off_t get_cached_response_body_size(cache_request_rec *cache, 
> request_rec *r)
> +{
> +    cache_handle_t *h = cache->handle;
> +    apr_off_t size = -1;
> +    apr_bucket_brigade *bb;
> +    apr_status_t rv;
> +
> +    /* There's not an API to ask the cache provider for the size
> +       directly, so retrieve it and count the bytes.
> +
> +       But it's not as inefficient as it might look.  mod_disk_cache
> +       will just create a file bucket and set its length to the file
> +       size, and we'll access that length here without ever having to
> +       read the cached file.
> +
> +       If there's some other cache provider that has to read the whole
> +       cached body to fill in the brigade, though, that would make
> +       this rather expensive.

Exactly for this reason I don't like this approach. Currently we have only
mod_disk_cache but this might change in the future and the provider API is
open. So there might be third party modules affected by this.

Why don't we just count the bytes as we store them? My first thought would be
to write a wrapper function for cache->provider->store_body() that could look
like the following and would be called instead of cache->provider->store_body():

apr_status_t store_body(cache_request_rec *cache, request_rec *r, 
apr_bucket_brigade *bb)
{
    apr_status_t rv;

    rv = cache->provider->store_body(cache->handle, r, bb);
    if (rv == APR_SUCCESS) {
        apr_off_t len;

        /*
         * As store_body needs to read the buckets in the brigade anyway
         * it is very unlikely that the brigade still contains buckets of
         * undefined length. But if yes, don't force a read but just accept
         * a failure for our size check.
         */
        apr_brigade_length(bb, 0, &len);
        if ((len != -1) && (cache->actual_size != -1)) {
            cache->actual_size += len;
        }
        else {
            cache->actual_size = -1;
        }
    }
    return rv;
}

actual_size would be another apr_off_t field that needs to be added to 
cache_request_rec
and would need to be initialized with 0.
And the part from validate_content_length would transfer to

    if (cache->size != -1) { /* We have a content-length to check */
        if ((cache->actual_size != -1) && (cache->size != cache->actual_size)) {


> +
> +       XXX Add a standard cache provider API to get the size of the
> +       cached data.
> +    */
> +
> +    bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
> +    rv = cache->provider->recall_body(h, r->pool, bb);
> +    if (rv != APR_SUCCESS) {
> +        ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r,
> +                      "cache: error recalling body");
> +        /* try to remove cache entry, it's probably messed up */
> +        cache->provider->remove_url(h, r->pool);
> +    }
> +    else {
> +        int all_buckets_here = 0;
> +        apr_bucket *e;
> +
> +        size=0;
> +        for (e = APR_BRIGADE_FIRST(bb);
> +             e != APR_BRIGADE_SENTINEL(bb);
> +             e = APR_BUCKET_NEXT(e)) {
> +            if (APR_BUCKET_IS_EOS(e)) {
> +                all_buckets_here=1;
> +                break;
> +            }
> +            if (APR_BUCKET_IS_FLUSH(e)) {
> +                continue;
> +            }
> +            if (e->length == (apr_size_t)-1) {
> +                break;
> +            }
> +            size += e->length;
> +        }
> +        if (!all_buckets_here) {
> +            size = -1;
> +        }
> +    }
> +    apr_brigade_destroy(bb);
> +    return size;
> +}
> +
> +/* Check that the response body we cached has the same length 
> + * as the Content-Length header, if available.  If not, cancel
> + * caching this response.
> + */
> +static int validate_content_length(ap_filter_t *f, cache_request_rec *cache, 
> request_rec *r)
> +{
> +    int rv = OK;
> +
> +    if (cache->size != -1) { /* We have a content-length to check */
> +        apr_off_t actual_len = get_cached_response_body_size(cache, r);
> +        if ((actual_len != -1) && (cache->size != actual_len)) {
> +            ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r,
> +                          "cache: Content-Length header was %"APR_OFF_T_FMT" 
> "
> +                          "but response body size was %"APR_OFF_T_FMT
> +                          ", removing url from cache: %s",
> +                          cache->size, actual_len,
> +                          r->unparsed_uri
> +                          );
> +            ap_remove_output_filter(f);
> +            rv = cache->provider->remove_url(cache->handle, r->pool);
> +            if (rv != OK) {
> +                /* Probably a mod_disk_cache cache area has been (re)mounted
> +                 * read-only, or that there is a permissions problem.
> +                 */
> +                ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
> +                             "cache: attempt to remove url from cache 
> unsuccessful.");
> +            }
> +        }
> +    }
> +    return rv;
> +}
> +
>  

Regards

RĂ¼diger

Reply via email to