On 08/27/2009 12:46 AM, minf...@apache.org wrote:
> Author: minfrin
> Date: Wed Aug 26 22:46:35 2009
> New Revision: 808212
> 
> URL: http://svn.apache.org/viewvc?rev=808212&view=rev
> Log:
> mod_cache: Introduce the thundering herd lock, a mechanism to keep
> the flood of requests at bay that strike a backend webserver as
> a cached entity goes stale.
> 
> Modified:
>     httpd/httpd/trunk/CHANGES
>     httpd/httpd/trunk/docs/manual/mod/mod_cache.xml
>     httpd/httpd/trunk/modules/cache/cache_storage.c
>     httpd/httpd/trunk/modules/cache/cache_util.c
>     httpd/httpd/trunk/modules/cache/mod_cache.c
>     httpd/httpd/trunk/modules/cache/mod_cache.h
> 


> Modified: httpd/httpd/trunk/modules/cache/cache_util.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cache/cache_util.c?rev=808212&r1=808211&r2=808212&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/modules/cache/cache_util.c (original)
> +++ httpd/httpd/trunk/modules/cache/cache_util.c Wed Aug 26 22:46:35 2009

> @@ -378,7 +546,64 @@
>          return 1;    /* Cache object is fresh (enough) */
>      }
>  
> -    return 0;        /* Cache object is stale */
> +    /*
> +     * At this point we are stale, but: if we are under load, we may let
> +     * a significant number of stale requests through before the first
> +     * stale request successfully revalidates itself, causing a sudden
> +     * unexpected thundering herd which in turn brings angst and drama.
> +     *
> +     * So.
> +     *
> +     * We want the first stale request to go through as normal. But the
> +     * second and subsequent request, we must pretend to be fresh until
> +     * the first request comes back with either new content or confirmation
> +     * that the stale content is still fresh.
> +     *
> +     * To achieve this, we create a very simple file based lock based on
> +     * the key of the cached object. We attempt to open the lock file with
> +     * exclusive write access. If we succeed, woohoo! we're first, and we
> +     * follow the stale path to the backend server. If we fail, oh well,
> +     * we follow the fresh path, and avoid being a thundering herd.
> +     *
> +     * The lock lives only as long as the stale request that went on ahead.
> +     * If the request succeeds, the lock is deleted. If the request fails,
> +     * the lock is deleted, and another request gets to make a new lock
> +     * and try again.
> +     *
> +     * At any time, a request marked "no-cache" will force a refresh,
> +     * ignoring the lock, ensuring an extended lockout is impossible.
> +     *
> +     * A lock that exceeds a maximum age will be deleted, and another
> +     * request gets to make a new lock and try again.
> +     */
> +    status = ap_cache_try_lock(conf, r, (char *)h->cache_obj->key);
> +    if (APR_SUCCESS == status) {
> +        /* we obtained a lock, follow the stale path */
> +        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
> +                     "Cache lock obtained for stale cached URL, "
> +                     "revalidating entry: %s",
> +                     r->unparsed_uri);
> +        return 0;
> +    }
> +    else if (APR_EEXIST == status) {
> +        /* lock already exists, return stale data anyway, with a warning */
> +        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
> +                     "Cache already locked for stale cached URL, "
> +                     "pretend it is fresh: %s",
> +                     r->unparsed_uri);
> +        apr_table_merge(h->resp_hdrs, "Warning",
> +                        "110 Response is stale");

The original code above uses some magic to avoid stomping on an already
existing Warning header:

            /* make sure we don't stomp on a previous warning */
            if ((warn_head == NULL) ||
                ((warn_head != NULL) && (ap_strstr_c(warn_head, "110") == NULL))) {
                apr_table_merge(h->resp_hdrs, "Warning",
                                "110 Response is stale");

Shouldn't we do the same thing here as well?
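Something along these lines in the lock-exists branch would preserve that
behaviour (just a sketch; I'm assuming warn_head is looked up from
h->resp_hdrs the same way as in the existing code):

            /* sketch: don't stomp on a previous Warning header here either */
            const char *warn_head = apr_table_get(h->resp_hdrs, "Warning");
            if ((warn_head == NULL) ||
                (ap_strstr_c(warn_head, "110") == NULL)) {
                apr_table_merge(h->resp_hdrs, "Warning",
                                "110 Response is stale");
            }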


> +        return 1;
> +    }
> +    else {
> +        /* some other error occurred, just treat the object as stale */
> +        ap_log_error(APLOG_MARK, APLOG_DEBUG, status, r->server,
> +                     "Attempt to obtain a cache lock for stale "
> +                     "cached URL failed, revalidating entry anyway: %s",
> +                     r->unparsed_uri);
> +        return 0;
> +    }
> +
>  }
>  
>  /*
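The comment block above describes the lock as a simple exclusive create of a
lock file derived from the cache key. For readers following along, that
presumably boils down to something like this (illustrative sketch only, not
the actual ap_cache_try_lock code; lock_name stands in for whatever path the
key is mapped to):

    apr_file_t *lockfile;
    /* An exclusive create succeeds for the first stale request and fails
     * with an EEXIST-style status for everyone else until the lock goes away. */
    apr_status_t status = apr_file_open(&lockfile, lock_name,
                                        APR_WRITE | APR_CREATE | APR_EXCL,
                                        APR_OS_DEFAULT, r->pool);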

> Modified: httpd/httpd/trunk/modules/cache/mod_cache.c
> URL: http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/cache/mod_cache.c?rev=808212&r1=808211&r2=808212&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/modules/cache/mod_cache.c (original)
> +++ httpd/httpd/trunk/modules/cache/mod_cache.c Wed Aug 26 22:46:35 2009
> @@ -111,41 +111,58 @@
>      if (rv != OK) {
>          if (rv == DECLINED) {
>              if (!lookup) {
> +                char *key = NULL;
>  
> -                /*
> -                 * Add cache_save filter to cache this request. Choose
> -                 * the correct filter by checking if we are a subrequest
> -                 * or not.
> +                /* try to obtain a cache lock at this point. if we succeed,
> +                 * we are the first to try and cache this url. if we fail,
> +                 * it means someone else is already trying to cache this
> +                 * url, and we should just let the request through to the
> +                 * backend without any attempt to cache. this stops
> +                 * duplicated simultaneous attempts to cache an entity.
>                   */
> -                if (r->main) {
> -                    ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
> -                                 r->server,
> -                                 "Adding CACHE_SAVE_SUBREQ filter for %s",
> -                                 r->uri);
> -                    ap_add_output_filter_handle(cache_save_subreq_filter_handle,
> -                                                NULL, r, r->connection);
> +                rv = ap_cache_try_lock(conf, r, NULL);

Hm. If the resource was present in the cache and stale, we already tried to
acquire the lock in ap_cache_check_freshness. It seems wrong to try the lock twice.
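Something like this could avoid the second attempt (purely a sketch; the
lock_acquired flag on the cache request rec is made up and does not exist in
the patch):

                /* hypothetical: reuse the result of the lock attempt made
                 * during the freshness check instead of trying again */
                if (cache->lock_acquired) {
                    rv = APR_SUCCESS;
                }
                else {
                    rv = ap_cache_try_lock(conf, r, NULL);
                }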

> +                if (APR_SUCCESS == rv) {
> +
> +                    /*
> +                     * Add cache_save filter to cache this request. Choose
> +                     * the correct filter by checking if we are a subrequest
> +                     * or not.
> +                     */
> +                    if (r->main) {
> +                        ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
> +                                r->server,
> +                                "Adding CACHE_SAVE_SUBREQ filter for %s",
> +                                r->uri);
> +                        ap_add_output_filter_handle(cache_save_subreq_filter_handle,
> +                                NULL, r, r->connection);
> +                    }
> +                    else {
> +                        ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
> +                                r->server, "Adding CACHE_SAVE filter for %s",
> +                                r->uri);
> +                        ap_add_output_filter_handle(cache_save_filter_handle,
> +                                NULL, r, r->connection);
> +                    }
> +
> +                    ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
> +                            "Adding CACHE_REMOVE_URL filter for %s",
> +                            r->uri);
> +
> +                    /* Add cache_remove_url filter to this request to remove a
> +                     * stale cache entry if needed. Also put the current cache
> +                     * request rec in the filter context, as the request that
> +                     * is available later during running the filter maybe
> +                     * different due to an internal redirect.
> +                     */
> +                    cache->remove_url_filter =
> +                        ap_add_output_filter_handle(cache_remove_url_filter_handle,
> +                                cache, r, r->connection);
>                  }
>                  else {
>                      ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS,
> -                                 r->server, "Adding CACHE_SAVE filter for %s",
> -                                 r->uri);
> -                    ap_add_output_filter_handle(cache_save_filter_handle,
> -                                                NULL, r, r->connection);
> +                                 r->server, "Cache locked for url, not caching "
> +                                 "response: %s", r->uri);
>                  }
> -
> -                ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
> -                             "Adding CACHE_REMOVE_URL filter for %s",
> -                             r->uri);
> -
> -                /* Add cache_remove_url filter to this request to remove a
> -                 * stale cache entry if needed. Also put the current cache
> -                 * request rec in the filter context, as the request that
> -                 * is available later during running the filter maybe
> -                 * different due to an internal redirect.
> -                 */
> -                cache->remove_url_filter =
> -                    ap_add_output_filter_handle(cache_remove_url_filter_handle,
> -                                                cache, r, r->connection);
>              }
>              else {
>                  if (cache->stale_headers) {

> @@ -368,7 +390,24 @@
>              ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
>                           "cache: Cache provider's store_body failed!");
>              ap_remove_output_filter(f);
> +
> +            /* give someone else the chance to cache the file */
> +            ap_cache_remove_lock(conf, r, cache->handle ?

IMHO, if cache->handle were NULL we wouldn't get here, because we would have
backed out earlier.
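If that holds, the ternary could simply become (sketch):

            ap_cache_remove_lock(conf, r,
                    (char *)cache->handle->cache_obj->key, in);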

> +                    (char *)cache->handle->cache_obj->key : NULL, in);
> +        }
> +
> +        /* proactively remove the lock as soon as we see the eos bucket */
> +        for (e = APR_BRIGADE_FIRST(in);
> +             e != APR_BRIGADE_SENTINEL(in);
> +             e = APR_BUCKET_NEXT(e))
> +        {
> +            if (APR_BUCKET_IS_EOS(e)) {
> +                ap_cache_remove_lock(conf, r, cache->handle ?
> +                        (char *)cache->handle->cache_obj->key : NULL, in);
> +                break;
> +            }
>          }
> +
>          return ap_pass_brigade(f->next, in);
>      }
>  

> @@ -955,6 +1023,13 @@
>      /* array of identifiers that should not be used for key calculation */
>      ps->ignore_session_id = apr_array_make(p, 10, sizeof(char *));
>      ps->ignore_session_id_set = CACHE_IGNORE_SESSION_ID_UNSET;
> +    ps->lock = 0; /* thundering herd lock defaults to off */
> +    ps->lock_set = 0;

Why don't you initialize lockmaxage_set and lockpath_set here as well?
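Something like this, next to the other lock defaults (sketch):

    ps->lockpath_set = 0;
    ps->lockmaxage_set = 0;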

> +    apr_temp_dir_get(&tmppath, p);
> +    if (tmppath) {
> +        ps->lockpath = apr_pstrcat(p, tmppath, DEFAULT_CACHE_LOCKPATH, NULL);
> +    }
> +    ps->lockmaxage = apr_time_from_sec(DEFAULT_CACHE_MAXAGE);
>      return ps;
>  }
>  

Overall the patch is hard to read since you are mixing typo fixes, style fixes
and whitespace fixes with your functional changes. It would be easier to read
if you separated these out into a separate patch.

Regards

Rüdiger
