On Mon, Aug 08, 2005 at 10:33:47AM +0200, [EMAIL PROTECTED] wrote:
> >>Is a traversal really needed? What about going back the full path of the
> >>header / data file to the cache root and removing each component on the
> >>way by calling apr_dir_remove on each component until it fails?
> > 
> > I'm not sure if apr_dir_remove guarantees failure when operated on
> > non-empty directories. If it does then that's an easy enough way.
> 
> I should be at least on Unix because it simply calls rmdir and this returns
> ENOTEMPTY on the removal of non empty directories. This results in
> apr_dir_remove returning APR_ENOTEMPTY.

I'm just being paranoid, but I'm not certain of the behaviour of the
win32, beos or other future implementations, and the APR itself makes no
claims about apr_dir_remove's behaviour - it just says "Remove directory
from the file system" which is pretty ambiguous. I've now checked the
win32 and beos implementations, and they seem safe, so I've submitted a
silly doxygen comment patch to [EMAIL PROTECTED] to try and get rid of the 
problem
for good.

> >     In cache_remove_url, you have code which tries to determine if
> >     the cache->handle or cache->stale_handle should be removed, but
> >     shouldn't it always be the stale_handle? You only add the
> >     remove_url filter if cache_select_url() != OK, which means
> >     cache->handle will always be NULL.
> 
>         You are right. In the current usage of cache_remove_url this
>         check does not make sense, but I wanted to keep the scope of
>         this function somewhat broder to allow to call this function
>         in the case the caller wants to remove the entity behind
>         cache->handle.

x. At the very least, I'd definitely make stale_handle the default if
both pointers are non-NULL :)

O.k., I've merged our two patches, but I've changed a few things, tell
me if there's anothing you think is wrong;

  1. Did some minor whitespace/style cleanups, nothing
     big. also changed the instances of CACHE_REMOVAL_URL
     in comments to CACHE_REMOVE_URL, and a few small things like
     that cought. Also made the log messages consistent
     with the rest of mod_cache, and added some to aid 
     testing.

  2. modified the mod_disk_cache code to delete the data only
     if the header could be deleted first.

  3. renamed the cache_remove_url_filter pointer in the cache 
     struct to remove_url_filter, cache->cache_$something seemed
     redundant and it makes the formatting neater :-)

  4. Fixed a bug in the mod_disk_cache code. It was using
     dobj->datafile in the dobj->hdrsfile section for log
     output. 

  5. Swapped the order of the ternary check in cache_storage,
     so that we'll remove the stale_handle if both the stale_hand
     and handle are non-NULL.

The patch is attached, and I've been testing it for the last few hours
without problem. The code is now running on ftp.heanet.ie. (along
with htcacheclean -t).

-- 
Colm MacCárthaigh                        Public Key: [EMAIL PROTECTED]
Index: modules/cache/mod_mem_cache.c
===================================================================
--- modules/cache/mod_mem_cache.c       (revision 230717)
+++ modules/cache/mod_mem_cache.c       (working copy)
@@ -601,7 +601,7 @@
 /* remove_url()
  * Notes:
  */
-static int remove_url(const char *key) 
+static int remove_url(cache_handle_t *h, apr_pool_t *p) 
 {
     cache_object_t *obj;
     int cleanup = 0;
@@ -609,8 +609,8 @@
     if (sconf->lock) {
         apr_thread_mutex_lock(sconf->lock);
     }
-  
-    obj = cache_find(sconf->cache_cache, key);       
+ 
+    obj = h->cache_obj; 
     if (obj) {
         cache_remove(sconf->cache_cache, obj);
         /* For performance, cleanup cache object after releasing the lock */
Index: modules/cache/mod_cache.c
===================================================================
--- modules/cache/mod_cache.c   (revision 230717)
+++ modules/cache/mod_cache.c   (working copy)
@@ -29,6 +29,7 @@
  */
 static ap_filter_rec_t *cache_save_filter_handle;
 static ap_filter_rec_t *cache_out_filter_handle;
+static ap_filter_rec_t *cache_remove_url_filter_handle;
 
 /*
  * CACHE handler
@@ -123,6 +124,19 @@
                 /* add cache_save filter to cache this request */
                 ap_add_output_filter_handle(cache_save_filter_handle, NULL, r,
                                             r->connection);
+
+                ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
+                             "Adding CACHE_REMOVE_URL filter.");
+
+                /* Add cache_remove_url filter to this request to remove a
+                 * stale cache entry if needed. Also put the current cache
+                 * request rec in the filter context, as the request that
+                 * is available later during running the filter maybe
+                 * different due to an internal redirect.
+                 */
+                cache->remove_url_filter = 
+                    
ap_add_output_filter_handle(cache_remove_url_filter_handle, 
+                                                cache, r, r->connection);
             }
             else if (cache->stale_headers) {
                 ap_log_error(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r->server,
@@ -441,11 +455,6 @@
     if (reason) {
         ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                      "cache: %s not cached. Reason: %s", url, reason);
-        /* remove this object from the cache 
-         * BillS Asks.. Why do we need to make this call to remove_url?
-         * leave it in for now..
-         */
-        cache_remove_url(r, url);
 
         /* remove this filter from the chain */
         ap_remove_output_filter(f);
@@ -546,6 +555,13 @@
     ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
                  "cache: Caching url: %s", url);
 
+    /* We are actually caching this response. So it does not
+     * make sense to remove this entity any more.
+     */ 
+    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+                 "cache: Removing CACHE_REMOVE_URL filter.");
+    ap_remove_output_filter(cache->remove_url_filter);
+
     /*
      * We now want to update the cache file header information with
      * the new date, last modified, expire and content length and write
@@ -666,7 +682,13 @@
         ap_cache_accept_headers(cache->handle, r, 1);
     }
 
-    /* Write away header information to cache. */
+    /* Write away header information to cache. It is possible that we are
+     * trying to update headers for an entity which has already been cached.
+     * 
+     * This may fail, due to an unwritable cache area. E.g. filesystem full,
+     * permissions problems or a read-only (re)mount. This must be handled 
+     * later. 
+     */
     rv = cache->provider->store_headers(cache->handle, r, info);
 
     /* Did we just update the cached headers on a revalidated response?
@@ -675,7 +697,7 @@
      * the same way as with a regular response, but conditions are now checked
      * against the cached or merged response headers.
      */
-    if (rv == APR_SUCCESS && cache->stale_handle) {
+    if (cache->stale_handle) {
         apr_bucket_brigade *bb;
         apr_bucket *bkt;
         int status;
@@ -699,12 +721,39 @@
         }
 
         cache->block_response = 1;
+
+        /* Before returning we need to handle the possible case of an
+         * unwritable cache. Rather than leaving the entity in the cache
+         * and having it constantly re-validated, now that we have recalled 
+         * the body it is safe to try and remove the url from the cache.
+         */
+        if (rv != APR_SUCCESS) {
+            ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
+                         "cache: updating headers with store_headers failed. "
+                         "Removing cached url.");
+    
+            rv = cache->provider->remove_url(cache->stale_handle, r->pool);
+            if (rv != OK) {
+                /* Probably a mod_disk_cache cache area has been (re)mounted 
+                 * read-only, or that there is a permissions problem. 
+                 */
+                ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
+                     "cache: attempt to remove url from cache unsuccessful.");
+            }
+        }
+
         return ap_pass_brigade(f->next, bb);
     }
+  
+    if(rv != APR_SUCCESS) {
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
+                     "cache: store_headers failed");
+        ap_remove_output_filter(f);
 
-    if (rv == APR_SUCCESS) {
-        rv = cache->provider->store_body(cache->handle, r, in);
+        return ap_pass_brigade(f->next, in);
     }
+
+    rv = cache->provider->store_body(cache->handle, r, in);
     if (rv != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_DEBUG, rv, r->server,
                      "cache: store_body failed");
@@ -714,6 +763,53 @@
     return ap_pass_brigade(f->next, in);
 }
 
+/*
+ * CACHE_REMOVE_URL filter
+ * ---------------
+ *
+ * This filter gets added in the quick handler every time the CACHE_SAVE filter
+ * gets inserted. Its purpose is to remove a confirmed  stale cache entry from 
+ * the cache.
+ * 
+ * CACHE_REMOVE_URL has to be a protocol filter to ensure that is run even if
+ * the response is a canned error message, which removes the content filters
+ * and thus the CACHE_SAVE filter from the chain. 
+ *
+ * CACHE_REMOVE_URL expects cache request rec within its context because the
+ * request this filter runs on can be different from the one whose cache entry
+ * should be removed, due to internal redirects. 
+ */
+
+static int cache_remove_url_filter(ap_filter_t *f, apr_bucket_brigade *in)
+{
+    request_rec *r = f->r;
+    cache_request_rec *cache;
+
+    /* Setup cache_request_rec */
+    cache = (cache_request_rec *) f->ctx;
+
+    if (!cache) {
+        /* user likely configured CACHE_REMOVE_URL manually; they should 
really 
+         * use mod_cache configuration to do that. So:
+         * 1. Remove ourselves 
+         * 2. Do nothing and bail out
+         */
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+                     "cache: CACHE_REMOVE_URL enabled unexpectedly");
+        ap_remove_output_filter(f);
+        return ap_pass_brigade(f->next, in);
+    }
+    /*
+     * Now remove this cache entry from the cache
+     */ 
+    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, r->server,
+                 "cache: Removing url %s from the cache", f->r->unparsed_uri);
+    cache_remove_url(cache, r->pool);
+    /* remove ourselves */
+    ap_remove_output_filter(f);
+    return ap_pass_brigade(f->next, in);
+}
+
 /* -------------------------------------------------------------- */
 /* Setup configurable data */
 
@@ -967,6 +1063,7 @@
     return OK;
 }
 
+
 static const command_rec cache_cmds[] =
 {
     /* XXX
@@ -1033,6 +1130,15 @@
                                   cache_out_filter, 
                                   NULL,
                                   AP_FTYPE_CONTENT_SET+1);
+    /* CACHE_REMOVE_URL has to be a protocol filter to ensure that is
+     * run even if the response is a canned error message, which
+     * removes the content filters.
+     */
+    cache_remove_url_filter_handle =
+        ap_register_output_filter("CACHE_REMOVE_URL",
+                                  cache_remove_url_filter,
+                                  NULL,
+                                  AP_FTYPE_PROTOCOL);
     ap_hook_post_config(cache_post_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
 }
 
Index: modules/cache/mod_cache.h
===================================================================
--- modules/cache/mod_cache.h   (revision 230717)
+++ modules/cache/mod_cache.h   (working copy)
@@ -197,7 +197,7 @@
                            const char *urlkey, apr_off_t len);
     int (*open_entity) (cache_handle_t *h, request_rec *r,
                            const char *urlkey);
-    int (*remove_url) (const char *urlkey);
+    int (*remove_url) (cache_handle_t *h, apr_pool_t *p);
 } cache_provider;
 
 /* A linked-list of authn providers. */
@@ -225,6 +225,7 @@
     apr_time_t exp;                     /* expiration */
     apr_time_t lastmod;                 /* last-modified time */
     cache_info *info;                   /* current cache info */
+    ap_filter_t *remove_url_filter;     /* Enable us to remove the filter */
 } cache_request_rec;
 
 
@@ -271,7 +272,7 @@
 /**
  * cache_storage.c
  */
-int cache_remove_url(request_rec *r, char *url);
+int cache_remove_url(cache_request_rec *cache, apr_pool_t *p);
 int cache_create_entity(request_rec *r, char *url, apr_off_t size);
 int cache_select_url(request_rec *r, char *url);
 apr_status_t cache_generate_key_default( request_rec *r, apr_pool_t*p, 
char**key );
Index: modules/cache/mod_disk_cache.c
===================================================================
--- modules/cache/mod_disk_cache.c      (revision 230717)
+++ modules/cache/mod_disk_cache.c      (working copy)
@@ -514,9 +514,51 @@
     return OK;
 }
 
-static int remove_url(const char *key)
+static int remove_url(cache_handle_t *h, apr_pool_t *p)
 {
-    /* XXX: Delete file from cache! */
+    apr_status_t rc;
+    disk_cache_object_t *dobj;
+
+    /* Get disk cache object from cache handle */
+    dobj = (disk_cache_object_t *) h->cache_obj->vobj;
+    if (!dobj) {
+        return DECLINED;
+    }
+
+    /* Delete headers file */
+    if (dobj->hdrsfile) {
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
+                     "disk_cache: Deleting %s from cache.", dobj->hdrsfile);
+        
+        rc = apr_file_remove(dobj->hdrsfile, p);
+        if ((rc != APR_SUCCESS) && (rc != APR_ENOENT)) {
+            /* Will only result in an output if httpd is started with -e debug.
+             * For reason see log_error_core for the case s == NULL.
+             */
+            ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
+                   "disk_cache: Failed to delete headers file %s from cache.",
+                         dobj->hdrsfile);
+            return DECLINED;
+        }
+    }
+
+     /* Delete data file */
+    if (dobj->datafile) {
+        ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, NULL,
+                     "disk_cache: Deleting %s from cache.", dobj->datafile);
+ 
+        rc = apr_file_remove(dobj->datafile, p);
+        if ((rc != APR_SUCCESS) && (rc != APR_ENOENT)) {
+            /* Will only result in an output if httpd is started with -e debug.
+             * For reason see log_error_core for the case s == NULL.
+             */
+            ap_log_error(APLOG_MARK, APLOG_DEBUG, rc, NULL,
+                      "disk_cache: Failed to delete data file %s from cache.",
+                         dobj->datafile);
+            return DECLINED;
+        }
+    }
+
     return OK;
 }
 
Index: modules/cache/cache_storage.c
===================================================================
--- modules/cache/cache_storage.c       (revision 230717)
+++ modules/cache/cache_storage.c       (working copy)
@@ -28,24 +28,28 @@
  * delete all URL entities from the cache
  *
  */
-int cache_remove_url(request_rec *r, char *url)
+int cache_remove_url(cache_request_rec *cache, apr_pool_t *p)
 {
     cache_provider_list *list;
     apr_status_t rv;
+    cache_handle_t *h;
+    
     char *key;
-    cache_request_rec *cache = (cache_request_rec *) 
-                         ap_get_module_config(r->request_config, 
&cache_module);
 
-    rv = cache_generate_key(r,r->pool,&key);
-    if (rv != APR_SUCCESS) {
-        return rv;
+    list = cache->providers;
+    
+    /* Remove the stale cache entry if present. If not, we're
+     * being called from outside of a request; remove the 
+     * non-stalle handle.
+     */
+    h = cache->stale_handle ? cache->stale_handle : cache->handle;
+    if (!h) {
+       return OK;
     }
 
-    list = cache->providers;
-
     /* for each specified cache type, delete the URL */
-    while(list) {
-        list->provider->remove_url(key);
+    while(list) {    
+        list->provider->remove_url(h, p);
         list = list->next;
     }
     return OK;

Reply via email to