This patch sends an If-None-Match  request, when we need to re-validate
if a cached object which has a strong ETag is still valid.

This is also done in the cases an HTTP client request contains HTTP
headers prohibiting a from-cache response (i.e., a "reload" request).
The use of If-None-Match request in this context violates RFC 2616 and
requires using reload-into-ims option within refresh_pattern squid.conf
directive.

The exact definition of a "reload request" and the adjustment/removal of
"reload" headers is the same as currently used for reload-into-ims
option support. This patch is not modifying that code/logic, just adding
an If-None-Match header in addition to the IMS header that Squid already
adds.

This is a Measurement Factory Project
=== modified file 'src/HttpRequest.cc'
--- src/HttpRequest.cc	2013-06-03 14:05:16 +0000
+++ src/HttpRequest.cc	2013-06-26 14:07:00 +0000
@@ -160,40 +160,42 @@
     }
 
     if (range) {
         delete range;
         range = NULL;
     }
 
     myportname.clean();
 
     notes = NULL;
 
     tag.clean();
 #if USE_AUTH
     extacl_user.clean();
     extacl_passwd.clean();
 #endif
     extacl_log.clean();
 
     extacl_message.clean();
 
+    etag.clean();
+
 #if USE_ADAPTATION
     adaptHistory_ = NULL;
 #endif
 #if ICAP_CLIENT
     icapHistory_ = NULL;
 #endif
 }
 
 void
 HttpRequest::reset()
 {
     clean();
     init();
 }
 
 HttpRequest *
 HttpRequest::clone() const
 {
     HttpRequest *copy = new HttpRequest(method, protocol, urlpath.termedBuf());
     // TODO: move common cloning clone to Msg::copyTo() or copy ctor
@@ -205,40 +207,41 @@
     copy->body_pipe = body_pipe;
 
     strncpy(copy->login, login, sizeof(login)); // MAX_LOGIN_SZ
     strncpy(copy->host, host, sizeof(host)); // SQUIDHOSTNAMELEN
     copy->host_addr = host_addr;
 
     copy->port = port;
     // urlPath handled in ctor
     copy->canonical = canonical ? xstrdup(canonical) : NULL;
 
     // range handled in hdrCacheInit()
     copy->ims = ims;
     copy->imslen = imslen;
     copy->hier = hier; // Is it safe to copy? Should we?
 
     copy->errType = errType;
 
     // XXX: what to do with copy->peer_login?
 
     copy->lastmod = lastmod;
+    copy->etag = etag;
     copy->vary_headers = vary_headers ? xstrdup(vary_headers) : NULL;
     // XXX: what to do with copy->peer_domain?
 
     copy->myportname = myportname;
     copy->tag = tag;
 #if USE_AUTH
     copy->extacl_user = extacl_user;
     copy->extacl_passwd = extacl_passwd;
 #endif
     copy->extacl_log = extacl_log;
     copy->extacl_message = extacl_message;
 
     assert(copy->inheritProperties(this));
 
     return copy;
 }
 
 bool
 HttpRequest::inheritProperties(const HttpMsg *aMsg)
 {

=== modified file 'src/HttpRequest.h'
--- src/HttpRequest.h	2013-06-03 14:05:16 +0000
+++ src/HttpRequest.h	2013-06-26 14:07:00 +0000
@@ -202,40 +202,43 @@
     char *peer_domain;		/* Configured peer forceddomain */
 
     String myportname; // Internal tag name= value from port this requests arrived in.
 
     NotePairs::Pointer notes; ///< annotations added by the note directive and helpers
 
     String tag;			/* Internal tag for this request */
 
     String extacl_user;		/* User name returned by extacl lookup */
 
     String extacl_passwd;	/* Password returned by extacl lookup */
 
     String extacl_log;		/* String to be used for access.log purposes */
 
     String extacl_message;	/* String to be used for error page purposes */
 
 #if FOLLOW_X_FORWARDED_FOR
     String x_forwarded_for_iterator; /* XXX a list of IP addresses */
 #endif /* FOLLOW_X_FORWARDED_FOR */
 
+    /// A strong etag of the cached entry. Used for refreshing that entry.
+    String etag;
+
 public:
     bool multipartRangeRequest() const;
 
     bool parseFirstLine(const char *start, const char *end);
 
     int parseHeader(const char *parse_start, int len);
 
     virtual bool expectingBody(const HttpRequestMethod& unused, int64_t&) const;
 
     bool bodyNibbled() const; // the request has a [partially] consumed body
 
     int prefixLen();
 
     void swapOut(StoreEntry * e);
 
     void pack(Packer * p);
 
     static void httpRequestPack(void *obj, Packer *p);
 
     static HttpRequest * CreateFromUrlAndMethod(char * url, const HttpRequestMethod& method);

=== modified file 'src/Store.h'
--- src/Store.h	2012-10-29 04:59:58 +0000
+++ src/Store.h	2013-06-26 14:07:00 +0000
@@ -123,40 +123,42 @@
     void hideMemObject(); ///< no mem_obj for callers until createMemObject
     void dump(int debug_lvl) const;
     void hashDelete();
     void hashInsert(const cache_key *);
     void registerAbort(STABH * cb, void *);
     void reset();
     void setMemStatus(mem_status_t);
     void timestampsSet();
     void unregisterAbort();
     void destroyMemObject();
     int checkTooSmall();
 
     void delayAwareRead(const Comm::ConnectionPointer &conn, char *buf, int len, AsyncCall::Pointer callback);
 
     void setNoDelay (bool const);
     bool modifiedSince(HttpRequest * request) const;
     /// has ETag matching at least one of the If-Match etags
     bool hasIfMatchEtag(const HttpRequest &request) const;
     /// has ETag matching at least one of the If-None-Match etags
     bool hasIfNoneMatchEtag(const HttpRequest &request) const;
+    /// whether this entry has an ETag; if yes, puts ETag value into parameter
+    bool hasEtag(ETag &etag) const;
 
     /** What store does this entry belong too ? */
     virtual RefCount<SwapDir> store() const;
 
     MemObject *mem_obj;
     MemObject *hidden_mem_obj; ///< mem_obj created before URLs were known
     RemovalPolicyNode repl;
     /* START OF ON-DISK STORE_META_STD TLV field */
     time_t timestamp;
     time_t lastref;
     time_t expires;
     time_t lastmod;
     uint64_t swap_file_sz;
     uint16_t refcount;
     uint16_t flags;
     /* END OF ON-DISK STORE_META_STD */
 
     /// unique ID inside a cache_dir for swapped out entries; -1 for others
     sfileno swap_filen:25; // keep in sync with SwapFilenMax
 

=== modified file 'src/cf.data.pre'
--- src/cf.data.pre	2013-06-18 06:22:13 +0000
+++ src/cf.data.pre	2013-06-26 14:07:01 +0000
@@ -4844,44 +4844,46 @@
 		 ignore-private
 		 ignore-auth
 		 max-stale=NN
 		 refresh-ims
 		 store-stale
 
 		override-expire enforces min age even if the server
 		sent an explicit expiry time (e.g., with the
 		Expires: header or Cache-Control: max-age). Doing this
 		VIOLATES the HTTP standard.  Enabling this feature
 		could make you liable for problems which it causes.
 
 		Note: override-expire does not enforce staleness - it only extends
 		freshness / min. If the server returns a Expires time which
 		is longer than your max time, Squid will still consider
 		the object fresh for that period of time.
 
 		override-lastmod enforces min age even on objects
 		that were modified recently.
 
-		reload-into-ims changes client no-cache or ``reload''
-		to If-Modified-Since requests. Doing this VIOLATES the
-		HTTP standard. Enabling this feature could make you
-		liable for problems which it causes.
+		reload-into-ims changes a client no-cache or ``reload''
+		request for a cached entry into a conditional request using
+		If-Modified-Since and/or If-None-Match headers, provided the
+		cached entry has a Last-Modified and/or a strong ETag header.
+		Doing this VIOLATES the HTTP standard. Enabling this feature
+		could make you liable for problems which it causes.
 
 		ignore-reload ignores a client no-cache or ``reload''
 		header. Doing this VIOLATES the HTTP standard. Enabling
 		this feature could make you liable for problems which
 		it causes.
 
 		ignore-no-store ignores any ``Cache-control: no-store''
 		headers received from a server. Doing this VIOLATES
 		the HTTP standard. Enabling this feature could make you
 		liable for problems which it causes.
 
 		ignore-must-revalidate ignores any ``Cache-Control: must-revalidate``
 		headers received from a server. Doing this VIOLATES
 		the HTTP standard. Enabling this feature could make you
 		liable for problems which it causes.
 
 		ignore-private ignores any ``Cache-control: private''
 		headers received from a server. Doing this VIOLATES
 		the HTTP standard. Enabling this feature could make you
 		liable for problems which it causes.

=== modified file 'src/client_side_reply.cc'
--- src/client_side_reply.cc	2013-06-07 04:35:25 +0000
+++ src/client_side_reply.cc	2013-06-26 14:07:01 +0000
@@ -18,40 +18,41 @@
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
  *
  */
 #include "squid.h"
 #include "acl/FilledChecklist.h"
 #include "acl/Gadgets.h"
 #include "anyp/PortCfg.h"
 #include "client_side_reply.h"
 #include "errorpage.h"
+#include "ETag.h"
 #include "fd.h"
 #include "fde.h"
 #include "format/Token.h"
 #include "FwdState.h"
 #include "globals.h"
 #include "globals.h"
 #include "HttpHeaderTools.h"
 #include "HttpReply.h"
 #include "HttpRequest.h"
 #include "ip/QosConfig.h"
 #include "ipcache.h"
 #include "log/access_log.h"
 #include "MemObject.h"
 #include "mime_header.h"
 #include "neighbors.h"
 #include "refresh.h"
 #include "RequestFlags.h"
 #include "SquidConfig.h"
 #include "SquidTime.h"
 #include "Store.h"
@@ -270,40 +271,47 @@
     }
 
     http->request->flags.refresh = true;
 #if STORE_CLIENT_LIST_DEBUG
     /* Prevent a race with the store client memory free routines
      */
     assert(storeClientIsThisAClient(sc, this));
 #endif
     /* Prepare to make a new temporary request */
     saveState();
     entry = storeCreateEntry(url,
                              http->log_uri, http->request->flags, http->request->method);
     /* NOTE, don't call StoreEntry->lock(), storeCreateEntry() does it */
     sc = storeClientListAdd(entry, this);
 #if USE_DELAY_POOLS
     /* delay_id is already set on original store client */
     sc->setDelayId(DelayId::DelayClient(http));
 #endif
 
     http->request->lastmod = old_entry->lastmod;
+
+    if (!http->request->header.has(HDR_IF_NONE_MATCH)) {
+        ETag etag = {NULL, -1}; // TODO: make that a default ETag constructor
+        if (old_entry->hasEtag(etag) && !etag.weak)
+            http->request->etag = etag.str;
+    }
+
     debugs(88, 5, "clientReplyContext::processExpired : lastmod " << entry->lastmod );
     http->storeEntry(entry);
     assert(http->out.offset == 0);
     assert(http->request->clientConnectionManager == http->getConn());
 
     /*
      * A refcounted pointer so that FwdState stays around as long as
      * this clientReplyContext does
      */
     Comm::ConnectionPointer conn = http->getConn() != NULL ? http->getConn()->clientConnection : NULL;
     FwdState::Start(conn, http->storeEntry(), http->request, http->al);
 
     /* Register with storage manager to receive updates when data comes in. */
 
     if (EBIT_TEST(entry->flags, ENTRY_ABORTED))
         debugs(88, DBG_CRITICAL, "clientReplyContext::processExpired: Found ENTRY_ABORTED object");
 
     {
         /* start counting the length from 0 */
         StoreIOBuffer localTempBuffer(HTTP_REQBUF_SZ, 0, tempbuf);
@@ -539,42 +547,43 @@
 
     if (e->checkNegativeHit()
             && !r->flags.noCacheHack()
        ) {
         http->logType = LOG_TCP_NEGATIVE_HIT;
         sendMoreData(result);
     } else if (!http->flags.internal && refreshCheckHTTP(e, r)) {
         debugs(88, 5, "clientCacheHit: in refreshCheck() block");
         /*
          * We hold a stale copy; it needs to be validated
          */
         /*
          * The 'needValidation' flag is used to prevent forwarding
          * loops between siblings.  If our copy of the object is stale,
          * then we should probably only use parents for the validation
          * request.  Otherwise two siblings could generate a loop if
          * both have a stale version of the object.
          */
         r->flags.needValidation = true;
 
-        if (e->lastmod < 0) {
-            debugs(88, 3, "validate HIT object? NO. Missing Last-Modified header. Do MISS.");
+        ETag etag = {NULL, -1}; // TODO: make that a default ETag constructor
+        if (e->lastmod < 0 && (!e->hasEtag(etag) || etag.weak)) {
+            debugs(88, 3, "validate HIT object? NO. Missing Last-Modified or strong ETag header. Do MISS.");
             /*
              * Previous reply didn't have a Last-Modified header,
              * we cannot revalidate it.
              */
             http->logType = LOG_TCP_MISS;
             processMiss();
         } else if (r->flags.noCache) {
             debugs(88, 3, "validate HIT object? NO. Client sent CC:no-cache. Do CLIENT_REFRESH_MISS");
             /*
              * This did not match a refresh pattern that overrides no-cache
              * we should honour the client no-cache header.
              */
             http->logType = LOG_TCP_CLIENT_REFRESH_MISS;
             processMiss();
         } else if (r->protocol == AnyP::PROTO_HTTP) {
             debugs(88, 3, "validate HIT object? YES.");
             /*
              * Object needs to be revalidated
              * XXX This could apply to FTP as well, if Last-Modified is known.
              */

=== modified file 'src/http.cc'
--- src/http.cc	2013-06-03 14:05:16 +0000
+++ src/http.cc	2013-06-26 14:07:01 +0000
@@ -1686,44 +1686,51 @@
  * build request headers and append them to a given MemBuf
  * used by buildRequestPrefix()
  * note: initialised the HttpHeader, the caller is responsible for Clean()-ing
  */
 void
 HttpStateData::httpBuildRequestHeader(HttpRequest * request,
                                       StoreEntry * entry,
                                       const AccessLogEntryPointer &al,
                                       HttpHeader * hdr_out,
                                       const HttpStateFlags &flags)
 {
     /* building buffer for complex strings */
 #define BBUF_SZ (MAX_URL+32)
     LOCAL_ARRAY(char, bbuf, BBUF_SZ);
     LOCAL_ARRAY(char, ntoabuf, MAX_IPSTRLEN);
     const HttpHeader *hdr_in = &request->header;
     const HttpHeaderEntry *e = NULL;
     HttpHeaderPos pos = HttpHeaderInitPos;
     assert (hdr_out->owner == hoRequest);
 
-    /* append our IMS header */
+    /* use our IMS header if the cached entry has Last-Modified time */
     if (request->lastmod > -1)
         hdr_out->putTime(HDR_IF_MODIFIED_SINCE, request->lastmod);
 
+    // Add our own If-None-Match field if the cached entry has a strong ETag.
+    // copyOneHeaderFromClientsideRequestToUpstreamRequest() adds client ones.
+    if (request->etag.defined()) {
+        hdr_out->addEntry(new HttpHeaderEntry(HDR_IF_NONE_MATCH, NULL,
+                          request->etag.termedBuf()));
+    }
+
     bool we_do_ranges = decideIfWeDoRanges (request);
 
     String strConnection (hdr_in->getList(HDR_CONNECTION));
 
     while ((e = hdr_in->getEntry(&pos)))
         copyOneHeaderFromClientsideRequestToUpstreamRequest(e, strConnection, request, hdr_out, we_do_ranges, flags);
 
     /* Abstraction break: We should interpret multipart/byterange responses
      * into offset-length data, and this works around our inability to do so.
      */
     if (!we_do_ranges && request->multipartRangeRequest()) {
         /* don't cache the result */
         request->flags.cachable = false;
         /* pretend it's not a range request */
         delete request->range;
         request->range = NULL;
         request->flags.isRanged = false;
     }
 
     /* append Via */

=== modified file 'src/store.cc'
--- src/store.cc	2013-03-18 04:55:51 +0000
+++ src/store.cc	2013-06-26 14:07:01 +0000
@@ -1952,40 +1952,51 @@
 
     if (mod_time > request->ims) {
         debugs(88, 3, "--> YES: entry newer than client");
         return true;
     } else if (mod_time < request->ims) {
         debugs(88, 3, "-->  NO: entry older than client");
         return false;
     } else if (request->imslen < 0) {
         debugs(88, 3, "-->  NO: same LMT, no client length");
         return false;
     } else if (request->imslen == object_length) {
         debugs(88, 3, "-->  NO: same LMT, same length");
         return false;
     } else {
         debugs(88, 3, "--> YES: same LMT, different length");
         return true;
     }
 }
 
 bool
+StoreEntry::hasEtag(ETag &etag) const
+{
+    if (const HttpReply *reply = getReply()) {
+        etag = reply->header.getETag(HDR_ETAG);
+        if (etag.str)
+            return true;
+    }
+    return false;
+}
+
+bool
 StoreEntry::hasIfMatchEtag(const HttpRequest &request) const
 {
     const String reqETags = request.header.getList(HDR_IF_MATCH);
     return hasOneOfEtags(reqETags, false);
 }
 
 bool
 StoreEntry::hasIfNoneMatchEtag(const HttpRequest &request) const
 {
     const String reqETags = request.header.getList(HDR_IF_NONE_MATCH);
     // weak comparison is allowed only for HEAD or full-body GET requests
     const bool allowWeakMatch = !request.flags.isRanged &&
                                 (request.method == Http::METHOD_GET || request.method == Http::METHOD_HEAD);
     return hasOneOfEtags(reqETags, allowWeakMatch);
 }
 
 /// whether at least one of the request ETags matches entity ETag
 bool
 StoreEntry::hasOneOfEtags(const String &reqETags, const bool allowWeakMatch) const
 {

Reply via email to