Hello,
The attached patch adds reply_from_cache and reply_to_cache
squid.conf directives to control caching of responses using response info.
The reply_from_cache directive can prevent serving of HITs while
reply_to_cache can prevent storage of MISSes. The two can be combined or
used independently.
As you know, the existing "cache" directive does both at the same time.
However, the "cache" directive is checked before Squid has access to the
response and, hence, cannot use response-based ACLs such as
http_status. Response-based ACLs may be essential when fine-tuning
caching. Squid Bug 3937 (StoreID can lead to 302 infinite loop) is a
good use case.
The patch also updates the old "cache" directive documentation to provide
more information, to help folks distinguish the three related
directives, and to improve clarity.
Caution: reply_from_cache is one more case that can trigger bug 3480
segfaults.
Thank you,
Alex.
Added reply_from_cache and reply_to_cache squid.conf directives
to control caching of responses using response info.
The existing "cache" directive is checked before Squid has access to the
response and, hence, cannot use response-based ACLs such as http_status.
Response-based ACLs may be essential when fine-tuning caching. Squid Bug 3937
(StoreID can lead to 302 infinite loop) is a good use case.
Updated the old "cache" directive documentation to provide more information,
to help folks distinguish the three related directives, and to improve
clarity.
=== modified file 'src/Server.cc'
--- src/Server.cc 2013-07-25 07:21:54 +0000
+++ src/Server.cc 2013-10-11 22:01:11 +0000
@@ -14,40 +14,41 @@
* incorporates software developed and/or copyrighted by other
* sources; see the CREDITS file for full details.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.
*
*/
#include "squid.h"
+#include "acl/FilledChecklist.h"
#include "acl/Gadgets.h"
#include "base/TextException.h"
#include "comm/Connection.h"
#include "comm/forward.h"
#include "comm/Write.h"
#include "fd.h"
#include "err_detail_type.h"
#include "errorpage.h"
#include "HttpReply.h"
#include "HttpRequest.h"
#include "Server.h"
#include "SquidTime.h"
#include "StatCounters.h"
#include "Store.h"
#include "tools.h"
#include "URL.h"
#if USE_ADAPTATION
#include "adaptation/AccessCheck.h"
#include "adaptation/Answer.h"
@@ -156,40 +157,42 @@ ServerStateData::setVirginReply(HttpRepl
HttpReply *
ServerStateData::finalReply()
{
assert(theFinalReply);
return theFinalReply;
}
HttpReply *
ServerStateData::setFinalReply(HttpReply *rep)
{
debugs(11,5, HERE << this << " setting final reply to " << rep);
assert(!theFinalReply);
assert(rep);
theFinalReply = rep;
HTTPMSGLOCK(theFinalReply);
// give entry the reply because haveParsedReplyHeaders() expects it there
entry->replaceHttpReply(theFinalReply, false); // but do not write yet
haveParsedReplyHeaders(); // update the entry/reply (e.g., set timestamps)
+ if (EBIT_TEST(entry->flags, ENTRY_CACHABLE) && blockCaching())
+ entry->release(); // reply_to_cache rules block caching of this otherwise cachable reply
entry->startWriting(); // write the updated entry to store
return theFinalReply;
}
// called when no more server communication is expected; may quit
void
ServerStateData::serverComplete()
{
debugs(11,5,HERE << "serverComplete " << this);
if (!doneWithServer()) {
closeServer();
assert(doneWithServer());
}
completed = true;
HttpRequest *r = originalRequest();
r->hier.total_response_time = r->hier.first_conn_start.tv_sec ?
@@ -510,40 +513,58 @@ ServerStateData::maybePurgeOthers()
// and probably only if the response was successful
if (theFinalReply->sline.status() >= 400)
return;
// XXX: should we use originalRequest() here?
const char *reqUrl = urlCanonical(request);
debugs(88, 5, "maybe purging due to " << RequestMethodStr(request->method) << ' ' << reqUrl);
purgeEntriesByUrl(request, reqUrl);
purgeEntriesByHeader(request, reqUrl, theFinalReply, HDR_LOCATION);
purgeEntriesByHeader(request, reqUrl, theFinalReply, HDR_CONTENT_LOCATION);
}
/// called when we have final (possibly adapted) reply headers; kids extend
void
ServerStateData::haveParsedReplyHeaders()
{
Must(theFinalReply);
maybePurgeOthers();
}
+/// whether squid.conf reply_to_cache rules prevent caching of an otherwise cachable response
+bool
+ServerStateData::blockCaching()
+{
+ if (const Acl::Tree *acl = Config.accessList.replyToCache) {
+ // This relatively expensive check is not in StoreEntry::checkCachable:
+ // That method lacks HttpRequest and may be called too many times.
+ ACLFilledChecklist ch(acl, originalRequest(), NULL);
+ ch.reply = const_cast<HttpReply*>(entry->getReply()); // const_cast works around an ACLFilledChecklist API bug (non-const reply member)
+ HTTPMSGLOCK(ch.reply); // NOTE(review): presumably unlocked by the checklist destructor -- confirm
+ if (ch.fastCheck() != ACCESS_ALLOWED) { // when in doubt, block: only an explicit allow permits caching
+ debugs(20, 3, "reply_to_cache prohibits caching");
+ return true;
+ }
+ }
+ return false; // no reply_to_cache rules are configured or they allow caching
+}
+
HttpRequest *
ServerStateData::originalRequest()
{
return request;
}
#if USE_ADAPTATION
/// Initiate an asynchronous adaptation transaction which will call us back.
void
ServerStateData::startAdaptation(const Adaptation::ServiceGroupPointer &group, HttpRequest *cause)
{
debugs(11, 5, "ServerStateData::startAdaptation() called");
// check whether we should be sending a body as well
// start body pipe to feed ICAP transaction if needed
assert(!virginBodyDestination);
HttpReply *vrep = virginReply();
assert(!vrep->body_pipe);
int64_t size = 0;
if (vrep->expectingBody(cause->method, size) && size) {
virginBodyDestination = new BodyPipe(this);
=== modified file 'src/Server.h'
--- src/Server.h 2013-06-07 04:35:25 +0000
+++ src/Server.h 2013-10-11 19:56:46 +0000
@@ -114,40 +114,42 @@ protected:
virtual void completeForwarding(); /**< default calls fwd->complete() */
// BodyConsumer for HTTP: consume request body.
bool startRequestBodyFlow();
void handleMoreRequestBodyAvailable();
void handleRequestBodyProductionEnded();
virtual void handleRequestBodyProducerAborted() = 0;
// sending of the request body to the server
void sendMoreRequestBody();
// has body; kids overwrite to increment I/O stats counters
virtual void sentRequestBody(const CommIoCbParams &io) = 0;
virtual void doneSendingRequestBody() = 0;
virtual void closeServer() = 0; /**< end communication with the server */
virtual bool doneWithServer() const = 0; /**< did we end communication? */
/// Entry-dependent callbacks use this check to quit if the entry went bad
bool abortOnBadEntry(const char *abortReason);
+ bool blockCaching(); /**< whether to prevent caching of an otherwise cachable response */
+
#if USE_ADAPTATION
void startAdaptation(const Adaptation::ServiceGroupPointer &group, HttpRequest *cause);
void adaptVirginReplyBody(const char *buf, ssize_t len);
void cleanAdaptation();
virtual bool doneWithAdaptation() const; /**< did we end ICAP communication? */
// BodyConsumer for ICAP: consume adapted response body.
void handleMoreAdaptedBodyAvailable();
void handleAdaptedBodyProductionEnded();
void handleAdaptedBodyProducerAborted();
void handleAdaptedHeader(HttpMsg *msg);
void handleAdaptationCompleted();
void handleAdaptationBlocked(const Adaptation::Answer &answer);
void handleAdaptationAborted(bool bypassable = false);
/// called by StoreEntry when it has more buffer space available
void resumeBodyStorage();
/// called when the entire adapted response body is consumed
void endAdaptedBodyConsumption();
=== modified file 'src/SquidConfig.h'
--- src/SquidConfig.h 2013-08-01 20:45:56 +0000
+++ src/SquidConfig.h 2013-10-11 17:38:50 +0000
@@ -357,40 +357,42 @@ public:
int client_dst_passthru;
int dns_mdns;
} onoff;
int pipeline_max_prefetch;
int forward_max_tries;
int connect_retries;
class ACL *aclList;
struct {
acl_access *http;
acl_access *adapted_http;
acl_access *icp;
acl_access *miss;
acl_access *NeverDirect;
acl_access *AlwaysDirect;
acl_access *ASlists;
acl_access *noCache;
+ acl_access *replyFromCache;
+ acl_access *replyToCache;
acl_access *stats_collection;
#if SQUID_SNMP
acl_access *snmp;
#endif
#if USE_HTTP_VIOLATIONS
acl_access *brokenPosts;
#endif
acl_access *redirector;
acl_access *store_id;
acl_access *reply;
AclAddress *outgoing_address;
#if USE_HTCP
acl_access *htcp;
acl_access *htcp_clr;
#endif
#if USE_SSL
acl_access *ssl_bump;
=== modified file 'src/cf.data.pre'
--- src/cf.data.pre 2013-08-29 09:21:53 +0000
+++ src/cf.data.pre 2013-10-11 21:55:43 +0000
@@ -4793,54 +4793,124 @@ LOC: Config.onoff.store_id_bypass
DEFAULT: on
DOC_START
When this is 'on', a request will not go through the
helper if all helpers are busy. If this is 'off'
and the helper queue grows too large, Squid will exit
with a FATAL error and ask you to increase the number of
helpers. You should only enable this if the helperss
are not critical to your caching system. If you use
helpers for critical caching components, and you enable this
option, users may not get objects from cache.
DOC_END
COMMENT_START
OPTIONS FOR TUNING THE CACHE
-----------------------------------------------------------------------------
COMMENT_END
NAME: cache no_cache
TYPE: acl_access
DEFAULT: none
-DEFAULT_DOC: Allow caching, unless rules exist in squid.conf.
+DEFAULT_DOC: By default, this directive is unused and has no effect.
LOC: Config.accessList.noCache
DOC_START
- A list of ACL elements which, if matched and denied, cause the request to
- not be satisfied from the cache and the reply to not be cached.
- In other words, use this to force certain objects to never be cached.
-
- You must use the words 'allow' or 'deny' to indicate whether items
- matching the ACL should be allowed or denied into the cache.
+ Requests denied by this directive will not be served from the cache
+ and their responses will not be stored in the cache. This directive
+ has no effect on other transactions and on already cached responses.
+
+ This and the two other similar caching directives listed below are
+ checked at different transaction processing stages, have different
+ access to response information, and affect different cache operations:
+
+ * cache: Checked before Squid makes a hit/miss determination.
+ No access to reply information!
+ Denies both serving a hit and storing a miss.
+ * reply_from_cache: Checked after a hit was detected.
+ Has access to reply (hit) information.
+ Denies serving a hit only.
+ * reply_to_cache: Checked before storing a cachable miss.
+ Has access to reply (miss) information.
+ Denies storing a miss only.
+
+ If you are not sure which of the three directives to use, apply the
+ following rule of thumb:
+
+ * If your directive ACLs need no response info, use "cache". Otherwise:
+ * If you do not want the response cached, use reply_to_cache; and/or
+ * if you do not want a hit on a cached response, use reply_from_cache.
This clause supports both fast and slow acl types.
See http://wiki.squid-cache.org/SquidFaq/SquidAcl for details.
DOC_END
+NAME: reply_from_cache
+TYPE: acl_access
+DEFAULT: none
+DEFAULT_DOC: By default, this directive is unused and has no effect.
+LOC: Config.accessList.replyFromCache
+DOC_START
+ Responses denied by this directive will not be served from the cache
+ (but may still be cached, see reply_to_cache). This directive has no
+ effect on the responses it allows and on the cached objects.
+
+ Please see the "cache" directive for a summary of differences among
+ reply_to_cache, reply_from_cache, and cache directives.
+
+ Unlike the "cache" directive, reply_from_cache only supports fast acl
+ types. See http://wiki.squid-cache.org/SquidFaq/SquidAcl for details.
+
+ For example:
+
+ # apply custom Store ID mapping to some URLs
+ acl MapMe dstdomain .c.example.com
+ store_id_program ...
+ store_id_access allow MapMe
+
+ # but prevent caching of special responses
+ # such as 302 redirects that cause StoreID loops
+ acl Ordinary http_status 200-299
+ reply_to_cache deny MapMe !Ordinary
+
+ # and do not serve any previously stored special responses
+ # from the cache (in case they were already cached before
+ # the above reply_to_cache rule was in effect).
+ reply_from_cache deny MapMe !Ordinary
+DOC_END
+
+NAME: reply_to_cache
+TYPE: acl_access
+DEFAULT: none
+DEFAULT_DOC: By default, this directive is unused and has no effect.
+LOC: Config.accessList.replyToCache
+DOC_START
+ Responses denied by this directive will not be cached (but may still
+ be served from the cache, see reply_from_cache). This directive has no
+ effect on the responses it allows and on the already cached responses.
+
+ Please see the "cache" directive for a summary of differences among
+ reply_to_cache, reply_from_cache, and cache directives. See the
+ reply_from_cache directive for a usage example.
+
+ Unlike the "cache" directive, reply_to_cache only supports fast acl
+ types. See http://wiki.squid-cache.org/SquidFaq/SquidAcl for details.
+DOC_END
+
NAME: max_stale
COMMENT: time-units
TYPE: time_t
LOC: Config.maxStale
DEFAULT: 1 week
DOC_START
This option puts an upper limit on how stale content Squid
will serve from the cache if cache validation fails.
Can be overriden by the refresh_pattern max-stale option.
DOC_END
NAME: refresh_pattern
TYPE: refreshpattern
LOC: Config.Refresh
DEFAULT: none
DOC_START
usage: refresh_pattern [-i] regex min percent max [options]
By default, regular expressions are CASE-SENSITIVE. To make
them case-insensitive, use the -i option.
=== modified file 'src/client_side_reply.cc'
--- src/client_side_reply.cc 2013-07-15 07:49:43 +0000
+++ src/client_side_reply.cc 2013-10-11 22:01:45 +0000
@@ -517,40 +517,45 @@ clientReplyContext::cacheHit(StoreIOBuff
case VARY_CANCEL:
/* varyEvaluateMatch found a object loop. Process as miss */
debugs(88, DBG_IMPORTANT, "clientProcessHit: Vary object loop!");
processMiss();
return;
}
if (r->method == Http::METHOD_PURGE) {
removeClientStoreReference(&sc, http);
e = NULL;
purgeRequest();
return;
}
if (e->checkNegativeHit()
&& !r->flags.noCacheHack()
) {
http->logType = LOG_TCP_NEGATIVE_HIT;
sendMoreData(result);
+ } else if (blockedHit()) {
+ debugs(88, 5, "reply_from_cache forces a MISS");
+ http->logType = LOG_TCP_MISS;
+ processMiss();
+ return;
} else if (!http->flags.internal && refreshCheckHTTP(e, r)) {
debugs(88, 5, "clientCacheHit: in refreshCheck() block");
/*
* We hold a stale copy; it needs to be validated
*/
/*
* The 'needValidation' flag is used to prevent forwarding
* loops between siblings. If our copy of the object is stale,
* then we should probably only use parents for the validation
* request. Otherwise two siblings could generate a loop if
* both have a stale version of the object.
*/
r->flags.needValidation = true;
if (e->lastmod < 0) {
debugs(88, 3, "validate HIT object? NO. Missing Last-Modified header. Do MISS.");
/*
* Previous reply didn't have a Last-Modified header,
* we cannot revalidate it.
*/
@@ -745,40 +750,64 @@ clientReplyContext::processConditional(S
if (r.flags.ims) {
// handle If-Modified-Since requests from the client
if (e->modifiedSince(&r)) {
http->logType = LOG_TCP_IMS_HIT;
sendMoreData(result);
return;
}
if (matchedIfNoneMatch) {
// If-None-Match matched, reply with 304 Not Modified or
// 412 Precondition Failed
sendNotModifiedOrPreconditionFailedError();
return;
}
// otherwise reply with 304 Not Modified
sendNotModified();
}
}
+/// whether squid.conf reply_from_cache prevents us from serving this hit
+bool
+clientReplyContext::blockedHit() const
+{
+ if (!Config.accessList.replyFromCache)
+ return false; // no hits are blocked by default
+
+ if (http->flags.internal)
+ return false; // internal content "hits" cannot be blocked
+
+ if (const HttpReply *rep = http->storeEntry()->getReply()) {
+ std::auto_ptr<ACLFilledChecklist> chl(clientAclChecklistCreate(Config.accessList.replyFromCache, http));
+ chl->reply = const_cast<HttpReply*>(rep); // ACLFilledChecklist API bug
+ HTTPMSGLOCK(chl->reply);
+ return chl->fastCheck() != ACCESS_ALLOWED; // when in doubt, block
+ }
+
+ // This does not happen, I hope, because we are called from cacheHit(), which
+ // is called via a storeClientCopy() callback, and store should initialize
+ // the reply before calling that callback.
+ debugs(88, 3, "Missing reply!");
+ return false; // without a reply to check, do not block the hit
+}
+
void
clientReplyContext::purgeRequestFindObjectToPurge()
{
/* Try to find a base entry */
http->flags.purging = true;
lookingforstore = 1;
// TODO: can we use purgeAllCached() here instead of doing the
// getPublicByRequestMethod() dance?
StoreEntry::getPublicByRequestMethod(this, http->request, Http::METHOD_GET);
}
// Purges all entries with a given url
// TODO: move to SideAgent parent, when we have one
/*
* We probably cannot purge Vary-affected responses because their MD5
* keys depend on vary headers.
*/
void
purgeEntriesByUrl(HttpRequest * req, const char *url)
=== modified file 'src/client_side_reply.h'
--- src/client_side_reply.h 2013-06-27 15:58:46 +0000
+++ src/client_side_reply.h 2013-10-11 16:31:21 +0000
@@ -123,34 +123,35 @@ private:
void sendStreamError(StoreIOBuffer const &result);
void pushStreamData(StoreIOBuffer const &result, char *source);
clientStreamNode * next() const;
StoreIOBuffer holdingBuffer;
HttpReply *reply;
void processReplyAccess();
static ACLCB ProcessReplyAccessResult;
void processReplyAccessResult(const allow_t &accessAllowed);
void cloneReply();
void buildReplyHeader ();
bool alwaysAllowResponse(Http::StatusCode sline) const;
int checkTransferDone();
void processOnlyIfCachedMiss();
void processConditional(StoreIOBuffer &result);
void cacheHit(StoreIOBuffer result);
void handleIMSReply(StoreIOBuffer result);
void sendMoreData(StoreIOBuffer result);
void triggerInitialStoreRead();
void sendClientOldEntry();
void purgeAllCached();
+ bool blockedHit() const; ///< whether squid.conf reply_from_cache prevents us from serving this hit
void sendBodyTooLargeError();
void sendPreconditionFailedError();
void sendNotModified();
void sendNotModifiedOrPreconditionFailedError();
StoreEntry *old_entry;
store_client *old_sc; /* ... for entry to be validated */
bool deleting;
CBDATA_CLASS2(clientReplyContext);
};
#endif /* SQUID_CLIENTSIDEREPLY_H */