This is an automated email from the ASF dual-hosted git repository. cmcfarlen pushed a commit to branch 10.1.x in repository https://gitbox.apache.org/repos/asf/trafficserver.git
commit 517572e5d843eb0e4123f607d8d465228f67a4da Author: Jasmine Emanouel <[email protected]> AuthorDate: Fri Aug 8 10:34:18 2025 +1000 Add range request support to cache fill + global plugin support (#12391) * Add range request support to cache fill * Update cache_fill.test.py * Add config option to make cache_fill only work for range requests * Update docs * Add --cache-range-req config option Update cache_fill.en.rst Update cache_fill.en.rst Update cache_fill.en.rst * Revert test back to original wont pass in jenkins, passes locally Fix test Update cache_fill.test.py Update cache_fill.test.py Revert test back to original (cherry picked from commit 872ffa1b8c625269cdfbf16314cf5c3dd31fd33c) --- doc/admin-guide/plugins/cache_fill.en.rst | 57 ++++- .../experimental/cache_fill/background_fetch.cc | 27 +++ plugins/experimental/cache_fill/background_fetch.h | 11 + plugins/experimental/cache_fill/cache_fill.cc | 31 +++ plugins/experimental/cache_fill/configs.cc | 48 ++++- plugins/experimental/cache_fill/configs.h | 9 +- .../pluginTest/cache_fill/cache_fill.test.py | 230 +++++++++++++++++++++ 7 files changed, 396 insertions(+), 17 deletions(-) diff --git a/doc/admin-guide/plugins/cache_fill.en.rst b/doc/admin-guide/plugins/cache_fill.en.rst index 2f22c382c9..5db52a372c 100644 --- a/doc/admin-guide/plugins/cache_fill.en.rst +++ b/doc/admin-guide/plugins/cache_fill.en.rst @@ -24,20 +24,69 @@ The initial version of this plugin relays the initial request to the origin serv This plugin doesn't provide any improvement for smaller objects but could also degrade the performance as two outgoing requests for every cache update. -Using the plugin ----------------- +Configuration +------------- +This plugin functions as either a global or per remap plugin, and it takes an optional argument for +specifying a config file with inclusion or exclusion criteria. 
The config file can be specified either +via an absolute path or via a relative path to the install dir. -This plugin functions as a per remap plugin. +To activate the plugin in global mode, in :file:`plugin.config`, simply add:: -To activate the plugin, in :file:`remap.config`, simply append the + cache_fill.so --config <config-file> + +To activate the plugin in per remap mode, in :file:`remap.config`, simply append the below to the specific remap line:: @plugin=cache_fill.so @pparam=<config-file> +include/exclude +--------------- +The plugin supports a config file that can specify exclusion or inclusion of background fetch +based on any arbitrary header or client-ip. + +The contents of the config-file could be as below:: + + include User-Agent ABCDEF + exclude User-Agent * + exclude Content-Type text + exclude X-Foo-Bar text + exclude Content-Length <1000 + exclude Client-IP 127.0.0.1 + include Client-IP 10.0.0.0/16 + +The ``include`` configuration directive is only used when there is a corresponding ``exclude`` to exempt. +For example, a single line directive, ``include Host example.com`` would not make the plugin +*only* act on example.com. To achieve classic allow (only) lists, one would need to have a broad +exclude line, such as:: + + exclude Host * + include Host example.com + +range-req-only +-------------- +When set to ``true``, this plugin will only trigger a background fetch if a range header is present. +Range headers include ``Range``, ``If-Match``, ``If-Modified-Since``, ``If-None-Match``, ``If-Range`` +and ``If-Unmodified-Since``. By default, this is set to false. + +This would look like:: + + @plugin=cache_fill.so @pparam=--range-req-only + +cache-range-req +--------------- +When set to ``false``, this plugin will not trigger a background fetch for range requests. By default, +this is set to true. +Note: you cannot set this to false and ``range-req-only`` to true. 
+ +This would look like:: + + @plugin=cache_fill.so @pparam=--cache-range-req=false + Functionality ------------- Plugin decides to trigger a background fetch of the original (Client) request if the request/response is cacheable and cache status is TS_CACHE_LOOKUP_MISS/TS_CACHE_LOOKUP_HIT_STALE. +This will work for range requests by making a background fetch and removing the range header. To disable this feature, set ``--cache-range-req=false`` Future additions ---------------- diff --git a/plugins/experimental/cache_fill/background_fetch.cc b/plugins/experimental/cache_fill/background_fetch.cc index 1881edddd1..621d9844d1 100644 --- a/plugins/experimental/cache_fill/background_fetch.cc +++ b/plugins/experimental/cache_fill/background_fetch.cc @@ -45,6 +45,27 @@ namespace cache_fill_ns DbgCtl dbg_ctl{PLUGIN_NAME}; } +/////////////////////////////////////////////////////////////////////////// +// Remove a header (fully) from an TSMLoc / TSMBuffer. Return the number +// of fields (header values) we removed. +int +remove_header(TSMBuffer bufp, TSMLoc hdr_loc, const char *header, int len) +{ + TSMLoc field = TSMimeHdrFieldFind(bufp, hdr_loc, header, len); + int cnt = 0; + + while (field) { + TSMLoc tmp = TSMimeHdrFieldNextDup(bufp, hdr_loc, field); + + ++cnt; + TSMimeHdrFieldDestroy(bufp, hdr_loc, field); + TSHandleMLocRelease(bufp, hdr_loc, field); + field = tmp; + } + + return cnt; +} + /////////////////////////////////////////////////////////////////////////// // Set a header to a specific value. This will avoid going to through a // remove / add sequence in case of an existing header. 
@@ -176,6 +197,12 @@ BgFetchData::initialize(TSMBuffer request, TSMLoc req_hdr, TSHttpTxn txnp) if (set_header(mbuf, hdr_loc, TS_MIME_FIELD_HOST, TS_MIME_LEN_HOST, hostp, len)) { Dbg(dbg_ctl, "Set header Host: %.*s", len, hostp); } + // Next, remove the Range headers and IMS (conditional) headers from the request + for (auto const &header : FILTER_HEADERS) { + if (remove_header(mbuf, hdr_loc, header.data(), header.size()) > 0) { + Dbg(dbg_ctl, "Removed the %s header from request", header.data()); + } + } ret = true; } } diff --git a/plugins/experimental/cache_fill/background_fetch.h b/plugins/experimental/cache_fill/background_fetch.h index f27dbc10be..011d5e1d37 100644 --- a/plugins/experimental/cache_fill/background_fetch.h +++ b/plugins/experimental/cache_fill/background_fetch.h @@ -43,6 +43,17 @@ using OutstandingRequests = std::unordered_map<std::string, bool>; const char PLUGIN_NAME[] = "cache_fill"; +// This is the list of all headers that must be removed when we make the actual background +// fetch request for range requests. 
+static const std::array<const std::string_view, 6> FILTER_HEADERS{ + {{TS_MIME_FIELD_RANGE, static_cast<size_t>(TS_MIME_LEN_RANGE)}, + {TS_MIME_FIELD_IF_MATCH, static_cast<size_t>(TS_MIME_LEN_IF_MATCH)}, + {TS_MIME_FIELD_IF_MODIFIED_SINCE, static_cast<size_t>(TS_MIME_LEN_IF_MODIFIED_SINCE)}, + {TS_MIME_FIELD_IF_NONE_MATCH, static_cast<size_t>(TS_MIME_LEN_IF_NONE_MATCH)}, + {TS_MIME_FIELD_IF_RANGE, static_cast<size_t>(TS_MIME_LEN_IF_RANGE)}, + {TS_MIME_FIELD_IF_UNMODIFIED_SINCE, static_cast<size_t>(TS_MIME_LEN_IF_UNMODIFIED_SINCE)}} +}; + namespace cache_fill_ns { extern DbgCtl dbg_ctl; diff --git a/plugins/experimental/cache_fill/cache_fill.cc b/plugins/experimental/cache_fill/cache_fill.cc index ad3d90c3bf..46eec6b9b9 100644 --- a/plugins/experimental/cache_fill/cache_fill.cc +++ b/plugins/experimental/cache_fill/cache_fill.cc @@ -41,6 +41,9 @@ #include "background_fetch.h" #include "configs.h" +// Global config, if we don't have a remap specific config. +static BgFetchConfig *gConfig = nullptr; + static const char * getCacheLookupResultName(TSCacheLookupResult result) { @@ -137,6 +140,34 @@ cont_handle_cache(TSCont contp, TSEvent event, void *edata) return 0; } +/////////////////////////////////////////////////////////////////////////// +// Setup global hooks +void +TSPluginInit(int argc, const char *argv[]) +{ + TSPluginRegistrationInfo info; + + info.plugin_name = (char *)PLUGIN_NAME; + info.vendor_name = (char *)"Apache Software Foundation"; + info.support_email = (char *)"[email protected]"; + + if (TS_SUCCESS != TSPluginRegister(&info)) { + TSError("[%s] Plugin registration failed", PLUGIN_NAME); + } + + TSCont cont = TSContCreate(cont_handle_cache, nullptr); + + gConfig = new BgFetchConfig(cont); + + if (gConfig->parseOptions(argc, argv)) { + Dbg(dbg_ctl, "cache fill plugin is successfully initialized globally"); + TSHttpHookAdd(TS_HTTP_CACHE_LOOKUP_COMPLETE_HOOK, cont); + } else { + // ToDo: Hmmm, no way to fail a global plugin here? 
+ Dbg(dbg_ctl, "Failed to initialize as global plugin"); + } +} + /////////////////////////////////////////////////////////////////////////// // Setup Remap mode /////////////////////////////////////////////////////////////////////////////// diff --git a/plugins/experimental/cache_fill/configs.cc b/plugins/experimental/cache_fill/configs.cc index 9eef083b8d..b65f0aa765 100644 --- a/plugins/experimental/cache_fill/configs.cc +++ b/plugins/experimental/cache_fill/configs.cc @@ -42,19 +42,19 @@ bool BgFetchConfig::parseOptions(int argc, const char *argv[]) { static const struct option longopt[] = { - {const_cast<char *>("log"), required_argument, nullptr, 'l' }, - {const_cast<char *>("config"), required_argument, nullptr, 'c' }, - {const_cast<char *>("allow-304"), no_argument, nullptr, 'a' }, - {nullptr, no_argument, nullptr, '\0'} + {const_cast<char *>("log"), required_argument, nullptr, 'l' }, + {const_cast<char *>("config"), required_argument, nullptr, 'c' }, + {const_cast<char *>("range-req-only"), optional_argument, nullptr, 'r' }, + {const_cast<char *>("cache-range-req"), optional_argument, nullptr, 'a' }, + {nullptr, no_argument, nullptr, '\0'}, }; while (true) { - int opt = getopt_long(argc, const_cast<char *const *>(argv), "lc", longopt, nullptr); + int opt = getopt_long(argc, const_cast<char *const *>(argv), "", longopt, nullptr); if (opt == -1) { break; } - switch (opt) { case 'l': Dbg(dbg_ctl, "option: log file specified: %s", optarg); @@ -67,9 +67,13 @@ BgFetchConfig::parseOptions(int argc, const char *argv[]) return false; } break; + case 'r': + Dbg(dbg_ctl, "option: --range-req-only set"); + _range_req_only = isTrue(optarg); + break; case 'a': - Dbg(dbg_ctl, "option: --allow-304 set"); - _allow_304 = true; + Dbg(dbg_ctl, "option: --cache-range-req set"); + _cache_range_req = isTrue(optarg); break; default: TSError("[%s] invalid plugin option: %c", PLUGIN_NAME, opt); @@ -78,6 +82,11 @@ BgFetchConfig::parseOptions(int argc, const char *argv[]) } } + if 
(_range_req_only && !_cache_range_req) { + TSError("[%s] Cannot define _range_req_only=true and _cache_range_req=false", PLUGIN_NAME); + return false; + } + return true; } @@ -198,8 +207,33 @@ BgFetchConfig::bgFetchAllowed(TSHttpTxn txnp) const return false; } - bool allow_bg_fetch = true; + if (_range_req_only || !_cache_range_req) { + TSMBuffer bufp; + TSMLoc hdr_loc; + if (TSHttpTxnClientReqGet(txnp, &bufp, &hdr_loc) == TS_SUCCESS) { + bool hasRangeHdrs = false; + for (auto const &header : FILTER_HEADERS) { + // The name length is the 4th argument; a non-null TSMLoc means the field is present. + TSMLoc field = TSMimeHdrFieldFind(bufp, hdr_loc, header.data(), header.size()); + if (field != TS_NULL_MLOC) { + TSHandleMLocRelease(bufp, hdr_loc, field); + hasRangeHdrs = true; + break; + } + } + TSHandleMLocRelease(bufp, TS_NULL_MLOC, hdr_loc); + if (!hasRangeHdrs && _range_req_only) { + Dbg(dbg_ctl, "_range_req_only=true; This transaction is not a range request"); + return false; + } + if (hasRangeHdrs && !_cache_range_req) { + Dbg(dbg_ctl, "_cache_range_req=false; This transaction is a range request"); + return false; + } + } + } + bool allow_bg_fetch = true; // We could do this recursively, but following the linked list is probably more efficient. for (auto const &r : _rules) { if (r.check_field_configured(txnp)) { diff --git a/plugins/experimental/cache_fill/configs.h b/plugins/experimental/cache_fill/configs.h index 7d3304cea8..81e31b05ab 100644 --- a/plugins/experimental/cache_fill/configs.h +++ b/plugins/experimental/cache_fill/configs.h @@ -70,10 +70,10 @@ public: return _log_file; } - bool - allow304() const + static bool + isTrue(const char *arg) { - return _allow_304; + return (nullptr == arg || 0 == strncasecmp("true", arg, 4) || 0 == strncasecmp("1", arg, 1) || 0 == strncasecmp("yes", arg, 3)); } // This parses and populates the BgFetchRule linked list (_rules). 
@@ -84,6 +84,7 @@ public: private: TSCont _cont = nullptr; list_type _rules; - bool _allow_304 = false; + bool _range_req_only = false; + bool _cache_range_req = true; std::string _log_file; }; diff --git a/tests/gold_tests/pluginTest/cache_fill/cache_fill.test.py b/tests/gold_tests/pluginTest/cache_fill/cache_fill.test.py new file mode 100644 index 0000000000..ad7859e554 --- /dev/null +++ b/tests/gold_tests/pluginTest/cache_fill/cache_fill.test.py @@ -0,0 +1,230 @@ +''' +Test cache_fill plugin +''' +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +Test.Summary = ''' +Basic cache_fill plugin test +''' + +Test.SkipUnless( + Condition.PluginExists('cache_fill.so'), + Condition.PluginExists('xdebug.so'), +) +Test.ContinueOnFail = True +Test.testName = "cache_fill" + + +class CacheFillTest: + + def __init__(self): + self.setUpOriginServer() + self.setUpTS() + self.curl_and_args = '-s -D /dev/stdout -v -x localhost:{} -H "x-debug: x-cache,x-cache-key"'.format(self.ts.Variables.port) + + def setUpOriginServer(self): + # Define and configure origin server + self.server = Test.MakeOriginServer("server") + + req = { + "headers": "GET /nostore HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "Accept: */*\r\n" + "Range: bytes=0-4\r\n" + "\r\n", + "timestamp": "1469733493.993", + "body": "" + } + + res = { + "headers": + "HTTP/1.1 200 OK\r\n" + "Cache-Control: nostore\r\n" + "Connection: close\r\n" + 'Etag: 994324f6-78f6bc3e8d639\r\n', + "timestamp": "1469733493.993", + "body": "hello hello" + } + + self.server.addResponse("sessionlog.json", req, res) + + req = {"headers": "GET /200 HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "\r\n", "timestamp": "1469733493.993", "body": ""} + + res = { + "headers": + "HTTP/1.1 200 OK\r\n" + "Cache-Control: max-age=1\r\n" + "Connection: close\r\n" + + 'Etag: 772102f4-56f4bc1e6d417\r\n', + "timestamp": "1469733493.993", + "body": "hello hello" + } + + self.server.addResponse("sessionlog.json", req, res) + + req = { + "headers": "GET /range HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "Accept: */*\r\n" + "Range: bytes=0-4\r\n" + "\r\n", + "timestamp": "1469733493.993", + "body": "" + } + res = { + "headers": + "HTTP/1.1 200 OK\r\n" + "Cache-Control: max-age=1\r\n" + "Connection: close\r\n" + + 'Etag: 883213f5-67f5bc2e7d528\r\n', + "timestamp": "1469733493.993", + "body": "hello hello" + } + + self.server.addResponse("sessionlog.json", req, res) + + def setUpTS(self): + # Define and configure ATS + self.ts = Test.MakeATSProcess("ts") + + self.ts.Disk.remap_config.AddLines( + [ + 'map 
http://www.example.com/200 http://127.0.0.1:{}/200 @plugin=cache_fill.so'.format(self.server.Variables.Port), + 'map http://www.example.com/range http://127.0.0.1:{}/range @plugin=cache_fill.so'.format( + self.server.Variables.Port), + 'map http://www.example.com/nostore http://127.0.0.1:{}/nostore @plugin=cache_fill.so'.format( + self.server.Variables.Port), + 'map http://www.example.com/304 http://127.0.0.1:{}/range @plugin=cache_fill.so'.format(self.server.Variables.Port), + ]) + + self.ts.Disk.plugin_config.AddLine('xdebug.so --enable=x-cache,x-cache-key') + + self.ts.Disk.records_config.update( + { + 'proxy.config.diags.debug.enabled': 1, + 'proxy.config.diags.debug.tags': 'cache_fill|.*cache.*', + }) + + def test_cacheMiss(self): + # Cache miss; background fetch should fill cache + tr = Test.AddTestRun("Cache miss") + ps = tr.Processes.Default + ps.StartBefore(self.server, ready=When.PortOpen(self.server.Variables.Port)) + ps.StartBefore(Test.Processes.ts) + tr.MakeCurlCommand(self.curl_and_args + ' http://www.example.com/200', ts=self.ts) + ps.ReturnCode = 0 + ps.Streams.stdout.Content = Testers.ContainsExpression("X-Cache: miss", "expected cache miss") + ps.Streams.stdout.Content += Testers.ContainsExpression("200 OK", "Expected 200 OK status") + tr.StillRunningAfter = self.ts + + def test_cacheHit(self): + # Cache hit-fresh from background fill + tr = Test.AddTestRun("Cache hit-fresh") + ps = tr.Processes.Default + tr.MakeCurlCommand(self.curl_and_args + ' http://www.example.com/200', ts=self.ts) + ps.ReturnCode = 0 + ps.Streams.stdout.Content = Testers.ContainsExpression("X-Cache: hit-fresh", "expected cache hit") + ps.Streams.stdout.Content += Testers.ContainsExpression("200 OK", "Expected 200 OK status") + tr.StillRunningAfter = self.ts + + def test_rangeReq_CacheMiss(self): + # Cache miss; background fetch should fill cache + tr = Test.AddTestRun("Range cache miss") + ps = tr.Processes.Default + tr.MakeCurlCommand(self.curl_and_args + ' 
http://www.example.com/range -r 0-4', ts=self.ts) + ps.ReturnCode = 0 + ps.Streams.stdout.Content = Testers.ContainsExpression("X-Cache: miss", "expected cache miss") + ps.Streams.stdout.Content += Testers.ContainsExpression("200 OK", "Expected 200 status") + tr.StillRunningAfter = self.ts + + def test_rangeReq_CacheHit(self): + # Cache hit-fresh from background fill + tr = Test.AddTestRun("Range cache hit-fresh") + ps = tr.Processes.Default + tr.MakeCurlCommand(self.curl_and_args + ' http://www.example.com/range -r 0-4', ts=self.ts) + ps.ReturnCode = 0 + ps.Streams.stdout.Content = Testers.ContainsExpression("X-Cache: hit-fresh", "expected cache hit") + ps.Streams.stdout.Content += Testers.ContainsExpression("206 Partial Content", "Expected 206 status") + ps.Streams.stdout.Content += Testers.ContainsExpression( + "Content-Range: bytes 0-4/11", "Expected Content-Range: bytes 0-4/11") + tr.StillRunningAfter = self.ts + + def test_noStore_noFill(self): + # Background fetch should NOT fill cache + tr = Test.AddTestRun("nostore cache miss") + ps = tr.Processes.Default + tr.MakeCurlCommand(self.curl_and_args + ' http://www.example.com/nostore -r 0-4', ts=self.ts) + ps.ReturnCode = 0 + ps.Streams.stdout.Content = Testers.ContainsExpression("X-Cache: miss", "expected cache miss") + ps.Streams.stdout.Content += Testers.ContainsExpression("200 OK", "Expected 200 status") + tr.StillRunningAfter = self.ts + + def test_nostore_cacheMiss(self): + # Cache hit miss because background fill was not triggered + tr = Test.AddTestRun("nostore cache hit-fresh") + ps = tr.Processes.Default + tr.MakeCurlCommand(self.curl_and_args + ' http://www.example.com/nostore -r 0-4', ts=self.ts) + ps.ReturnCode = 0 + ps.Streams.stdout.Content = Testers.ContainsExpression("X-Cache: miss", "expected cache miss") + ps.Streams.stdout.Content += Testers.ContainsExpression("200 OK", "Expected 200 status") + tr.StillRunningAfter = self.ts + + def setUpGlobalPlugin(self): + ## Check global plugin ## + 
self.ts.Disk.plugin_config.AddLine('cache_fill.so') + + self.ts.Disk.remap_config.AddLines( + ['map http://www.example.com/global http://127.0.0.1:{}/global'.format(self.server.Variables.Port)]) + + req = { + "headers": "GET /global HTTP/1.1\r\n" + "Host: www.example.com\r\n" + "\r\n", + "timestamp": "1469733493.993", + "body": "" + } + res = { + "headers": + "HTTP/1.1 200 OK\r\n" + "Cache-Control: max-age=1\r\n" + "Connection: close\r\n" + + 'Etag: 661091f3-45f3bc0e5d306\r\n', + "timestamp": "1469733493.993", + "body": "hello hello" + } + + self.server.addResponse("sessionlog.json", req, res) + + def test_global_cacheMiss(self): + # Global implementation: Cache miss; background fetch should fill cache + tr = Test.AddTestRun("Cache miss - global implementation") + + ps = tr.Processes.Default + tr.MakeCurlCommand(self.curl_and_args + ' http://www.example.com/global', ts=self.ts) + ps.ReturnCode = 0 + ps.Streams.stdout.Content = Testers.ContainsExpression("X-Cache: miss", "expected cache miss") + + ps.Streams.stdout.Content += Testers.ContainsExpression("200 OK", "Expected 200 status") + tr.StillRunningAfter = self.ts + + def test_global_cacheHit(self): + # Global implementation: Cache hit-fresh from background fill + tr = Test.AddTestRun("Cache hit-fresh - global implementation") + ps = tr.Processes.Default + tr.MakeCurlCommand(self.curl_and_args + ' http://www.example.com/global', ts=self.ts) + ps.ReturnCode = 0 + ps.Streams.stdout.Content = Testers.ContainsExpression("X-Cache: hit-fresh", "expected cache hit") + ps.Streams.stdout.Content += Testers.ContainsExpression("200 OK", "Expected 200 status") + tr.StillRunningAfter = self.ts + + def runTraffic(self): + self.test_cacheMiss() + self.test_cacheHit() + self.test_rangeReq_CacheMiss() + self.test_rangeReq_CacheHit() + self.test_noStore_noFill() + self.test_nostore_cacheMiss() + self.setUpGlobalPlugin() + self.test_global_cacheMiss() + self.test_global_cacheHit() + + def run(self): + self.runTraffic() + + 
+CacheFillTest().run()
