TS-4023 Adds a new cachekey plugin This plugin allows some common cache key manipulations based on various HTTP request elements. It can - sort query parameters so reordering can be a cache hit - ignore specific query parameters from the cache key by name or regular expression - ignore all query parameters from the cache key - only use specific query parameters in the cache key by name or regular expression - include headers or cookies by name - capture values from the User-Agent header. - classify request using User-Agent and a list of regular expressions
This closes #371 Project: http://git-wip-us.apache.org/repos/asf/trafficserver/repo Commit: http://git-wip-us.apache.org/repos/asf/trafficserver/commit/d2140cf0 Tree: http://git-wip-us.apache.org/repos/asf/trafficserver/tree/d2140cf0 Diff: http://git-wip-us.apache.org/repos/asf/trafficserver/diff/d2140cf0 Branch: refs/heads/master Commit: d2140cf0128c6f89ce843dbcf8816e979de7c8c7 Parents: 4c6f15e Author: Gancho Tenev <[email protected]> Authored: Fri Nov 13 14:03:17 2015 -0800 Committer: Leif Hedstrom <[email protected]> Committed: Tue Dec 15 16:41:05 2015 -0800 ---------------------------------------------------------------------- configure.ac | 1 + doc/admin-guide/plugins/cachekey.en.rst | 361 +++++++++++ doc/admin-guide/plugins/index.en.rst | 1 + plugins/experimental/Makefile.am | 1 + plugins/experimental/cachekey/Makefile.am | 25 + plugins/experimental/cachekey/README.md | 13 + plugins/experimental/cachekey/cachekey.cc | 519 +++++++++++++++ plugins/experimental/cachekey/cachekey.h | 81 +++ plugins/experimental/cachekey/common.cc | 38 ++ plugins/experimental/cachekey/common.h | 62 ++ plugins/experimental/cachekey/configs.cc | 428 +++++++++++++ plugins/experimental/cachekey/configs.h | 161 +++++ plugins/experimental/cachekey/pattern.cc | 505 +++++++++++++++ plugins/experimental/cachekey/pattern.h | 137 ++++ plugins/experimental/cachekey/plugin.cc | 130 ++++ .../experimental/cachekey/tests/pattern_test.cc | 66 ++ .../cachekey/tests/test_cachekey.py | 636 +++++++++++++++++++ 17 files changed, 3165 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/configure.ac ---------------------------------------------------------------------- diff --git a/configure.ac b/configure.ac index 22f8a59..6ad6d37 100644 --- a/configure.ac +++ b/configure.ac @@ -1908,6 +1908,7 @@ AS_IF([test "x$enable_experimental_plugins" = "xyes"], [ plugins/experimental/buffer_upload/Makefile 
plugins/experimental/cache_range_requests/Makefile plugins/experimental/channel_stats/Makefile + plugins/experimental/cachekey/Makefile plugins/experimental/cache_promote/Makefile plugins/experimental/collapsed_connection/Makefile plugins/experimental/custom_redirect/Makefile http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/doc/admin-guide/plugins/cachekey.en.rst ---------------------------------------------------------------------- diff --git a/doc/admin-guide/plugins/cachekey.en.rst b/doc/admin-guide/plugins/cachekey.en.rst new file mode 100644 index 0000000..294348c --- /dev/null +++ b/doc/admin-guide/plugins/cachekey.en.rst @@ -0,0 +1,361 @@ +.. _cachekey-plugin: +.. Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. + + +Cache Key Manipulation (cachekey) +--------------------------------------- + +Description +=========== + +This plugin allows some common cache key manipulations based on various HTTP request components. 
It can + +* sort query parameters to prevent query parameter reordering from being a cache miss +* ignore specific query parameters from the cache key by name or regular expression +* ignore all query parameters from the cache key +* only use specific query parameters in the cache key by name or regular expression +* include headers or cookies by name +* capture values from the ``User-Agent`` header. +* classify request using ``User-Agent`` and a list of regular expressions + +Plugin parameters +================= + +All parameters are optional, and if not used, their default values are as mentioned below. Boolean values default to ``false`` and the rest default to an empty list. Examples of each parameter's usage can be found below. + +* URI query parameters + * If no query related plugin parameters are used, the query is included as received from the UA in the cache key. + * ``--exclude-params`` (default: empty list) - comma-separated list of query params to be black-listed in the cache key. If the list is empty then no black-list is applied (no query parameters will be excluded from the cache key). The exclude list overrides the include list. + * ``--include-params`` (default: empty list) - comma-separated list of query params to be white-listed in the cache key. If the list is empty then no white-list is applied (all query parameters will be included in the cache key). + * ``--include-match-params`` (default: empty list) - regular expression matching query parameter names which will be white-listed in the cache key. + * ``--exclude-match-params`` (default: empty list) - regular expression matching query parameter names which will be black-listed in the cache key. + * ``--remove-all-params`` (boolean:``true|false``, ``0|1``, ``yes|no``, default: ``false``) - if equals ``true`` then all query parameters are removed (the whole query string) and all other URI query parameter related settings (if used) will have no effect.
+ * ``--sort-params`` (boolean:``true|false``, ``0|1``, ``yes|no``, default: ``false``) - if equals ``true`` then all query parameters are sorted in an increasing case-sensitive order +* HTTP headers + * ``--include-headers`` (default: empty list) - comma separated list of headers to be added to the cache key. +* HTTP cookies + * ``--include-cookies`` (default: empty list) - comma separated list of cookies to be added to the cache key. + +* Host name, port and custom prefix + * Host and port are added to the beginning of the cache key by default unless a custom prefix is specified by using the ``--static-prefix`` or ``--capture-prefix`` plugin parameters. + * ``--static-prefix`` (default: empty string) - if specified and not an empty string the value will be added to the beginning of the cache key. + * ``--capture-prefix=<capture_definition>`` (default: empty string) - if specified and not an empty string will capture strings from ``host:port`` based on the ``<capture_definition>`` (see below) and add them to the beginning of the cache key. + * If ``--static-prefix`` and ``--capture-prefix`` are used together then the value of ``--static-prefix`` is added first to the cache key, followed by the ``--capture-prefix`` capturing/replacement results. + +* ``User-Agent`` classification + * ``--ua-whitelist=<classname>:<filename>`` (default: empty string) - loads a regex patterns list from a file ``<filename>``, the patterns are matched against the ``User-Agent`` header and if matched ``<classname>`` is added to the key. + * ``--ua-blacklist=<classname>:<filename>`` (default: empty string) - loads a regex patterns list from a file ``<filename>``, the patterns are matched against the ``User-Agent`` header and if **not** matched ``<classname>`` is added to the key.
+ +* ``User-Agent`` regex capturing and replacement + * ``--ua-capture=<capture_definition>`` (default: empty string) - if specified and not an empty string will capture strings from ``User-Agent`` header based on ``<capture_definition>`` (see below) and will add them to the cache key. + +* ``<capture_definition>`` can be in the following formats + * ``<regex>`` - ``<regex>`` defines regex capturing groups, up to 10 captured strings based on ``<regex>`` will be added to the cache key. + * ``/<regex>/<replacement>/`` - ``<regex>`` defines regex capturing groups, ``<replacement>`` defines a pattern where the captured strings referenced with ``$0`` ... ``$9`` will be substituted and the result will be added to the cache key. + +Cache Key Structure +=================== + +:: + + | hierarchical part query + HTTP request | ââââââââââââââââââââââââââââââââââ´ââââââââââââââââââââââââââââââââââââââââââââââââ´ââââââ + components | URI host and port HTTP headers and cookies URI path URI query + | ââââââââââ´âââââââââââââââââââââââââââ´âââââââââââââââââââââââââââââââââ´ââââââââââââ´ââââââ + Sample 1 | /www.example.com/80/popular/Mozilla/5.0/H1:v1/H2:v2/C1=v1;C2=v2/path/to/data?a=1&b=2&c=3 + Sample 2 | /nice_custom_prefix/popular/Mozilla/5.0/H1:v1/H2:v2/C1=v1;C2=v2/path/to/data?a=1&b=2&c=3 + | ââââââââââ¬ââââââââââââââ¬ââââââââââ¬âââââââââââ¬âââââââââââââ¬ââââââââââââ¬ââââââââââââ¬ââââââ + Cache Key | host:port or UA-class UA-captures headers cookies path query + components | custom prefix replacement + + +* With the current implementation the following cache key components are always present in the cache key: + * ``prefix or host:port`` - included at the beginning of the cache key. If neither ``--static-prefix`` nor ``--capture-prefix`` are specified or are empty strings then ``host:port`` from the request URI are used. 
+ * ``path`` - URI path included **as is** (but can be empty) +* The rest of the cache key components are optional and their presence in the cache key depends on the plugin configuration and the HTTP requests handled by the plugin: + * ``UA-class`` - a single class name, result of UA classification defined by ``--ua-whitelist`` and ``--ua-blacklist`` parameters. + * ``UA-captures`` - a result of the regex capture (and possibly replacement) from the first ``User-Agent`` header. + * ``headers`` - always sorted list of headers defined by ``--include-headers`` + * ``cookies`` - always sorted list of cookies defined by ``--include-cookies`` + * ``query`` - the request URI query **as is** or a list of query parameters processed by this plugin as configured. +* The following URI components are ignored (not included in the cache key): + * ``scheme:`` + * ``user:password@`` from the ``authority`` URI component + * ``#fragment`` + +The following is an example of how the above sample keys were generated (``Sample 1`` and ``Sample 2``).
+ +Traffic server configuration :: + + $ cat etc/trafficserver/remap.config + map http://www.example.com http://www.origin.com \ + @plugin=cachekey.so \ + @pparam=--ua-whitelist=popular:popular_agents.config \ + @pparam=--ua-capture=(Mozilla\/[^\s]*).* \ + @pparam=--include-headers=H1,H2 \ + @pparam=--include-cookies=C1,C2 \ + @pparam=--include-params=a,b,c \ + @pparam=--sort-params=true + + $ cat etc/trafficserver/popular_agents.config + ^Mozilla.* + ^Twitter.* + ^Facebo.* + + $ cat etc/trafficserver/plugin.config + xdebug.so + +HTTP request :: + + $ curl 'http://www.example.com/path/to/data?c=3&a=1&b=2&x=1&y=2&z=3' \ + -v -x 127.0.0.1:8080 -o /dev/null -s \ + -H "H1: v1" \ + -H "H2: v2" \ + -H "Cookie: C1=v1; C2=v2" \ + -H 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A' \ + -H 'X-Debug: X-Cache-Key' + * About to connect() to proxy 127.0.0.1 port 8080 (#0) + * Trying 127.0.0.1... connected + * Connected to 127.0.0.1 (127.0.0.1) port 8080 (#0) + > GET http://www.example.com/path/to/data?c=3&a=1&b=2&x=1&y=2&z=3 HTTP/1.1 + > Host: www.example.com + > Accept: */* + > Proxy-Connection: Keep-Alive + > H1: v1 + > H2: v2 + > Cookie: C1=v1; C2=v2 + > User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A + > X-Debug: X-Cache-Key + > + < HTTP/1.1 200 OK + < Server: ATS/6.1.0 + < Date: Thu, 19 Nov 2015 23:17:58 GMT + < Content-type: application/json + < Age: 0 + < Transfer-Encoding: chunked + < Proxy-Connection: keep-alive + < X-Cache-Key: /www.example.com/80/popular/Mozilla/5.0/H1:v1/H2:v2/C1=v1;C2=v2/path/to/data?a=1&b=2&c=3 + < + { [data not shown] + * Connection #0 to host 127.0.0.1 left intact + * Closing connection #0 + +The response header ``X-Cache-Key`` header contains the cache key: :: + + /www.example.com/80/popular/Mozilla/5.0/H1:v1/H2:v2/C1=v1;C2=v2/path/to/data?a=1&b=2&c=3 + +The ``xdebug.so`` plugin 
and ``X-Debug`` request header are used just to demonstrate basic cache key troubleshooting. + +If we add ``--static-prefix=nice_custom_prefix`` to the remap rule then the cache key would look like the following: :: + + /nice_custom_prefix/popular/Mozilla/5.0/H1:v1/H2:v2/C1=v1;C2=v2/path/to/data?a=1&b=2&c=3 + +Usage examples +============== + +URI query parameters +^^^^^^^^^^^^^^^^^^^^ + +Ignore the query string (all query parameters) +"""""""""""""""""""""""""""""""""""""""""""""" +The following added to the remap rule will ignore the query, removing it from the cache key. :: + + @plugin=cachekey.so @pparam=--remove-all-params=true + +Cache key normalization by sorting the query parameters +""""""""""""""""""""""""""""""""""""""""""""""""""""""" +The following will normalize the cache key by sorting the query parameters. :: + + @plugin=cachekey.so @pparam=--sort-params=true + +If the URI has the following query string ``c=1&a=1&b=2&x=1&k=1&u=1&y=1`` the cache key will use ``a=1&b=2&c=1&k=1&u=1&x=1&y=1`` + +Ignore (exclude) certain query parameters +""""""""""""""""""""""""""""""""""""""""" + +The following will make sure query parameters `a` and `b` will **not** be used when constructing the cache key. :: + + @plugin=cachekey.so @pparam=--exclude-params=a,b + +If the URI has the following query string ``c=1&a=1&b=2&x=1&k=1&u=1&y=1`` the cache key will use ``c=1&x=1&k=1&u=1&y=1`` + +Ignore (exclude) certain query parameters from the cache key by using regular expression (PCRE) +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +The following will make sure query parameters ``a`` and ``b`` will **not** be used when constructing the cache key. 
:: + + @plugin=cachekey.so @pparam=--exclude-match-params=(a|b) + +If the URI has the following query string ``c=1&a=1&b=2&x=1&k=1&u=1&y=1`` the cache key will use ``c=1&x=1&k=1&u=1&y=1`` + +Include only certain query parameters +""""""""""""""""""""""""""""""""""""" +The following will make sure only query parameters ``a`` and ``c`` will be used when constructing the cache key and the rest will be ignored. :: + + @plugin=cachekey.so @pparam=--include-params=a,c + +If the URI has the following query string ``c=1&a=1&b=2&x=1&k=1&u=1&y=1`` the cache key will use ``c=1&a=1`` + +Include only certain query parameters by using regular expression (PCRE) +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +The following will make sure only query parameters ``a`` and ``c`` will be used when constructing the cache key and the rest will be ignored. :: + + @plugin=cachekey.so @pparam=--include-match-params=(a|c) + +If the URI has the following query string ``c=1&a=1&b=2&x=1&k=1&u=1&y=1`` the cache key will use ``c=1&a=1`` + +White-list + black-list certain parameters using multiple parameters in the same remap rule. +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +If the plugin is used with the following plugin parameters in the remap rule: :: + + @plugin=cachekey.so \ + @pparam=--exclude-params=x \ + @pparam=--exclude-params=y \ + @pparam=--exclude-params=z \ + @pparam=--include-params=y,c \ + @pparam=--include-params=x,b + +and if the URI has the following query string ``c=1&a=1&b=2&x=1&k=1&u=1&y=1`` the cache key will use ``c=1&b=2`` + +White-list + black-list certain parameters using multiple parameters in the same remap rule and regular expressions (PCRE).
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +If the plugin is used with the following plugin parameters in the remap rule: :: + + @plugin=cachekey.so \ + @pparam=--exclude-match-params=x \ + @pparam=--exclude-match-params=y \ + @pparam=--exclude-match-params=z \ + @pparam=--include-match-params=(y|c) \ + @pparam=--include-match-params=(x|b) + +and if the URI has the following query string ``c=1&a=1&b=2&x=1&k=1&u=1&y=1`` the cache key will use ``c=1&b=2`` + +Mixing --include-params, --exclude-params, --include-match-param and --exclude-match-param +"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +If the plugin is used with the following plugin parameters in the remap rule: :: + + @plugin=cachekey.so \ + @pparam=--exclude-params=x \ + @pparam=--exclude-match-params=y \ + @pparam=--exclude-match-params=z \ + @pparam=--include-params=y,c \ + @pparam=--include-match-params=(x|b) + +and if the URI has the following query string ``c=1&a=1&b=2&x=1&k=1&u=1&y=1`` the cache key will use ``c=1&b=2`` + +HTTP Headers +^^^^^^^^^^^^ + +Include certain headers in the cache key +"""""""""""""""""""""""""""""""""""""""" +The following headers ``HeaderA`` and ``HeaderB`` will be used when constructing the cache key and the rest will be ignored. :: + + @plugin=cachekey.so @pparam=--include-headers=HeaderA,HeaderB + +HTTP Cookies +^^^^^^^^^^^^ + +Include certain cookies in the cache key +"""""""""""""""""""""""""""""""""""""""" + +The following cookies ``CookieA`` and ``CookieB`` will be used when constructing the cache key and the rest will be ignored.
:: + + @plugin=cachekey.so @pparam=--include-cookies=CookieA,CookieB + + +Host name, port and static prefix +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Replacing host:port with a static cache key prefix +""""""""""""""""""""""""""""""""""""""""""""""""""" +If the plugin is used with the following plugin parameter in the remap rule. :: + + @plugin=cachekey.so @pparam=--static-prefix=static_prefix + +the cache key will be prefixed with ``/static_prefix`` instead of ``host:port`` when ``--static-prefix`` is not used. + +Capturing from the host:port and adding it to beginning of cache key prefix +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +If the plugin is used with the following plugin parameter in the remap rule. :: + + @plugin=cachekey.so @pparam=--capture-prefix=(test_prefix).*:([^\s\/$]*) + +the cache key will be prefixed with ``/test_prefix/80`` instead of ``test_prefix_371.example.com:80`` when ``--capture-prefix`` is not used. + +Combining --static-prefix and --capture-prefix +""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" +If the plugin is used with the following plugin parameter in the remap rule. :: + + @plugin=cachekey.so @pparam=--capture-prefix=(test_prefix).*:([^\s\/$]*) @pparam=--static-prefix=static_prefix + +the cache key will be prefixed with ``/static_prefix/test_prefix/80`` instead of ``test_prefix_371.example.com:80`` when neither ``--capture-prefix`` nor ``--static-prefix`` are used.
+ +User-Agent capturing, replacement and classification +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Let us say we have a request with ``User-Agent`` header: :: + + Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) + AppleWebKit/537.75.14 (KHTML, like Gecko) + Version/7.0.3 Safari/7046A194A + + +Capture PCRE groups from User-Agent header +"""""""""""""""""""""""""""""""""""""""""" +If the plugin is used with the following plugin parameter:: + + @plugin=cachekey.so \ + @pparam=--ua-capture=(Mozilla\/[^\s]*).*(AppleWebKit\/[^\s]*) + +then ``Mozilla/5.0`` and ``AppleWebKit/537.75.14`` will be used when constructing the key. + +Capture and replace groups from User-Agent header +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +If the plugin is used with the following plugin parameter:: + + @plugin=cachekey.so \ + @pparam=--ua-capture=/(Mozilla\/[^\s]*).*(AppleWebKit\/[^\s]*)/$1_$2/ + +then ``Mozilla/5.0_AppleWebKit/537.75.14`` will be used when constructing the key. + +User-Agent white-list classifier +"""""""""""""""""""""""""""""""" +If the plugin is used with the following plugin parameter:: + + @plugin=cachekey.so \ + @pparam=--ua-whitelist=browser:browser_agents.config + +and if ``browser_agents.config`` contains: :: + + ^Mozilla.* + ^Twitter.* + ^Facebo.* + +then ``browser`` will be used when constructing the key. + +User-Agent black-list classifier +"""""""""""""""""""""""""""""""" +If the plugin is used with the following plugin parameter:: + + @plugin=cachekey.so \ + @pparam=--ua-blacklist=browser:tool_agents.config + +and if ``tool_agents.config`` contains: :: + + ^PHP.* + ^Python.* + ^curl.* + +then ``browser`` will be used when constructing the key. 
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/doc/admin-guide/plugins/index.en.rst ---------------------------------------------------------------------- diff --git a/doc/admin-guide/plugins/index.en.rst b/doc/admin-guide/plugins/index.en.rst index 25dcadb..37845d6 100644 --- a/doc/admin-guide/plugins/index.en.rst +++ b/doc/admin-guide/plugins/index.en.rst @@ -75,6 +75,7 @@ directory of the Apache Traffic Server source tree. Experimental plugins can be Balancer: balances requests across multiple origin servers <balancer.en> Buffer Upload: buffers POST data before connecting to the Origin server <buffer_upload.en> Cache Promotion: provides additional control over when an object should be allowed into the cache <cache_promote.en> + Cachekey: allows some common cache key manipulations based on various HTTP request elements <cachekey.en> Combo Handler: provides an intelligent way to combine multiple URLs into a single URL, and have Apache Traffic Server combine the components into one response <combo_handler.en> ESI: implements the ESI specification <esi.en> Epic: emits Traffic Server metrics in a format that is consumed tby the Epic Network Monitoring System <epic.en> http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/plugins/experimental/Makefile.am ---------------------------------------------------------------------- diff --git a/plugins/experimental/Makefile.am b/plugins/experimental/Makefile.am index cfef4a3..fba51b5 100644 --- a/plugins/experimental/Makefile.am +++ b/plugins/experimental/Makefile.am @@ -19,6 +19,7 @@ SUBDIRS = \ background_fetch \ balancer \ buffer_upload \ + cachekey \ cache_range_requests \ cache_promote \ collapsed_connection \ http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/plugins/experimental/cachekey/Makefile.am ---------------------------------------------------------------------- diff --git a/plugins/experimental/cachekey/Makefile.am b/plugins/experimental/cachekey/Makefile.am new 
file mode 100644 index 0000000..efd4456 --- /dev/null +++ b/plugins/experimental/cachekey/Makefile.am @@ -0,0 +1,25 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +include $(top_srcdir)/build/plugins.mk + +pkglib_LTLIBRARIES = cachekey.la +cachekey_la_SOURCES = cachekey.cc common.cc configs.cc pattern.cc plugin.cc +cachekey_la_LDFLAGS = $(TS_PLUGIN_LDFLAGS) + +VIRTUALENV_DIR = ../../../ci/tsqa/virtualenv +tsqa: $(VIRTUALENV_DIR) + @. $(VIRTUALENV_DIR)/bin/activate && $(VIRTUALENV_DIR)/bin/nosetests --with-xunit -sv --logging-level=INFO http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/plugins/experimental/cachekey/README.md ---------------------------------------------------------------------- diff --git a/plugins/experimental/cachekey/README.md b/plugins/experimental/cachekey/README.md new file mode 100644 index 0000000..23946c0 --- /dev/null +++ b/plugins/experimental/cachekey/README.md @@ -0,0 +1,13 @@ +# Description +This plugin allows some common cache key manipulations based on various HTTP request elements. 
It can + +* sort query parameters to prevent query parameter reordering from being a cache miss +* ignore specific query parameters from the cache key by name or regular expression +* ignore all query parameters from the cache key +* only use specific query parameters in the cache key by name or regular expression +* include headers or cookies by name +* capture values from the `User-Agent` header. +* classify request using `User-Agent` and a list of regular expressions + +# Documentation +Details and examples can be found in [cachekey plugin documentation](../../../doc/admin-guide/plugins/cachekey.en.rst). http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/plugins/experimental/cachekey/cachekey.cc ---------------------------------------------------------------------- diff --git a/plugins/experimental/cachekey/cachekey.cc b/plugins/experimental/cachekey/cachekey.cc new file mode 100644 index 0000000..c2246bd --- /dev/null +++ b/plugins/experimental/cachekey/cachekey.cc @@ -0,0 +1,519 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/** + * @file cachekey.cc + * @brief Cache key manipulation.
+ */ + +#include <string.h> /* strlen() */ +#include <sstream> /* istringstream */ +#include "cachekey.h" + +/** + * @brief Appends the decimal representation of n to target. + * @param target string to append to + * @param n unsigned integer to be appended + */ +static void +append(String &target, unsigned n) +{ + char buf[sizeof("4294967295")]; + snprintf(buf, sizeof(buf), "%u", n); + target.append(buf); +} + +/** + * @brief Percent-encodes the first len bytes of s and appends the result to target. + * @param target string to append to + * @param s pointer to the bytes to be encoded + * @param len number of bytes to encode, nothing is appended when it is 0 + * @note If the encoding fails the bytes are appended unencoded (best effort). + */ +static void +appendEncoded(String &target, const char *s, size_t len) +{ + if (0 == len) { + return; + } + + /* Every input byte may expand to 3 bytes (%XX) when percent-encoded, plus one spare byte in case the encoder + * NUL-terminates its output. A smaller buffer makes the encoding fail for inputs that consist mostly of + * characters that need encoding, in which case they would end up in the key unencoded. */ + char tmp[len * 3 + 1]; + size_t written; + + /* The default table does not encode the comma, so we need to use our own table here. */ + static const unsigned char map[32] = { + 0xFF, 0xFF, 0xFF, + 0xFF, // control + 0xB4, // space " # % + 0x08, // , + 0x00, // + 0x0A, // < > + 0x00, 0x00, // + 0x00, // + 0x1E, 0x80, // [ \ ] ^ ` + 0x00, 0x00, // + 0x1F, // { | } ~ DEL + 0x00, 0x00, 0x00, + 0x00, // all non-ascii characters unmodified + 0x00, 0x00, 0x00, + 0x00, // . + 0x00, 0x00, 0x00, + 0x00, // . + 0x00, 0x00, 0x00, + 0x00 // . + }; + + if (TSStringPercentEncode(s, len, tmp, sizeof(tmp), &written, map) == TS_SUCCESS) { + target.append(tmp, written); + } else { + /* If the encoding fails (pretty unlikely), then just append what we have. + * This is just a best-effort encoding anyway. */ + target.append(s, len); + } +} + +template <typename ContainerType, typename Iterator> +static String +containerToString(ContainerType &c, const String &sdelim, const String &delim) +{ + String result; + for (Iterator arg(c.begin()); arg != c.end(); ++arg) { + result.append(arg == c.begin() ?
sdelim : delim); + result.append(*arg); + } + return result; +} +/** @brief Adds s to a set container (used by getKeyQuery() through overload resolution). */ +static void +appendToContainer(StringSet &c, const String &s) +{ + c.insert(s); +} +/** @brief Adds s to the end of a list container (used by getKeyQuery() through overload resolution). */ +static void +appendToContainer(StringList &c, const String &s) +{ + c.push_back(s); +} +/** + * @brief Builds the query part of the cache key from the first length bytes of query. + * The query is split on the ampersand character, the parameter name is the text before the first equals sign + * (or the whole token if there is none) and only tokens whose name passes config.toBeAdded() are kept. + * @tparam T container type used to collect the kept tokens, it must have an appendToContainer() overload. + * @return the kept tokens joined with ampersands and prefixed with a question mark, or an empty string if nothing was kept. + */ +template <typename T> +static String +getKeyQuery(const char *query, int length, const ConfigQuery &config) +{ + std::istringstream istr(String(query, length)); + String token; + T container; + + while (std::getline(istr, token, '&')) { + String::size_type pos(token.find_first_of("=")); + String param(token.substr(0, pos == String::npos ? token.size() : pos)); + + if (config.toBeAdded(param)) { + ::appendToContainer(container, token); + } + } + + return containerToString<T, typename T::const_iterator>(container, "?", "&"); +} +/** @brief Erases the leading spaces of target; a string without any non-space character is left unchanged. */ +static void +ltrim(String &target) +{ + String::size_type p(target.find_first_not_of(' ')); + + if (p != target.npos) { + target.erase(0, p); + } +} +/** @brief Returns the next duplicate of field and releases the handle of the field passed in. */ +static TSMLoc +nextDuplicate(TSMBuffer buffer, TSMLoc hdr, TSMLoc field) +{ + TSMLoc next = TSMimeHdrFieldNextDup(buffer, hdr, field); + TSHandleMLocRelease(buffer, hdr, field); + return next; +} + +/** + * @brief Iterates through all User-Agent headers and fields and classifies them using provided classifier.
+ * @param c classifier + * @param buf marshal buffer from the request + * @param hdrs headers handle from the request + * @param classname reference to the string where the class name will be returned + * @return true if the classifier matched one of the User-Agent values, false otherwise. + */ +static bool +classifyUserAgent(const Classifier &c, TSMBuffer buf, TSMLoc hdrs, String &classname) +{ + TSMLoc field; + bool matched = false; + + field = TSMimeHdrFieldFind(buf, hdrs, TS_MIME_FIELD_USER_AGENT, TS_MIME_LEN_USER_AGENT); + while (field != TS_NULL_MLOC && !matched) { + const char *value; + int len; + int count = TSMimeHdrFieldValuesCount(buf, hdrs, field); + + for (int i = 0; i < count; ++i) { + value = TSMimeHdrFieldValueStringGet(buf, hdrs, field, i, &len); + const String val(value, len); + if (c.classify(val, classname)) { + matched = true; + break; + } + } + /* nextDuplicate() releases the handle of the current field before the next one is inspected. */ + field = ::nextDuplicate(buf, hdrs, field); + } + /* Here field is TS_NULL_MLOC or, when a match ended the loop, the next duplicate which was not inspected. */ + TSHandleMLocRelease(buf, hdrs, field); + return matched; +} + +/** + * @brief Constructor storing the transaction, marshal buffer, URI and headers handles and reserving space for the key. + * @param txn transaction handle. + * @param buf marshal buffer + * @param url URI handle + * @param hdrs headers handle + */ +CacheKey::CacheKey(TSHttpTxn txn, TSMBuffer buf, TSMLoc url, TSMLoc hdrs) : _txn(txn), _buf(buf), _url(url), _hdrs(hdrs) +{ + _key.reserve(512); +} + +/** + * @brief Append a slash separator followed by an unsigned integer (in decimal) to the key. + * @param n unsigned integer + */ +void +CacheKey::append(unsigned n) +{ + _key.append("/"); + ::append(_key, n); +} + +/** + * @brief Append a slash separator followed by the percent-encoded string to the key. + * @param s string + */ +void +CacheKey::append(const String &s) +{ + _key.append("/"); + ::appendEncoded(_key, s.data(), s.size()); +} + +/** + * @brief Append a slash separator followed by the percent-encoded null-terminated C-style string to the key. + * @param s null-terminated C-style string. + */ +void +CacheKey::append(const char *s) +{ + _key.append("/"); + ::appendEncoded(_key, s, strlen(s)); +} + +/** + * @brief Append first n characters from array of characters pointed by s.
+ * @param n number of characters + * @param s character array pointer + * @note A slash separator is added first and the characters are percent-encoded. + */ +void +CacheKey::append(const char *s, unsigned n) +{ + _key.append("/"); + ::appendEncoded(_key, s, n); +} + +/** + * @brief Append a custom prefix or the host:port part of the URI to the cache key. + * @note This is the only cache key component from the key which is always available. + * @param prefix if not empty string the method will append the static prefix to the cache key. + * @param pattern if not empty the method will append the result of regex capturing and/or replacement to the cache key. + * @note if both prefix and pattern are not empty prefix will be added first, followed by the results from pattern. + * @note if pattern is not empty but does not match host:port nothing is appended for it and the default + * host and port prefix is not used either. + */ +void +CacheKey::appendPrefix(const String &prefix, Pattern &pattern) +{ + bool customPrefix = false; + + if (!prefix.empty()) { + customPrefix = true; + append(prefix); + CacheKeyDebug("added static prefix, key: '%s'", _key.c_str()); + } + + int len; + const char *ptr = TSUrlHostGet(_buf, _url, &len); + int port = TSUrlPortGet(_buf, _url); + + if (!pattern.empty()) { + customPrefix = true; + /* The pattern is applied to a string of the form host:port. */ + String hostAndPort; + hostAndPort.append(ptr, len).append(":"); + ::append(hostAndPort, port); + + StringVector captures; + if (pattern.process(hostAndPort, captures)) { + for (StringVector::iterator it = captures.begin(); it != captures.end(); it++) { + append(*it); + } + CacheKeyDebug("added capture prefix, key: '%s'", _key.c_str()); + } + } + /* Neither a static nor a capture prefix was configured: use /host/port, the host is appended as is (not percent-encoded). */ + if (!customPrefix) { + _key.append("/").append(ptr, len).append("/"); + ::append(_key, port); + CacheKeyDebug("added default prefix, key: '%s'", _key.c_str()); + } +} + +/** + * @brief Appends the path from the URI to the cache key. + * @note A path is always defined for a URI, though the defined path may be empty (zero length) (RFC 3986) + * @todo enhance, i.e.
/<regex>/<replace>/ + */ +void +CacheKey::appendPath() +{ + const char *ptr; + int len; + + ptr = TSUrlPathGet(_buf, _url, &len); + if (NULL != ptr && 0 != len) { + _key.append("/"); + _key.append(ptr, len); + } +} + +/** + * @brief Append headers by following the rules specified in the header configuration object. + * @param config header-related configuration containing information about which headers need to be appended to the key. + * @note Add the headers to hier-part (RFC 3986), always sort them in the cache key. + */ +void +CacheKey::appendHeaders(const ConfigHeaders &config) +{ + if (config.toBeRemoved() || config.toBeSkipped()) { + // Don't add any headers to the cache key. + return; + } + + TSMLoc field; + StringSet hset; /* Sort and uniquify the header list in the cache key. */ + + /* Iterating header by header is not efficient according to comments inside traffic server API, + * Iterate over an 'include'-kind of list to avoid header by header iteration. + * @todo: revisit this when (if?) adding regex matching for headers. */ + for (StringSet::iterator it = config.getInclude().begin(); it != config.getInclude().end(); ++it) { + String name_s = *it; + + for (field = TSMimeHdrFieldFind(_buf, _hdrs, name_s.c_str(), name_s.size()); field != TS_NULL_MLOC; + field = ::nextDuplicate(_buf, _hdrs, field)) { + const char *value; + int vlen; + int count = TSMimeHdrFieldValuesCount(_buf, _hdrs, field); + + for (int i = 0; i < count; ++i) { + value = TSMimeHdrFieldValueStringGet(_buf, _hdrs, field, i, &vlen); + if (value == NULL || vlen == 0) { + CacheKeyDebug("missing value %d for header %s", i, name_s.c_str()); + continue; + } + + String value_s(value, vlen); + + if (config.toBeAdded(name_s)) { + String header; + header.append(name_s).append(":").append(value_s); + hset.insert(header); + CacheKeyDebug("adding header => '%s: %s'", name_s.c_str(), value_s.c_str()); + } + } + } + } + + /* It doesn't make sense to have the headers unordered in the cache key. 
*/ + String headers_key = containerToString<StringSet, StringSet::const_iterator>(hset, "", "/"); + if (!headers_key.empty()) { + append(headers_key); + } +} + +/** + * @brief Append cookies by following the rules specified in the cookies config object. + * @param config cookies-related configuration containing information about which cookies need to be appended to the key. + * @note Add the cookies to "hier-part" (RFC 3986), always sort them in the cache key. + */ +void +CacheKey::appendCookies(const ConfigCookies &config) +{ + if (config.toBeRemoved() || config.toBeSkipped()) { + /* Don't append any cookies to the cache key. */ + return; + } + + TSMLoc field; + StringSet cset; /* sort and uniquify the cookies list in the cache key */ + + for (field = TSMimeHdrFieldFind(_buf, _hdrs, TS_MIME_FIELD_COOKIE, TS_MIME_LEN_COOKIE); field != TS_NULL_MLOC; + field = ::nextDuplicate(_buf, _hdrs, field)) { + int count = TSMimeHdrFieldValuesCount(_buf, _hdrs, field); + + for (int i = 0; i < count; ++i) { + const char *value; + int len; + + value = TSMimeHdrFieldValueStringGet(_buf, _hdrs, field, i, &len); + if (value == NULL || len == 0) { + continue; + } + + std::istringstream istr(String(value, len)); + String cookie; + + while (std::getline(istr, cookie, ';')) { + ::ltrim(cookie); // Trim leading spaces. + + String::size_type pos(cookie.find_first_of("=")); + String name(cookie.substr(0, pos == String::npos ? cookie.size() : pos)); + + /* We only add it to the cache key it is in the cookie set. */ + if (config.toBeAdded(name)) { + cset.insert(cookie); + } + } + } + } + + /* We are iterating over the cookies in client order, + * but the cache key needs a stable ordering, so we sort via std::set. */ + String cookies_keys = containerToString<StringSet, StringSet::const_iterator>(cset, "", ";"); + if (!cookies_keys.empty()) { + append(cookies_keys); + } +} + +/** + * @brief Append query parameters by following the rules specified in the query configuration object. 
+ * @param config query configuration containing information about which query parameters need to be appended to the key. + * @note Keep the query parameters in the "query part" (RFC 3986). + */ +void +CacheKey::appendQuery(const ConfigQuery &config) +{ + /* No query parameters in the cache key? */ + if (config.toBeRemoved()) { + return; + } + + const char *query; + int length; + + query = TSUrlHttpQueryGet(_buf, _url, &length); + if (query == NULL || length == 0) { + return; + } + + /* If need to skip all other rules just append the whole query to the key. */ + if (config.toBeSkipped()) { + _key.append("?"); + _key.append(query, length); + return; + } + + /* Use the corresponding container based on whether we need + * to sort the parameters (set) or keep the order (list) */ + String keyQuery; + if (config.toBeSorted()) { + keyQuery = getKeyQuery<StringSet>(query, length, config); + } else { + keyQuery = getKeyQuery<StringList>(query, length, config); + } + + if (!keyQuery.empty()) { + _key.append(keyQuery); + } +} + +/** + * @brief Append User-Agent header captures specified in the Pattern configuration object. + * + * Apply given PCRE pattern/replacement to the first User-Agent value, and append any captured portions to cache key. + * @param config PCRE pattern which contains capture groups. + * @todo: TBD if ignoring the comma in the header as a field separator is generic enough. + * @note Add the UA captures to hier-part (RFC 3986) in the original order. + */ +void +CacheKey::appendUaCaptures(Pattern &config) +{ + if (config.empty()) { + return; + } + + TSMLoc field; + const char *value; + int len; + + field = TSMimeHdrFieldFind(_buf, _hdrs, TS_MIME_FIELD_USER_AGENT, TS_MIME_LEN_USER_AGENT); + if (field == TS_NULL_MLOC) { + CacheKeyDebug("missing %.*s header", TS_MIME_LEN_USER_AGENT, TS_MIME_FIELD_USER_AGENT); + return; + } + + /* Now, strictly speaking, the User-Agent header should not contain a comma, + * because that's really a field separator (RFC 2616). 
Unfortunately, the + * iOS apps will send an embedded comma and we have to deal with it as if + * it was a single header. */ + value = TSMimeHdrFieldValueStringGet(_buf, _hdrs, field, -1, &len); + if (value && len) { + String val(value, len); + StringVector captures; + + if (config.process(val, captures)) { + for (StringVector::iterator it = captures.begin(); it != captures.end(); it++) { + append(*it); + } + } + } + + TSHandleMLocRelease(_buf, _hdrs, field); +} + +/** + * @brief Append the class name based on the User-Agent classification using the provided classifier. + * @param classifier User-Agent header classifier which will return a single class name to be added to the key. + * @return true if classification successful, false if no match was found. + * @note Add the class to hier-part (RFC 3986). + */ +bool +CacheKey::appendUaClass(Classifier &classifier) +{ + String classname; + bool matched = ::classifyUserAgent(classifier, _buf, _hdrs, classname); + + if (matched) { + append(classname); + } else { + /* @todo: TBD do we need a default class name to be added to the key? */ + } + + return matched; +} + +/** + * @brief Update cache key. + * @return true if success, false if failed to set the cache key. + */ +bool +CacheKey::finalize() const +{ + CacheKeyDebug("finalizing cache key '%s'", _key.c_str()); + return TSCacheUrlSet(_txn, &(_key[0]), _key.size()) == TS_SUCCESS; +} http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/plugins/experimental/cachekey/cachekey.h ---------------------------------------------------------------------- diff --git a/plugins/experimental/cachekey/cachekey.h b/plugins/experimental/cachekey/cachekey.h new file mode 100644 index 0000000..7e5ddb4 --- /dev/null +++ b/plugins/experimental/cachekey/cachekey.h @@ -0,0 +1,81 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/** + * @file cachekey.h + * @brief Cache key manipulation (header file). + */ + +#ifndef PLUGINS_EXPERIMENTAL_CACHEKEY_CACHEKEY_H_ +#define PLUGINS_EXPERIMENTAL_CACHEKEY_CACHEKEY_H_ + +#include "common.h" +#include "configs.h" + +/** + * @brief Cache key manipulation class. + * + * Initialize the cache key from the request URI. + * + * The cache key is to be a valid URI. Key structure documented in doc/cachekey.en.rst#cache-key-structure + * @note scheme, #fragment, user:password@ from URI authority component are currently ignored. + * The query parameters, headers and cookies are handled similarly in general, + * but there are some differences in the handling of the query and the rest of the elements: + * - headers and cookies are never included in the cache key by default, query is. + * - query manipulation is different (stripping off, sorting, exclusion of query parameters, etc). + * That is why seemed like a good idea to add headers, cookies, UA-captures, UA-classes + * to the "hier-part" and keep only the query parameters in the "query part" (RFC 3986). + * + * @todo Consider avoiding the ATS API multiple-lookups while handling headers and cookies. 
+ * Currently ts/ts.h states that iterating through the headers one by one is not efficient + * but being able to iterate through all the headers once and figure out what to append to + * the cache key seems be more time efficient. + */ +class CacheKey +{ +public: + CacheKey(TSHttpTxn txn, TSMBuffer buf, TSMLoc url, TSMLoc hdrs); + + void append(unsigned number); + void append(const String &); + void append(const char *s); + void append(const char *n, unsigned s); + void appendPrefix(const String &prefix, Pattern &pattern); + void appendPath(); + void appendHeaders(const ConfigHeaders &config); + void appendQuery(const ConfigQuery &config); + void appendCookies(const ConfigCookies &config); + void appendUaCaptures(Pattern &config); + bool appendUaClass(Classifier &classifier); + bool finalize() const; + +private: + CacheKey(); // disallow + CacheKey(const CacheKey &); // disallow + CacheKey &operator=(const CacheKey &); // disallow + + /* Information from the request */ + TSHttpTxn _txn; /**< @brief transaction handle */ + TSMBuffer _buf; /**< @brief marshal buffer */ + TSMLoc _url; /**< @brief URI handle */ + TSMLoc _hdrs; /**< @brief headers handle */ + + String _key; /**< @brief cache key */ +}; + +#endif /* PLUGINS_EXPERIMENTAL_CACHEKEY_CACHEKEY_H_ */ http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/plugins/experimental/cachekey/common.cc ---------------------------------------------------------------------- diff --git a/plugins/experimental/cachekey/common.cc b/plugins/experimental/cachekey/common.cc new file mode 100644 index 0000000..87397c8 --- /dev/null +++ b/plugins/experimental/cachekey/common.cc @@ -0,0 +1,38 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
/**
 * @brief printf-style output to stderr, used by the logging macros in unit-test builds.
 * @param fmt printf-style format string followed by its arguments.
 */
void
PrintToStdErr(const char *fmt, ...)
{
  va_list ap;

  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);
}
/* Plugin name, used as the debug tag and as the prefix of error messages. */
#define PLUGIN_NAME "cachekey"

#include <string>
#include <set>
#include <list>
#include <vector>

/* Short aliases for the string containers used across the plugin. */
typedef std::string String;
typedef std::set<std::string> StringSet;
typedef std::list<std::string> StringList;
typedef std::vector<std::string> StringVector;

#ifdef CACHEKEY_UNIT_TEST
#include <stdio.h>
#include <stdarg.h>

/* Unit-test build: both macros print to stderr through PrintToStdErr(), prefixed with the
 * plugin name, file, line and function. Debug and error output are formatted identically here. */
#define CacheKeyDebug(fmt, ...) PrintToStdErr("(%s) %s:%d:%s() " fmt "\n", PLUGIN_NAME, __FILE__, __LINE__, __func__, ##__VA_ARGS__)
#define CacheKeyError(fmt, ...) PrintToStdErr("(%s) %s:%d:%s() " fmt "\n", PLUGIN_NAME, __FILE__, __LINE__, __func__, ##__VA_ARGS__)
void PrintToStdErr(const char *fmt, ...);

#else /* CACHEKEY_UNIT_TEST */
#include "ts/ts.h"

/* Plugin build: debug output goes to TSDebug() under the PLUGIN_NAME tag. fmt must be a
 * string literal because it is concatenated with the location prefix. */
#define CacheKeyDebug(fmt, ...)                                                           \
  do {                                                                                    \
    TSDebug(PLUGIN_NAME, "%s:%d:%s() " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
  } while (0)

/* Plugin build: errors are reported through TSError() and repeated through TSDebug()
 * together with the source location. */
#define CacheKeyError(fmt, ...)                                                           \
  do {                                                                                    \
    TSError("(%s) " fmt, PLUGIN_NAME, ##__VA_ARGS__);                                     \
    TSDebug(PLUGIN_NAME, "%s:%d:%s() " fmt, __FILE__, __LINE__, __func__, ##__VA_ARGS__); \
  } while (0)
#endif /* CACHEKEY_UNIT_TEST */
/**
 * @brief Interpret a plugin parameter value as a boolean.
 *
 * The value is considered true if it starts (case-insensitively) with "true", "1" or "yes".
 * Only the prefix is compared, so e.g. "yes-please" or "10" are accepted as true as well.
 *
 * @param arg parameter value, may be NULL: all plugin options are declared with optional_argument,
 *            so getopt_long() leaves optarg NULL when an option is given without "=<value>".
 * @return true if the value reads as true, false otherwise (including NULL and empty string).
 */
static bool
isTrue(const char *arg)
{
  /* strncasecmp() must not be handed a NULL pointer. */
  if (NULL == arg) {
    return false;
  }

  return (0 == strncasecmp("true", arg, 4) || 0 == strncasecmp("1", arg, 1) || 0 == strncasecmp("yes", arg, 3));
}
char *arg) +{ + _sort = ::isTrue(arg); +} + +void +ConfigElements::setRemove(const char *arg) +{ + _remove = ::isTrue(arg); +} + +bool +ConfigElements::toBeRemoved() const +{ + return _remove; +} + +bool +ConfigElements::toBeSkipped() const +{ + return _skip; +} + +bool +ConfigElements::toBeSorted() const +{ + return _sort; +} + +bool +ConfigElements::toBeAdded(const String &element) const +{ + /* Exclude the element if it is in the exclusion list. If the list is empty don't exclude anything. */ + bool exclude = (!_exclude.empty() && _exclude.find(element) != _exclude.end()) || + (!_excludePatterns.empty() && _excludePatterns.match(element)); + CacheKeyDebug("%s '%s' %s the 'exclude' rule", name().c_str(), element.c_str(), exclude ? "matches" : "does not match"); + + /* Include the element only if it is in the inclusion list. If the list is empty include everything. */ + bool include = + ((_include.empty() && _includePatterns.empty()) || _include.find(element) != _include.end()) || _includePatterns.match(element); + CacheKeyDebug("%s '%s' %s the 'include' rule", name().c_str(), element.c_str(), include ? "matches" : "do not match"); + + if (include && !exclude) { + CacheKeyDebug("%s '%s' should be added to cache key", name().c_str(), element.c_str()); + return true; + } + + CacheKeyDebug("%s '%s' should not be added to cache key", name().c_str(), element.c_str()); + return false; +} + +inline bool +ConfigElements::noIncludeExcludeRules() const +{ + return _exclude.empty() && _excludePatterns.empty() && _include.empty() && _includePatterns.empty(); +} + +/** + * @brief finalizes the query parameters related configuration. + * + * If we don't have any inclusions or exclusions and don't have to sort, we don't need to do anything + * with the query string. Include the whole original query in the cache key. 
+ */ +bool +ConfigQuery::finalize() +{ + _skip = noIncludeExcludeRules() && !_sort; + return true; +} + +const String ConfigQuery::_NAME = "query parameter"; +inline const String & +ConfigQuery::name() const +{ + return _NAME; +} + +/** + * @briefs finalizes the headers related configuration. + * + * If the all include and exclude lists are empty, including patterns, then there is no headers to be included. + */ +bool +ConfigHeaders::finalize() +{ + _remove = noIncludeExcludeRules(); + return true; +} + +const String ConfigHeaders::_NAME = "header"; +inline const String & +ConfigHeaders::name() const +{ + return _NAME; +} + +/** + * @brief finalizes the cookies related configuration. + * + * If the all include and exclude lists are empty, including pattern, then there is no cookies to be included. + */ +bool +ConfigCookies::finalize() +{ + _remove = noIncludeExcludeRules(); + return true; +} + +const String ConfigCookies::_NAME = "cookie"; +inline const String & +ConfigCookies::name() const +{ + return _NAME; +} + +/** + * @brief Accessor method for getting include list only for headers config. + * + * We would not need to drill this hole in the design if there was an efficient way to iterate through the headers in the traffic + * server API (inefficiency mentioned in ts/ts.h), iterating through the "include" list should be good enough work-around. + */ +const StringSet & +ConfigHeaders::getInclude() const +{ + return _include; +} + +/** + * @brief Rebase a relative path onto the configuration directory. + */ +static String +makeConfigPath(const String &path) +{ + if (path.empty() || path[0] == '/') { + return path; + } + + return String(TSConfigDirGet()) + "/" + path; +} + +/** + * @brief a helper function which loads the classifier from files. + * @param args classname + filename in '<classname>:<filename>' format. + * @param blacklist true - load as a blacklist classifier, false - whitelist. + * @return true if successful, false otherwise. 
+ */ +bool +Configs::loadClassifiers(const String &args, bool blacklist) +{ + static const char *EXPECTED_FORMAT = "<classname>:<filename>"; + + std::size_t d = args.find(':'); + if (String::npos == d) { + CacheKeyError("failed to parse classifier string '%s', expected format: '%s'", optarg ? optarg : "null", EXPECTED_FORMAT); + return false; + } + + String classname(optarg, 0, d); + String filename(optarg, d + 1, String::npos); + + if (classname.empty() || filename.empty()) { + CacheKeyError("'<classname>' and '<filename>' in '%s' cannot be empty, expected format: '%s'", optarg ? optarg : "null", + EXPECTED_FORMAT); + return false; + } + + String path(makeConfigPath(filename)); + + std::ifstream ifstr; + String regex; + unsigned lineno = 0; + + ifstr.open(path.c_str()); + if (!ifstr) { + CacheKeyError("failed to load classifier '%s' from '%s'", classname.c_str(), path.c_str()); + return false; + } + + MultiPattern *multiPattern; + if (blacklist) { + multiPattern = new NonMatchingMultiPattern(classname); + } else { + multiPattern = new MultiPattern(classname); + } + if (NULL == multiPattern) { + CacheKeyError("failed to allocate classifier '%s'", classname.c_str()); + return false; + } + + CacheKeyDebug("loading classifier '%s' from '%s'", classname.c_str(), path.c_str()); + + while (std::getline(ifstr, regex)) { + Pattern *p; + String::size_type pos; + + ++lineno; + + // Allow #-prefixed comments. 
+ pos = regex.find_first_of('#'); + if (pos != String::npos) { + regex.resize(pos); + } + + if (regex.empty()) { + continue; + } + + p = new Pattern(); + + if (NULL != p && p->init(regex)) { + if (blacklist) { + CacheKeyDebug("Added pattern '%s' to black list '%s'", regex.c_str(), classname.c_str()); + multiPattern->add(p); + } else { + CacheKeyDebug("Added pattern '%s' to white list '%s'", regex.c_str(), classname.c_str()); + multiPattern->add(p); + } + } else { + CacheKeyError("%s:%u: failed to parse regex '%s'", path.c_str(), lineno, regex.c_str()); + delete p; + } + } + + ifstr.close(); + + if (!multiPattern->empty()) { + _classifier.add(multiPattern); + } else { + delete multiPattern; + } + + return true; +} + +/** + * @brief initializes plugin configuration. + * @param argc number of plugin parameters + * @param argv plugin parameters + */ +bool +Configs::init(int argc, char *argv[]) +{ + static const struct option longopt[] = {{"exclude-params", optional_argument, 0, 'a'}, + {"include-params", optional_argument, 0, 'b'}, + {"include-match-params", optional_argument, 0, 'c'}, + {"exclude-match-params", optional_argument, 0, 'd'}, + {"sort-params", optional_argument, 0, 'e'}, + {"remove-all-params", optional_argument, 0, 'f'}, + {"include-headers", optional_argument, 0, 'g'}, + {"include-cookies", optional_argument, 0, 'h'}, + {"ua-capture", optional_argument, 0, 'i'}, + {"static-prefix", optional_argument, 0, 'j'}, + {"capture-prefix", optional_argument, 0, 'k'}, + {"ua-whitelist", optional_argument, 0, 'l'}, + {"ua-blacklist", optional_argument, 0, 'm'}, + {0, 0, 0, 0}}; + + bool status = true; + optind = 0; + + /* argv contains the "to" and "from" URLs. Skip the first so that the second one poses as the program name. 
*/ + argc--; + argv++; + + for (;;) { + int opt; + opt = getopt_long(argc, (char *const *)argv, "", longopt, NULL); + + if (opt == -1) { + break; + } + CacheKeyDebug("processing %s", argv[optind - 1]); + + switch (opt) { + case 'a': /* exclude-params */ + _query.setExclude(optarg); + break; + case 'b': /* include-params */ + _query.setInclude(optarg); + break; + case 'c': /* include-match-params */ + _query.setIncludePatterns(optarg); + break; + case 'd': /* exclude-match-params */ + _query.setExcludePatterns(optarg); + break; + case 'e': /* sort-params */ + _query.setSort(optarg); + break; + case 'f': /* remove-all-params */ + _query.setRemove(optarg); + break; + case 'g': /* include-headers */ + _headers.setInclude(optarg); + break; + case 'h': /* include-cookies */ + _cookies.setInclude(optarg); + break; + case 'i': /* ua-capture */ + if (!_uaCapture.init(optarg)) { + CacheKeyError("failed to initialize User-Agent capture pattern '%s'", optarg); + status = false; + } + break; + case 'j': /* static-prefix */ + _prefix.assign(optarg); + CacheKeyDebug("prefix='%s'", _prefix.c_str()); + break; + case 'k': /* capture-prefix */ + if (!_hostCapture.init(optarg)) { + CacheKeyError("failed to initialize URI host:port capture pattern '%s'", optarg); + status = false; + } + break; + case 'l': /* ua-whitelist */ + if (!loadClassifiers(optarg, /* blacklist = */ false)) { + CacheKeyError("failed to load User-Agent pattern white-list '%s'", optarg); + status = false; + } + break; + case 'm': /* ua-blacklist */ + if (!loadClassifiers(optarg, /* blacklist = */ true)) { + CacheKeyError("failed to load User-Agent pattern black-list '%s'", optarg); + status = false; + } + break; + } + } + + status &= finalize(); + + return status; +} + +/** + * @brief provides means for post-processing of the plugin parameters to finalize the configuration or to "cache" some of the + * decisions for later use. + * @return true if successful, false if failure. 
+ */ +bool +Configs::finalize() +{ + return _query.finalize() && _headers.finalize() && _cookies.finalize(); +} http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/plugins/experimental/cachekey/configs.h ---------------------------------------------------------------------- diff --git a/plugins/experimental/cachekey/configs.h b/plugins/experimental/cachekey/configs.h new file mode 100644 index 0000000..c9e2080 --- /dev/null +++ b/plugins/experimental/cachekey/configs.h @@ -0,0 +1,161 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/** + * @file configs.h + * @brief Plugin configuration (header file). + */ + +#ifndef PLUGINS_EXPERIMENTAL_CACHEKEY_CONFIGS_H_ +#define PLUGINS_EXPERIMENTAL_CACHEKEY_CONFIGS_H_ + +#include "pattern.h" +#include "common.h" + +/** + * @brief Plug-in configuration elements (query / headers / cookies). + * + * Query parameters, cookies and headers can be handle in a similar way, through a similar set of rules (methods and properties). 
/**
 * @brief Plug-in configuration elements (query / headers / cookies).
 *
 * Base class holding the include / exclude lists and patterns plus the sort / remove / skip flags
 * shared by the query, headers and cookies configuration.
 */
class ConfigElements
{
public:
  ConfigElements() : _sort(false), _remove(false), _skip(false) {}
  virtual ~ConfigElements() {}

  /** @brief adds the names from a comma-separated list to the exclude list */
  void setExclude(const char *arg);
  /** @brief adds the names from a comma-separated list to the include list */
  void setInclude(const char *arg);
  /** @brief adds one regex pattern to the exclude patterns; a pattern that fails to initialize is dropped */
  void setExcludePatterns(const char *arg);
  /** @brief adds one regex pattern to the include patterns; a pattern that fails to initialize is dropped */
  void setIncludePatterns(const char *arg);
  /** @brief sets the 'remove' flag from a boolean-like string */
  void setRemove(const char *arg);
  /** @brief sets the 'sort' flag from a boolean-like string */
  void setSort(const char *arg);

  /** @brief shows if the elements are to be sorted in the result */
  bool toBeSorted() const;
  /** @brief shows if the elements are to be removed from the result */
  bool toBeRemoved() const;
  /** @brief shows if the processing of elements is to be skipped */
  bool toBeSkipped() const;
  /** @brief shows if the element is to be included in the result */
  bool toBeAdded(const String &element) const;
  /** @brief returns the configuration element name for debug logging */
  virtual const String &name() const = 0;

  /**
   * @brief provides means for post-processing of the configuration after all of parameters are available.
   * @return true if successful, false if failure.
   */
  virtual bool finalize() = 0;

protected:
  /** @brief true if no include / exclude lists or patterns are configured at all */
  bool noIncludeExcludeRules() const;

  StringSet _exclude; /**< @brief names to be excluded */
  StringSet _include; /**< @brief names to be included */

  MultiPattern _includePatterns; /**< @brief patterns for names to be included */
  MultiPattern _excludePatterns; /**< @brief patterns for names to be excluded */

  bool _sort;   /**< @brief set by setSort() */
  bool _remove; /**< @brief set by setRemove() or by finalize() of the derived class */
  bool _skip;   /**< @brief set by finalize() of the derived class */
};
/**
 * @brief Cookies configuration class.
 */
class ConfigCookies : public ConfigElements
{
public:
  /** @brief post-processes the cookies configuration once all parameters are known */
  bool finalize();

private:
  /** @brief returns the element name used in debug logging */
  const String &name() const;
  static const String _NAME; /**< @brief element name returned by name() */
};
+ */ + bool loadClassifiers(const String &args, bool blacklist = true); +}; + +#endif // PLUGINS_EXPERIMENTAL_CACHEKEY_CONFIGS_H_ http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/plugins/experimental/cachekey/pattern.cc ---------------------------------------------------------------------- diff --git a/plugins/experimental/cachekey/pattern.cc b/plugins/experimental/cachekey/pattern.cc new file mode 100644 index 0000000..4d884e6 --- /dev/null +++ b/plugins/experimental/cachekey/pattern.cc @@ -0,0 +1,505 @@ +/* + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +/** + * @file pattern.cc + * @brief PRCE related classes. + * @see pattern.h + */ + +#include "pattern.h" + +static void +replaceString(String &str, const String &from, const String &to) +{ + if (from.empty()) { + return; + } + + String::size_type start_pos = 0; + while ((start_pos = str.find(from, start_pos)) != String::npos) { + str.replace(start_pos, from.length(), to); + start_pos += to.length(); + } +} + +Pattern::Pattern() : _re(NULL), _extra(NULL), _pattern(""), _replacement(""), _tokenCount(0), _matchCount(0) +{ +} + +/** + * @brief Initializes PCRE pattern by providing the subject and replacement strings. 
+ * @param pattern PCRE pattern, a string containing PCRE patterns, capturing groups. + * @param replacement PCRE replacement, a string where $0 ... $9 will be replaced with the corresponding capturing groups + * @return true if successful, false if failure + */ +bool +Pattern::init(const String &pattern, const String &replacenemt) +{ + pcreFree(); + + _pattern.assign(pattern); + _replacement.assign(replacenemt); + + _tokenCount = 0; + _matchCount = 0; + + if (!compile()) { + CacheKeyDebug("failed to initialize pattern:'%s', replacement:'%s'", pattern.c_str(), replacenemt.c_str()); + pcreFree(); + return false; + } + + return true; +} + +/** + * @brief Initializes PCRE pattern by providing the pattern only or pattern+replacement in a single configuration string. + * @see init() + * @param config PCRE pattern <pattern> or PCRE pattern + replacement in format /<pattern>/<replacement>/ + * @return true if successful, false if failure + */ +bool +Pattern::init(const String &config) +{ + if (config[0] == '/') { + /* This is a config in format /regex/replacement/ */ + String pattern; + String replacement; + + size_t start = 1; + size_t current = 0; + size_t next = 1; + do { + current = next + 1; + next = config.find_first_of("/", current); + } while (next != String::npos && '\\' == config[next - 1]); + + if (next != String::npos) { + pattern = config.substr(start, next - start); + } else { + /* Error, no closing '/' */ + CacheKeyError("failed to parse the pattern in '%s'", config.c_str()); + return false; + } + + start = next + 1; + do { + current = next + 1; + next = config.find_first_of("/", current); + } while (next != String::npos && '\\' == config[next - 1]); + + if (next != String::npos) { + replacement = config.substr(start, next - start); + } else { + /* Error, no closing '/' */ + CacheKeyError("failed to parse the replacement in '%s'", config.c_str()); + return false; + } + + // Remove '\' which escaped '/' inside the pattern and replacement strings. 
+ ::replaceString(pattern, "\\/", "/"); + ::replaceString(replacement, "\\/", "/"); + + return this->init(pattern, replacement); + } else { + return this->init(config, ""); + } + + /* Should never get here. */ + return false; +} + +/** + * @brief Checks if the pattern object was initialized with a meaningful regex pattern. + * @return true if initialized, false if not. + */ +bool +Pattern::empty() const +{ + return _pattern.empty() || NULL == _re; +} + +/** + * @brief Frees PCRE library related resources. + */ +void +Pattern::pcreFree() +{ + if (_re) { + pcre_free(_re); + _re = NULL; + } + + if (_extra) { + pcre_free(_extra); + _extra = NULL; + } +} + +/** + * @bried Destructor, frees PCRE related resources. + */ +Pattern::~Pattern() +{ + pcreFree(); +} + +/** + * @brief Capture or capture-and-replace depending on whether a replacement string is specified. + * @see replace() + * @see capture() + * @param subject PCRE subject string + * @param result vector of strings where the result of captures or the replacements will be returned. + * @return true if there was a match and capture or replacement succeeded, false if failure. + */ +bool +Pattern::process(const String &subject, StringVector &result) +{ + if (!_replacement.empty()) { + /* Replacement pattern was provided in the configuration - capture and replace. */ + String element; + if (replace(subject, element)) { + result.push_back(element); + } else { + return false; + } + } else { + /* Replacement was not provided so return all capturing groups except the group zero. */ + StringVector captures; + if (capture(subject, captures)) { + if (captures.size() == 1) { + result.push_back(captures[0]); + } else { + StringVector::iterator it = captures.begin() + 1; + for (; it != captures.end(); it++) { + result.push_back(*it); + } + } + } else { + return false; + } + } + + return true; +} + +/** + * @brief Simple failure handling routine. + * @param PCRE subject string. + * @return true - failed, false - no failure. 
+ */
+bool
+Pattern::failed(const String &subject) const
+{
+  if (_matchCount >= 0) {
+    return false;
+  }
+
+  /* A plain "no match" is expected during normal operation, anything else is a real error. */
+  if (PCRE_ERROR_NOMATCH == _matchCount) {
+    CacheKeyDebug("%s does not match %s", _pattern.c_str(), subject.c_str());
+  } else {
+    CacheKeyError("matching error %d", _matchCount);
+  }
+
+  return true;
+}
+
+/**
+ * @brief PCRE matches a subject string against the regex pattern.
+ * @param subject PCRE subject
+ * @return true - matched, false - did not match, matching failed or the pattern was not initialized.
+ */
+bool
+Pattern::match(const String &subject)
+{
+  CacheKeyDebug("matching '%s' to '%s'", _pattern.c_str(), subject.c_str());
+
+  if (!_re) {
+    return false;
+  }
+
+  /* No output vector is passed, only the fact of matching is of interest here. */
+  _matchCount = pcre_exec(_re, _extra, subject.c_str(), subject.length(), 0, PCRE_NOTEMPTY, NULL, 0);
+  if (failed(subject)) {
+    return false;
+  }
+
+  return true;
+}
+
+/**
+ * @brief Return all PCRE capture groups that matched in the subject string
+ *
+ * One string per capturing group is appended, group zero (the whole match) first. A group which did not
+ * participate in the match is returned as an empty string so the position in the vector always equals the group number.
+ * @param subject PCRE subject string
+ * @param result reference to vector of strings containing all capture groups
+ * @return true - matched, false - did not match, matching failed or the pattern was not initialized.
+ */
+bool
+Pattern::capture(const String &subject, StringVector &result)
+{
+  CacheKeyDebug("matching '%s' to '%s'", _pattern.c_str(), subject.c_str());
+
+  if (!_re) {
+    return false;
+  }
+
+  /* Use the study data computed during initialization, same as match() does. */
+  _matchCount = pcre_exec(_re, _extra, subject.c_str(), subject.length(), 0, PCRE_NOTEMPTY, _ovector, OVECOUNT);
+  if (failed(subject)) {
+    return false;
+  }
+
+  /* pcre_exec() returns 0 when the pattern has more groups than _ovector can hold, the first OVECOUNT / 3
+   * offset pairs are still filled in that case. */
+  const int groups = (0 == _matchCount) ? OVECOUNT / 3 : _matchCount;
+
+  for (int i = 0; i < groups; i++) {
+    const int start = _ovector[2 * i];
+    const int end = _ovector[2 * i + 1];
+
+    /* Offsets of a group which did not participate in the match are -1, using them to construct a string would throw. */
+    String dst;
+    if (start >= 0 && end >= start) {
+      dst.assign(subject, start, end - start);
+    }
+
+    CacheKeyDebug("capturing '%s' %d[%d,%d]", dst.c_str(), i, _ovector[2 * i], _ovector[2 * i + 1]);
+    result.push_back(dst);
+  }
+
+  return true;
+}
+
+/**
+ * @brief Replaces all replacements found in the replacement string with what matched in the PCRE capturing groups.
+ * @param subject PCRE subject string + * @param result reference to A string where the result of the replacement will be stored + * @return true - success, false - nothing matched or failure. + */ +bool +Pattern::replace(const String &subject, String &result) +{ + CacheKeyDebug("matching '%s' to '%s'", _pattern.c_str(), subject.c_str()); + + if (!_re) { + return false; + } + + _matchCount = pcre_exec(_re, NULL, subject.c_str(), subject.length(), 0, PCRE_NOTEMPTY, _ovector, OVECOUNT); + if (failed(subject)) { + return false; + } + + /* Verify the replacement has the right number of matching groups */ + for (int i = 0; i < _tokenCount; i++) { + if (_tokens[i] >= _matchCount) { + CacheKeyError("invalid reference in replacement string: $%d", _tokens[i]); + return false; + } + } + + int previous = 0; + for (int i = 0; i < _tokenCount; i++) { + int replIndex = _tokens[i]; + int start = _ovector[2 * replIndex]; + int length = _ovector[2 * replIndex + 1] - _ovector[2 * replIndex]; + + String src(_replacement, _tokenOffset[i], 2); + String dst(subject, start, length); + + CacheKeyDebug("replacing '%s' with '%s'", src.c_str(), dst.c_str()); + + result.append(_replacement, previous, _tokenOffset[i] - previous); + result.append(dst); + + previous = _tokenOffset[i] + 2; /* 2 is the size of $0 or $1 or $2, ... or $9 */ + } + + result.append(_replacement, previous, _replacement.length() - previous); + + CacheKeyDebug("replacing '%s' resulted in '%s'", _replacement.c_str(), result.c_str()); + + return true; +} + +/** + * @brief PCRE compiles the regex, called only during initialization. + * @return true if successful, false if not. 
+ */
+bool
+Pattern::compile()
+{
+  const char *errPtr; /* PCRE error */
+  int errOffset;      /* PCRE error offset */
+
+  CacheKeyDebug("compiling pattern:'%s', replacement:'%s'", _pattern.c_str(), _replacement.c_str());
+
+  _re = pcre_compile(_pattern.c_str(), /* the pattern */
+                     0,                /* options */
+                     &errPtr,          /* for error message */
+                     &errOffset,       /* for error offset */
+                     NULL);            /* use default character tables */
+
+  if (NULL == _re) {
+    CacheKeyError("compile of regex '%s' at char %d: %s", _pattern.c_str(), errOffset, errPtr);
+
+    return false;
+  }
+
+  _extra = pcre_study(_re, 0, &errPtr);
+
+  /* A NULL result is treated as a failure only if an error message was set as well. */
+  if ((NULL == _extra) && (NULL != errPtr) && (0 != *errPtr)) {
+    CacheKeyError("failed to study regex '%s': %s", _pattern.c_str(), errPtr);
+
+    pcre_free(_re);
+    _re = NULL;
+    return false;
+  }
+
+  if (_replacement.empty()) {
+    /* No replacement necessary - we are done. */
+    return true;
+  }
+
+  /* Record which group every $0..$9 token refers to and where in the replacement string it is located. */
+  _tokenCount = 0;
+  bool success = true;
+
+  const String::size_type length = _replacement.length();
+  for (String::size_type i = 0; i < length; i++) {
+    if ('$' != _replacement[i]) {
+      continue;
+    }
+
+    if (_tokenCount >= TOKENCOUNT) {
+      CacheKeyError("too many tokens in replacement string: %s", _replacement.c_str());
+
+      success = false;
+      break;
+    }
+
+    if (i + 1 >= length) {
+      /* A '$' which ends the string has no digit after it, do not index past the last character. */
+      CacheKeyError("invalid replacement token $ at the end of %s: should be $0 - $9", _replacement.c_str());
+
+      success = false;
+      break;
+    }
+
+    const char digit = _replacement[i + 1];
+    if (digit < '0' || digit > '9') {
+      CacheKeyError("invalid replacement token $%c in %s: should be $0 - $9", digit, _replacement.c_str());
+
+      success = false;
+      break;
+    }
+
+    /* Store the location of the replacement */
+    /* Convert '0' to 0 */
+    _tokens[_tokenCount] = digit - '0';
+    _tokenOffset[_tokenCount] = static_cast<int>(i);
+    _tokenCount++;
+    /* Skip the next char */
+    i++;
+  }
+
+  if (!success) {
+    pcreFree();
+  }
+
+  return success;
+}
+
+/**
+ * @brief Destructor, deletes all patterns.
+ */
+MultiPattern::~MultiPattern()
+{
+  for (std::vector<Pattern *>::iterator p = this->_list.begin(); p != this->_list.end(); ++p) {
+    delete (*p);
+  }
+}
+
+/**
+ * @brief Check if empty.
+ * @return true if the multi-pattern contains no patterns, false otherwise
+ */
+bool
+MultiPattern::empty() const
+{
+  return this->_list.empty();
+}
+
+/**
+ * @brief Appends a pattern to the multi-pattern.
+ *
+ * Patterns are evaluated in the order they were added.
+ * @param pattern pattern pointer, deleted by the multi-pattern destructor
+ */
+void
+MultiPattern::add(Pattern *pattern)
+{
+  _list.push_back(pattern);
+}
+
+/**
+ * @brief Matches the subject string against the patterns, stopping at the first one that matches.
+ * @param subject subject string.
+ * @return true if any matches, false if nothing matches.
+ */
+bool
+MultiPattern::match(const String &subject) const
+{
+  std::vector<Pattern *>::const_iterator it = _list.begin();
+  while (it != _list.end()) {
+    Pattern *candidate = *it;
+    /* NULL entries are skipped. */
+    if (NULL != candidate && candidate->match(subject)) {
+      return true;
+    }
+    ++it;
+  }
+
+  return false;
+}
+
+/**
+ * @brief Returns the name given to the multi-pattern.
+ */
+const String &
+MultiPattern::name() const
+{
+  return this->_name;
+}
+
+/**
+ * @brief Destructor, deletes every multi-pattern which was added.
+ */
+Classifier::~Classifier()
+{
+  std::vector<MultiPattern *>::iterator it = _list.begin();
+  while (it != _list.end()) {
+    delete (*it);
+    ++it;
+  }
+}
+
+/**
+ * @brief Classifies a subject string: walks the named multi-patterns in the order they were added and
+ * reports the name of the first one that matches. Multi-patterns without any patterns are skipped.
+ * @param subject string subject being classified.
+ * @param name reference to a string which receives the name of the first matching class, untouched if nothing matches.
+ * @return true if something matched, false otherwise.
+ */
+bool
+Classifier::classify(const String &subject, String &name) const
+{
+  for (std::vector<MultiPattern *>::const_iterator it = _list.begin(); it != _list.end(); ++it) {
+    const MultiPattern *candidate = *it;
+    if (!candidate->empty() && candidate->match(subject)) {
+      name = candidate->name();
+      return true;
+    }
+  }
+
+  return false;
+}
+
+/**
+ * @brief Adds a multi-pattern to the classifier.
+ *
+ * The order of addition matters during the classification. The classifier deletes every added
+ * multi-pattern in its destructor.
+ * @param pattern multi-pattern pointer
+ */
+void
+Classifier::add(MultiPattern *pattern)
+{
+  _list.push_back(pattern);
+}
http://git-wip-us.apache.org/repos/asf/trafficserver/blob/d2140cf0/plugins/experimental/cachekey/pattern.h
----------------------------------------------------------------------
diff --git a/plugins/experimental/cachekey/pattern.h b/plugins/experimental/cachekey/pattern.h
new file mode 100644
index 0000000..69df978
--- /dev/null
+++ b/plugins/experimental/cachekey/pattern.h
@@ -0,0 +1,137 @@
+/*
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements. See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership. The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License. You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+/**
+ * @file pattern.h
+ * @brief PCRE related classes (header file).
+ */ + +#ifndef PLUGINS_EXPERIMENTAL_CACHEKEY_PATTERN_H_ +#define PLUGINS_EXPERIMENTAL_CACHEKEY_PATTERN_H_ + +#include <pcre.h> /* pcre, pcre_extra, pcre_exec */ +#include "common.h" + +/** + * @brief PCRE matching, capturing and replacing + */ +class Pattern +{ +public: + static const int TOKENCOUNT = 10; /**< @brief Capturing groups $0..$9 */ + static const int OVECOUNT = TOKENCOUNT * 3; /**< @brief pcre_exec() array count, handle 10 capture groups */ + + Pattern(); + virtual ~Pattern(); + + bool init(const String &pattern, const String &replacenemt); + bool init(const String &config); + bool empty() const; + bool match(const String &subject); + bool capture(const String &subject, StringVector &result); + bool replace(const String &subject, String &result); + bool process(const String &subject, StringVector &result); + +private: + bool compile(); + bool failed(const String &subject) const; + void pcreFree(); + + pcre *_re; /**< @brief PCRE compiled info structure, computed during initialization */ + pcre_extra *_extra; /**< @brief PCRE study data block, computed during initialization */ + + String _pattern; /**< @brief PCRE pattern string, containing PCRE patterns and capturing groups. */ + String _replacement; /**< @brief PCRE replacement string, containing $0..$9 to be replaced with content of the capturing groups */ + + int _tokenCount; /**< @brief number of replacements $0..$9 found in the replacement string if not empty */ + int _tokens[TOKENCOUNT]; /**< @brief replacement index 0..9, since they can be used in the replacement string in any order */ + int _tokenOffset[TOKENCOUNT]; /**< @brief replacement offset inside the replacement string */ + + int _matchCount; /**< @brief match count */ + int _ovector[OVECOUNT]; /**< @brief vector used by the pcre_exec() */ +}; + +/** + * @brief Named list of regular expressions. 
+ */ +class MultiPattern +{ +public: + MultiPattern(const String name = "") : _name(name) {} + virtual ~MultiPattern(); + + bool empty() const; + void add(Pattern *pattern); + virtual bool match(const String &subject) const; + const String &name() const; + +protected: + std::vector<Pattern *> _list; /**< @brief vector which dictates the order of the pattern evaluation. */ + String _name; /**< @brief multi-pattern name */ + +private: + MultiPattern(const MultiPattern &); // disallow + MultiPattern &operator=(const MultiPattern &); // disallow +}; + +/** + * @brief Named list of non-matching regular expressions. + */ +class NonMatchingMultiPattern : public MultiPattern +{ +public: + NonMatchingMultiPattern(const String &name) { _name = name; } + + /* + * @brief Matches the subject string against all patterns. + * @param subject subject string + * @return return false if any of the patterns matches, true otherwise. + */ + virtual bool + match(const String &subject) const + { + return !MultiPattern::match(subject); + } + +private: + NonMatchingMultiPattern(); // disallow + NonMatchingMultiPattern(const NonMatchingMultiPattern &); // disallow + NonMatchingMultiPattern &operator=(const NonMatchingMultiPattern &); // disallow +}; + + +/** + * @brief Simple classifier which classifies a subject string using a list of named multi-patterns. + */ +class Classifier +{ +public: + Classifier() {} + ~Classifier(); + + bool classify(const String &subject, String &name) const; + void add(MultiPattern *pattern); + +private: + std::vector<MultiPattern *> _list; /**< @brief vector which dictates the multi-pattern evaluation order */ + + Classifier(const Classifier &); // disallow + Classifier &operator=(const Classifier &); // disallow +}; + +#endif /* PLUGINS_EXPERIMENTAL_CACHEKEY_PATTERN_H_ */
