This is an automated email from the ASF dual-hosted git repository.

gancho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/trafficserver.git


The following commit(s) were added to refs/heads/master by this push:
     new d49271b  cachekey: capture cache key elements from headers
d49271b is described below

commit d49271b167bca3ea9e7a1b0ac0c65c72512a85dc
Author: Gancho Tenev <gan...@apache.org>
AuthorDate: Fri Jul 6 17:25:33 2018 -0700

    cachekey: capture cache key elements from headers
    
    --capture-header=<headername>:<capture_definition>
    captures elements from header <headername> using <capture_definition>
    and adds them to the cache key.
---
 doc/admin-guide/plugins/cachekey.en.rst |  29 ++++++--
 plugins/cachekey/cachekey.cc            | 119 +++++++++++++++++++++-----------
 plugins/cachekey/cachekey.h             |   4 ++
 plugins/cachekey/common.h               |   2 +
 plugins/cachekey/configs.cc             |  52 ++++++++++++++
 plugins/cachekey/configs.h              |  14 +++-
 plugins/cachekey/pattern.cc             |  12 ++++
 plugins/cachekey/pattern.h              |   2 +
 8 files changed, 187 insertions(+), 47 deletions(-)

diff --git a/doc/admin-guide/plugins/cachekey.en.rst 
b/doc/admin-guide/plugins/cachekey.en.rst
index 958cdbe..0b631d5 100644
--- a/doc/admin-guide/plugins/cachekey.en.rst
+++ b/doc/admin-guide/plugins/cachekey.en.rst
@@ -110,14 +110,16 @@ Cache key structure and related plugin parameters
 
 ::
 
-  Optional components      | ┌───────────────────┐
-                           | │ --include-headers │
-                           | ├───────────────────┤
-  Default values if no     | │ (empty)           |
-  optional components      | └───────────────────┘
+  Optional components      | ┌───────────────────┬────────────────────┐
+                           | │ --include-headers │  --capture-header  │
+                           | ├───────────────────┼────────────────────┤
+  Default values if no     | │ (empty)           |  (empty)           |
+  optional components      | └───────────────────┴────────────────────┘
   configured               |
 
-* ``--include-headers`` (default: empty list) - comma separated list of 
headers to be added to the cache key. The list of headers defined by 
``--include-headers`` are always sorted before adding them to the cache  key.
+* ``--include-headers`` (default: empty list) - comma separated list of 
headers to be added to the cache key. The list of headers defined by 
``--include-headers`` are always sorted before adding them to the cache key.
+
+* ``--capture-header=<headername>:<capture_definition>`` (default: empty) - 
captures elements from header <headername> using <capture_definition> and adds 
them to the cache key.
 
 "Cookies" section
 ^^^^^^^^^^^^^^^^^
@@ -400,6 +402,21 @@ The following headers ``HeaderA`` and ``HeaderB`` will be 
used when constructing
 
   @plugin=cachekey.so @pparam=--include-headers=HeaderA,HeaderB
 
+The following captures an element from the ``Authorization`` header and adds 
it to the cache key ::
+
+  @plugin=cachekey.so \
+      
@pparam=--capture-header=Authorization:/AWS\s(?<clientID>[^:]+).*/clientID:$1/
+
+If the request looks like the following::
+
+  http://example-cdn.com/path/file
+  Authorization: AWS MKIARYMOG51PT0DLD:DLiWQ2lyS49H4Zyx34kW0URtg6s=
+
+Cache key would be set to::
+
+  /example-cdn.com/80/clientID:MKIARYMOG51PT0DLD/path/file
+
+
 HTTP Cookies
 ^^^^^^^^^^^^
 
diff --git a/plugins/cachekey/cachekey.cc b/plugins/cachekey/cachekey.cc
index a31d628..c89b657 100644
--- a/plugins/cachekey/cachekey.cc
+++ b/plugins/cachekey/cachekey.cc
@@ -437,6 +437,61 @@ CacheKey::appendPath(Pattern &pathCapture, Pattern 
&pathCaptureUri)
   }
 }
 
+template <class T>
+void
+CacheKey::processHeader(const String &name, const ConfigHeaders &config, T 
&dst,
+                        void (*fun)(const ConfigHeaders &config, const String 
&name_s, const String &value_s, T &captures))
+{
+  TSMLoc field;
+
+  for (field = TSMimeHdrFieldFind(_buf, _hdrs, name.c_str(), name.size()); 
field != TS_NULL_MLOC;
+       field = ::nextDuplicate(_buf, _hdrs, field)) {
+    const char *value;
+    int vlen;
+    int count = TSMimeHdrFieldValuesCount(_buf, _hdrs, field);
+
+    for (int i = 0; i < count; ++i) {
+      value = TSMimeHdrFieldValueStringGet(_buf, _hdrs, field, i, &vlen);
+      if (value == nullptr || vlen == 0) {
+        CacheKeyDebug("missing value %d for header %s", i, name.c_str());
+        continue;
+      }
+
+      String value_s(value, vlen);
+      fun(config, name, value_s, dst);
+    }
+  }
+}
+
+template <class T>
+void
+captureWholeHeaders(const ConfigHeaders &config, const String &name, const 
String &value, T &captures)
+{
+  CacheKeyDebug("processing header %s", name.c_str());
+  if (config.toBeAdded(name)) {
+    String header;
+    header.append(name).append(":").append(value);
+    captures.insert(header);
+    CacheKeyDebug("adding header '%s: %s'", name.c_str(), value.c_str());
+  } else {
+    CacheKeyDebug("failed to find header '%s'", name.c_str());
+  }
+}
+
+template <class T>
+void
+captureFromHeaders(const ConfigHeaders &config, const String &name, const 
String &value, T &captures)
+{
+  CacheKeyDebug("processing capture from header %s", name.c_str());
+  auto itMp = config.getCaptures().find(name);
+  if (config.getCaptures().end() != itMp) {
+    itMp->second->process(value, captures);
+    CacheKeyDebug("found capture pattern for header '%s'", name.c_str());
+  } else {
+    CacheKeyDebug("failed to find header '%s'", name.c_str());
+  }
+}
+
 /**
  * @brief Append headers by following the rules specified in the header 
configuration object.
  * @param config header-related configuration containing information about 
which headers need to be appended to the key.
@@ -445,49 +500,35 @@ CacheKey::appendPath(Pattern &pathCapture, Pattern 
&pathCaptureUri)
 void
 CacheKey::appendHeaders(const ConfigHeaders &config)
 {
-  if (config.toBeRemoved() || config.toBeSkipped()) {
-    // Don't add any headers to the cache key.
-    return;
-  }
-
-  TSMLoc field;
-  StringSet hset; /* Sort and uniquify the header list in the cache key. */
-
-  /* Iterating header by header is not efficient according to comments inside 
traffic server API,
-   * Iterate over an 'include'-kind of list to avoid header by header 
iteration.
-   * @todo: revisit this when (if?) adding regex matching for headers. */
-  for (StringSet::iterator it = config.getInclude().begin(); it != 
config.getInclude().end(); ++it) {
-    String name_s = *it;
-
-    for (field = TSMimeHdrFieldFind(_buf, _hdrs, name_s.c_str(), 
name_s.size()); field != TS_NULL_MLOC;
-         field = ::nextDuplicate(_buf, _hdrs, field)) {
-      const char *value;
-      int vlen;
-      int count = TSMimeHdrFieldValuesCount(_buf, _hdrs, field);
-
-      for (int i = 0; i < count; ++i) {
-        value = TSMimeHdrFieldValueStringGet(_buf, _hdrs, field, i, &vlen);
-        if (value == nullptr || vlen == 0) {
-          CacheKeyDebug("missing value %d for header %s", i, name_s.c_str());
-          continue;
-        }
-
-        String value_s(value, vlen);
+  if (!config.toBeRemoved() && !config.toBeSkipped()) {
+    /* Iterating header by header is not efficient according to comments 
inside traffic server API,
+     * Iterate over an 'include'-kind of list or the capture definitions to 
avoid header by header iteration.
+     * @todo: revisit this when (if?) adding regex matching for headers. */
+
+    /* Adding whole headers, iterate over "--include-header" list */
+    StringSet hdrSet; /* Sort and uniquify the header list in the cache key. */
+    for (auto it = config.getInclude().begin(); it != 
config.getInclude().end(); ++it) {
+      processHeader(*it, config, hdrSet, captureWholeHeaders);
+    }
 
-        if (config.toBeAdded(name_s)) {
-          String header;
-          header.append(name_s).append(":").append(value_s);
-          hset.insert(header);
-          CacheKeyDebug("adding header => '%s: %s'", name_s.c_str(), 
value_s.c_str());
-        }
-      }
+    /* Append to the cache key. It doesn't make sense to have the headers 
unordered in the cache key. */
+    String headers_key = containerToString<StringSet, 
StringSet::const_iterator>(hdrSet, "", _separator);
+    if (!headers_key.empty()) {
+      append(headers_key);
     }
   }
 
-  /* It doesn't make sense to have the headers unordered in the cache key. */
-  String headers_key = containerToString<StringSet, 
StringSet::const_iterator>(hset, "", _separator);
-  if (!headers_key.empty()) {
-    append(headers_key);
+  if (!config.getCaptures().empty()) {
+    /* Adding captures from headers, iterate over "--capture-header" 
definitions */
+    StringVector hdrCaptures;
+    for (auto it = config.getCaptures().begin(); it != 
config.getCaptures().end(); ++it) {
+      processHeader(it->first, config, hdrCaptures, captureFromHeaders);
+    }
+
+    /* Append to the cache key. Add the captures in the order capture 
definitions are captured / specified */
+    for (auto &capture : hdrCaptures) {
+      append(capture);
+    }
   }
 }
 
diff --git a/plugins/cachekey/cachekey.h b/plugins/cachekey/cachekey.h
index 6116bba..7ea058c 100644
--- a/plugins/cachekey/cachekey.h
+++ b/plugins/cachekey/cachekey.h
@@ -73,6 +73,10 @@ public:
 private:
   CacheKey(); // disallow
 
+  template <class T>
+  void processHeader(const String &name_s, const ConfigHeaders &config, T &dst,
+                     void (*fun)(const ConfigHeaders &config, const String 
&name_s, const String &value_s, T &captures));
+
   /* Information from the request */
   TSHttpTxn _txn;      /**< @brief transaction handle */
   TSMBuffer _buf;      /**< @brief marshal buffer */
diff --git a/plugins/cachekey/common.h b/plugins/cachekey/common.h
index a608683..07e886f 100644
--- a/plugins/cachekey/common.h
+++ b/plugins/cachekey/common.h
@@ -26,11 +26,13 @@
 #define PLUGIN_NAME "cachekey"
 
 #include <string>
+#include <string_view>
 #include <set>
 #include <list>
 #include <vector>
 
 typedef std::string String;
+typedef std::string_view StringView;
 typedef std::set<std::string> StringSet;
 typedef std::list<std::string> StringList;
 typedef std::vector<std::string> StringVector;
diff --git a/plugins/cachekey/configs.cc b/plugins/cachekey/configs.cc
index 16cf27a..2a13d76 100644
--- a/plugins/cachekey/configs.cc
+++ b/plugins/cachekey/configs.cc
@@ -70,6 +70,47 @@ setPattern(MultiPattern &multiPattern, const char *arg)
   }
 }
 
+bool
+ConfigElements::setCapture(const String &name, const String &pattern)
+{
+  auto it = _captures.find(name);
+  if (_captures.end() == it) {
+    auto mp = new MultiPattern(name);
+    if (nullptr != mp) {
+      _captures[name] = mp;
+    } else {
+      return false;
+    }
+  }
+  setPattern(*_captures[name], pattern.c_str());
+  CacheKeyDebug("added capture pattern '%s' for element '%s'", 
pattern.c_str(), name.c_str());
+  return true;
+}
+
+void
+ConfigElements::addCapture(const char *arg)
+{
+  StringView args(arg);
+  StringView::size_type pos = args.find_first_of(':');
+  if (StringView::npos != pos) {
+    String name(args.substr(0, pos));
+    if (!name.empty()) {
+      String pattern(args.substr(pos + 1));
+      if (!pattern.empty()) {
+        if (!setCapture(name, pattern)) {
+          CacheKeyError("failed to add capture: '%s'", arg);
+        }
+      } else {
+        CacheKeyError("missing pattern in capture: '%s'", arg);
+      }
+    } else {
+      CacheKeyError("missing element name in capture: %s", arg);
+    }
+  } else {
+    CacheKeyError("invalid capture: %s, should be 'name:<capture_definition>", 
arg);
+  }
+}
+
 void
 ConfigElements::setExcludePatterns(const char *arg)
 {
@@ -140,6 +181,13 @@ ConfigElements::noIncludeExcludeRules() const
   return _exclude.empty() && _excludePatterns.empty() && _include.empty() && 
_includePatterns.empty();
 }
 
+ConfigElements::~ConfigElements()
+{
+  for (auto it = _captures.begin(); it != _captures.end(); it++) {
+    delete it->second;
+  }
+}
+
 /**
  * @brief finalizes the query parameters related configuration.
  *
@@ -348,6 +396,7 @@ Configs::init(int argc, const char *argv[], bool 
perRemapConfig)
     {const_cast<char *>("remove-path"), optional_argument, nullptr, 'r'},
     {const_cast<char *>("separator"), optional_argument, nullptr, 's'},
     {const_cast<char *>("uri-type"), optional_argument, nullptr, 't'},
+    {const_cast<char *>("capture-header"), optional_argument, nullptr, 'u'},
     {nullptr, 0, nullptr, 0},
   };
 
@@ -452,6 +501,9 @@ Configs::init(int argc, const char *argv[], bool 
perRemapConfig)
     case 't': /* uri-type */
       setUriType(optarg);
       break;
+    case 'u': /* capture-header */
+      _headers.addCapture(optarg);
+      break;
     }
   }
 
diff --git a/plugins/cachekey/configs.h b/plugins/cachekey/configs.h
index b4890d5..603ff43 100644
--- a/plugins/cachekey/configs.h
+++ b/plugins/cachekey/configs.h
@@ -26,6 +26,8 @@
 #include "pattern.h"
 #include "common.h"
 
+#include <map>
+
 enum CacheKeyUriType {
   REMAP,
   PRISTINE,
@@ -40,14 +42,19 @@ class ConfigElements
 {
 public:
   ConfigElements() : _sort(false), _remove(false), _skip(false) {}
-  virtual ~ConfigElements() {}
+  virtual ~ConfigElements();
   void setExclude(const char *arg);
   void setInclude(const char *arg);
   void setExcludePatterns(const char *arg);
   void setIncludePatterns(const char *arg);
   void setRemove(const char *arg);
   void setSort(const char *arg);
-
+  void addCapture(const char *arg);
+  const auto &
+  getCaptures() const
+  {
+    return _captures;
+  }
   /** @brief shows if the elements are to be sorted in the result */
   bool toBeSorted() const;
   /** @brief shows if the elements are to be removed from the result */
@@ -67,6 +74,7 @@ public:
 
 protected:
   bool noIncludeExcludeRules() const;
+  bool setCapture(const String &name, const String &pattern);
 
   StringSet _exclude;
   StringSet _include;
@@ -77,6 +85,8 @@ protected:
   bool _sort;
   bool _remove;
   bool _skip;
+
+  std::map<String, MultiPattern *> _captures;
 };
 
 /**
diff --git a/plugins/cachekey/pattern.cc b/plugins/cachekey/pattern.cc
index 319a4a3..27eb94e 100644
--- a/plugins/cachekey/pattern.cc
+++ b/plugins/cachekey/pattern.cc
@@ -458,6 +458,18 @@ MultiPattern::name() const
   return _name;
 }
 
+bool
+MultiPattern::process(const String &subject, StringVector &result) const
+{
+  bool res = false;
+  for (auto p : this->_list) {
+    if (nullptr != p && p->process(subject, result)) {
+      res = true;
+    }
+  }
+  return res;
+}
+
 /**
  * @brief Destructor, deletes all multi-patterns.
  */
diff --git a/plugins/cachekey/pattern.h b/plugins/cachekey/pattern.h
index 302d20a..3cb7d58 100644
--- a/plugins/cachekey/pattern.h
+++ b/plugins/cachekey/pattern.h
@@ -85,6 +85,8 @@ public:
   virtual bool match(const String &subject) const;
   const String &name() const;
 
+  bool process(const String &subject, StringVector &result) const;
+
 protected:
   std::vector<Pattern *> _list; /**< @brief vector which dictates the order of 
the pattern evaluation. */
   String _name;                 /**< @brief multi-pattern name */

Reply via email to