bneradt commented on code in PR #12740:
URL: https://github.com/apache/trafficserver/pull/12740#discussion_r2600099286


##########
plugins/experimental/filter_body/filter_body.cc:
##########
@@ -0,0 +1,1040 @@
+/** @file
+
+  @brief A remap plugin that filters request/response bodies for CVE exploitation patterns.
+
+  This plugin performs zero-copy streaming inspection of request or response bodies,
+  looking for configured patterns. When a pattern matches, it can log, block (403),
+  and/or add a header.
+
+  @section license License
+
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+*/
+
+#include <cstring>
+#include <string>
+#include <vector>
+#include <algorithm>
+#include <cctype>
+
+#include <yaml-cpp/yaml.h>
+
+#include "swoc/TextView.h"
+#include "ts/ts.h"
+#include "ts/remap.h"
+#include "tscore/ink_defs.h"
+
+#define PLUGIN_NAME "filter_body"
+
+namespace
+{
+DbgCtl dbg_ctl{PLUGIN_NAME}; // debug control tagged with the plugin name
+
+// Action flags: one distinct bit each, so several actions can be combined in Rule::actions.
+constexpr unsigned ACTION_LOG        = 1 << 0; // log the match
+constexpr unsigned ACTION_BLOCK      = 1 << 1; // block the transaction (403, per the file header)
+constexpr unsigned ACTION_ADD_HEADER = 1 << 2; // add the rule's configured headers
+
+// Direction: which message body a rule (and a transform) applies to.
+enum class Direction { REQUEST, RESPONSE };
+
+// Header match condition: one header name plus the patterns searched for in its values.
+struct HeaderCondition {
+  std::string              name;     // header field name to look up
+  std::vector<std::string> patterns; // case-insensitive match; a single matching pattern is enough
+};
+
+// Header to add when action triggers (a name/value pair).
+struct AddHeader {
+  std::string name;  // header field name
+  std::string value; // supports <rule_name> substitution
+};
+
+// A single filtering rule: the conditions that select a transaction, the body
+// patterns to look for, and the actions to take when one of them is found.
+struct Rule {
+  std::string                  name;                    // rule identifier
+  Direction                    direction = Direction::REQUEST;
+  unsigned                     actions   = ACTION_LOG;  // default: log only
+  std::vector<AddHeader>       add_headers;             // headers to add on match
+  std::vector<std::string>     methods;                 // for request rules
+  std::vector<int>             status_codes;            // for response rules
+  int64_t                      max_content_length = -1; // -1 means no limit
+  std::vector<HeaderCondition> headers;                 // all conditions must match for the rule to apply
+  std::vector<std::string>     body_patterns;           // case-sensitive match
+  size_t                       max_pattern_len = 0;     // NOTE(review): presumably the longest body pattern; set outside this view
+  int                          stat_id         = -1;    // metrics counter for matches (-1 = not created)
+};
+
+// Plugin configuration (per remap instance): the rules split by the message they apply to.
+struct FilterConfig {
+  std::vector<Rule> request_rules;    // rules for request bodies
+  std::vector<Rule> response_rules;   // rules for response bodies
+  size_t            max_lookback = 0; // max pattern length - 1 across all rules
+};
+
+// Per-transaction transform data
+struct TransformData {
+  TSHttpTxn                 txnp          = nullptr; // transaction handle; defaulted like the other handles below
+  Rule const               *matched_rule  = nullptr; // rule that matched, if any
+  FilterConfig const       *config        = nullptr; // configuration of the owning remap instance
+  std::vector<Rule const *> active_rules;            // rules that passed header check
+  std::string               lookback;                // small buffer for cross-boundary patterns
+  TSIOBuffer                output_buffer = nullptr;
+  TSIOBufferReader          output_reader = nullptr;
+  TSVIO                     output_vio    = nullptr;
+  Direction                 direction     = Direction::REQUEST; // direction of this transform
+  bool                      blocked       = false;
+  bool                      headers_added = false;
+};
+
+/**
+ * @brief Find the first occurrence of @a needle in @a haystack, ignoring case.
+ *
+ * An empty needle, or a needle longer than the haystack, never matches.
+ *
+ * @param[in] haystack The text to scan.
+ * @param[in] needle   The text to look for.
+ * @return Pointer into @a haystack at the start of the first match, or nullptr when there is none.
+ */
+const char *
+strcasestr_local(swoc::TextView haystack, swoc::TextView needle)
+{
+  if (needle.empty()) {
+    return nullptr;
+  }
+
+  // Drop one leading byte per step; once fewer bytes remain than the needle
+  // holds, no further position can match and the scan ends.
+  for (; haystack.size() >= needle.size(); haystack.remove_prefix(1)) {
+    if (haystack.starts_with_nocase(needle)) {
+      return haystack.data();
+    }
+  }
+  return nullptr;
+}
+
+/**
+ * @brief Find the first exact (case-sensitive) occurrence of @a needle in @a haystack.
+ *
+ * An empty needle never matches.
+ *
+ * @param[in] haystack The text to scan.
+ * @param[in] needle   The text to look for.
+ * @return Pointer into @a haystack at the start of the first match, or nullptr when there is none.
+ */
+const char *
+strstr_local(swoc::TextView haystack, swoc::TextView needle)
+{
+  if (needle.empty()) {
+    return nullptr;
+  }
+
+  // find() already reports "not found" for a needle longer than the haystack.
+  auto const offset = haystack.find(needle);
+  return offset == std::string::npos ? nullptr : haystack.data() + offset;
+}
+
+/**
+ * @brief Decide whether the message's HTTP method is accepted by a rule.
+ *
+ * A rule that lists no methods accepts every method. Otherwise the method read
+ * from the header is trimmed of surrounding whitespace and compared, ignoring
+ * case, against each listed method.
+ *
+ * @param[in] rule    The rule whose method list is consulted.
+ * @param[in] bufp    The message buffer containing the HTTP headers.
+ * @param[in] hdr_loc The location of the HTTP header.
+ * @return true if the rule is unrestricted or the method is listed, false otherwise
+ *         (including when the method cannot be read).
+ */
+bool
+method_matches(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc)
+{
+  if (rule.methods.empty()) {
+    return true;
+  }
+
+  int               raw_len = 0;
+  const char *const raw     = TSHttpHdrMethodGet(bufp, hdr_loc, &raw_len);
+  if (raw == nullptr) {
+    return false;
+  }
+
+  swoc::TextView request_method(raw, raw_len);
+  request_method.trim_if(::isspace);
+
+  return std::any_of(rule.methods.begin(), rule.methods.end(), [&request_method](std::string const &allowed) {
+    return 0 == strcasecmp(request_method, swoc::TextView(allowed));
+  });
+}
+
+/**
+ * @brief Decide whether the response status code is accepted by a rule.
+ *
+ * A rule that lists no status codes accepts every status.
+ *
+ * @param[in] rule    The rule whose status code list is consulted.
+ * @param[in] bufp    The message buffer containing the HTTP response.
+ * @param[in] hdr_loc The location of the HTTP response header.
+ * @return true if the rule is unrestricted or the status is listed, false otherwise.
+ */
+bool
+status_matches(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc)
+{
+  auto const &allowed = rule.status_codes;
+  if (allowed.empty()) {
+    return true;
+  }
+
+  int const observed = static_cast<int>(TSHttpHdrStatusGet(bufp, hdr_loc));
+  return std::find(allowed.begin(), allowed.end(), observed) != allowed.end();
+}
+
+/**
+ * @brief Check the declared Content-Length against a rule's size limit.
+ *
+ * A negative max_content_length means the rule has no limit. A message without
+ * a Content-Length header also passes.
+ *
+ * @param[in] rule    The rule holding the limit.
+ * @param[in] bufp    The message buffer containing the HTTP headers.
+ * @param[in] hdr_loc The location of the HTTP header.
+ * @return false only when a Content-Length header is present and its value exceeds the limit.
+ */
+bool
+content_length_ok(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc)
+{
+  if (rule.max_content_length >= 0) {
+    TSMLoc length_field = TSMimeHdrFieldFind(bufp, hdr_loc, TS_MIME_FIELD_CONTENT_LENGTH, TS_MIME_LEN_CONTENT_LENGTH);
+    if (length_field != TS_NULL_MLOC) {
+      int64_t const declared = TSMimeHdrFieldValueInt64Get(bufp, hdr_loc, length_field, 0);
+      TSHandleMLocRelease(bufp, hdr_loc, length_field);
+      return declared <= rule.max_content_length;
+    }
+  }
+
+  // Either the rule is unlimited or the message declares no length.
+  return true;
+}
+
+/**
+ * @brief Evaluate one header condition against a message.
+ *
+ * The condition holds when the named header is present and at least one of its
+ * values contains at least one of the condition's patterns, compared without
+ * regard to case.
+ *
+ * @param[in] cond    The header condition to evaluate.
+ * @param[in] bufp    The message buffer containing the HTTP headers.
+ * @param[in] hdr_loc The location of the HTTP header.
+ * @return true if the header exists and some pattern occurs in some value, false otherwise.
+ */
+bool
+header_condition_matches(HeaderCondition const &cond, TSMBuffer bufp, TSMLoc hdr_loc)
+{
+  TSMLoc const field = TSMimeHdrFieldFind(bufp, hdr_loc, cond.name.c_str(), static_cast<int>(cond.name.length()));
+  if (field == TS_NULL_MLOC) {
+    return false;
+  }
+
+  // True when any configured pattern occurs in the given header value.
+  auto const any_pattern_in = [&cond](swoc::TextView text) {
+    return std::any_of(cond.patterns.begin(), cond.patterns.end(),
+                       [text](std::string const &pattern) { return strcasestr_local(text, swoc::TextView(pattern)) != nullptr; });
+  };
+
+  bool      found       = false;
+  int const value_count = TSMimeHdrFieldValuesCount(bufp, hdr_loc, field);
+  for (int idx = 0; !found && idx < value_count; ++idx) {
+    int         len = 0;
+    const char *str = TSMimeHdrFieldValueStringGet(bufp, hdr_loc, field, idx, &len);
+    if (str != nullptr) {
+      found = any_pattern_in(swoc::TextView(str, len));
+    }
+  }
+
+  // The field handle is released on every path that obtained it.
+  TSHandleMLocRelease(bufp, hdr_loc, field);
+  return found;
+}
+
+/**
+ * @brief Evaluate every header condition of a rule (AND logic).
+ *
+ * The rule applies only when each of its header conditions holds; a rule with
+ * no header conditions therefore always passes.
+ *
+ * @param[in] rule    The rule whose header conditions are evaluated.
+ * @param[in] bufp    The message buffer containing the HTTP headers.
+ * @param[in] hdr_loc The location of the HTTP header.
+ * @return true if all header conditions match, false as soon as one does not.
+ */
+bool
+headers_match(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc)
+{
+  return std::all_of(rule.headers.begin(), rule.headers.end(),
+                     [bufp, hdr_loc](HeaderCondition const &cond) { return header_condition_matches(cond, bufp, hdr_loc); });
+}
+
+/**
+ * @brief Look for any of a rule's body patterns in a chunk of data.
+ *
+ * Patterns are tried in the order they appear in the rule and compared
+ * case-sensitively; the first pattern found in the data wins.
+ *
+ * @param[in] rule The rule supplying the body patterns.
+ * @param[in] data The data to search.
+ * @return Pointer to the matching pattern (owned by @a rule), or nullptr if none occurs.
+ */
+std::string const *
+search_body_patterns(Rule const &rule, swoc::TextView data)
+{
+  auto const &patterns = rule.body_patterns;
+  auto const  hit      = std::find_if(patterns.begin(), patterns.end(), [data](std::string const &candidate) {
+    return strstr_local(data, swoc::TextView(candidate)) != nullptr;
+  });
+  return hit == patterns.end() ? nullptr : &*hit;
+}
+
+/**
+ * @brief Append a new header field with the given name and value to a message.
+ *
+ * The field is created, given its value, and then appended. A failure at any
+ * step is reported through TSError and the remaining steps are skipped.
+ *
+ * @param[in] bufp    The message buffer to add the header to.
+ * @param[in] hdr_loc The location of the HTTP header.
+ * @param[in] name    The header field name.
+ * @param[in] value   The header field value.
+ */
+void
+add_header_to_message(TSMBuffer bufp, TSMLoc hdr_loc, std::string const &name, std::string const &value)
+{
+  TSMLoc new_field;
+  if (TS_SUCCESS != TSMimeHdrFieldCreateNamed(bufp, hdr_loc, name.c_str(), static_cast<int>(name.length()), &new_field)) {
+    TSError("[%s] Failed to create header field: %s", PLUGIN_NAME, name.c_str());
+    return; // nothing was created, so there is no handle to release
+  }
+
+  if (TS_SUCCESS != TSMimeHdrFieldValueStringSet(bufp, hdr_loc, new_field, -1, value.c_str(), static_cast<int>(value.length()))) {
+    TSError("[%s] Failed to set header value: %s", PLUGIN_NAME, name.c_str());
+  } else if (TS_SUCCESS != TSMimeHdrFieldAppend(bufp, hdr_loc, new_field)) {
+    TSError("[%s] Failed to append header field: %s", PLUGIN_NAME, name.c_str());
+  }
+
+  // Single release point for the handle obtained above, on success and failure alike.
+  TSHandleMLocRelease(bufp, hdr_loc, new_field);
+}
+
+/**
+ * @brief Execute the configured actions for a matched rule.
+ *
+ * Performs the actions specified in the rule: log, add_header, and/or block.
+ * For request rules, headers are added to the server request (proxy request to origin).
+ * For response rules, headers are added to the client response.
+ *
+ * @note Headers are added during body inspection, which occurs after headers may have
+ *       already been sent. For request transforms, the server request headers should
+ *       still be modifiable. For response transforms, headers are added before the
+ *       response is sent to the client.
+ *
+ * @param[in,out] data            The transform data containing transaction state.
+ * @param[in]     rule            The matched rule containing actions to execute.
+ * @param[in]     matched_pattern The pattern that triggered the match (for logging).
+ */

Review Comment:
   Looks like this doxygen comment should be below, above `execute_actions`. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to