bneradt commented on code in PR #12740: URL: https://github.com/apache/trafficserver/pull/12740#discussion_r2600099286
########## plugins/experimental/filter_body/filter_body.cc: ########## @@ -0,0 +1,1040 @@ +/** @file + + @brief A remap plugin that filters request/response bodies for CVE exploitation patterns. + + This plugin performs zero-copy streaming inspection of request or response bodies, + looking for configured patterns. When a pattern matches, it can log, block (403), + and/or add a header. + + @section license License + + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+*/ + +#include <cstring> +#include <string> +#include <vector> +#include <algorithm> +#include <cctype> + +#include <yaml-cpp/yaml.h> + +#include "swoc/TextView.h" +#include "ts/ts.h" +#include "ts/remap.h" +#include "tscore/ink_defs.h" + +#define PLUGIN_NAME "filter_body" + +namespace +{ +DbgCtl dbg_ctl{PLUGIN_NAME}; + +// Action flags +constexpr unsigned ACTION_LOG = 1 << 0; +constexpr unsigned ACTION_BLOCK = 1 << 1; +constexpr unsigned ACTION_ADD_HEADER = 1 << 2; + +// Direction +enum class Direction { REQUEST, RESPONSE }; + +// Header match condition +struct HeaderCondition { + std::string name; + std::vector<std::string> patterns; // case-insensitive match +}; + +// Header to add when action triggers +struct AddHeader { + std::string name; + std::string value; // supports <rule_name> substitution +}; + +// A single filtering rule +struct Rule { + std::string name; + Direction direction = Direction::REQUEST; + unsigned actions = ACTION_LOG; // default: log only + std::vector<AddHeader> add_headers; // headers to add on match + std::vector<std::string> methods; // for request rules + std::vector<int> status_codes; // for response rules + int64_t max_content_length = -1; // -1 means no limit + std::vector<HeaderCondition> headers; + std::vector<std::string> body_patterns; // case-sensitive match + size_t max_pattern_len = 0; + int stat_id = -1; // metrics counter for matches (-1 = not created) +}; + +// Plugin configuration (per remap instance) +struct FilterConfig { + std::vector<Rule> request_rules; + std::vector<Rule> response_rules; + size_t max_lookback = 0; // max pattern length - 1 across all rules +}; + +// Per-transaction transform data +struct TransformData { + TSHttpTxn txnp; + Rule const *matched_rule = nullptr; + FilterConfig const *config = nullptr; + std::vector<Rule const *> active_rules; // rules that passed header check + std::string lookback; // small buffer for cross-boundary patterns + TSIOBuffer output_buffer = nullptr; + TSIOBufferReader 
output_reader = nullptr; + TSVIO output_vio = nullptr; + Direction direction = Direction::REQUEST; // direction of this transform + bool blocked = false; + bool headers_added = false; +}; + +/** + * @brief Case-insensitive substring search. + * + * Searches for @a needle within @a haystack using case-insensitive comparison. + * + * @param[in] haystack The string to search within. + * @param[in] needle The pattern to search for. + * @return Pointer to the first occurrence of needle in haystack, or nullptr if not found. + */ +const char * +strcasestr_local(swoc::TextView haystack, swoc::TextView needle) +{ + if (needle.empty() || haystack.size() < needle.size()) { + return nullptr; + } + + for (size_t i = 0; i <= haystack.size() - needle.size(); ++i) { + if (haystack.substr(i, needle.size()).starts_with_nocase(needle)) { + return haystack.data() + i; + } + } + return nullptr; +} + +/** + * @brief Case-sensitive substring search. + * + * Searches for @a needle within @a haystack using exact (case-sensitive) comparison. + * + * @param[in] haystack The string to search within. + * @param[in] needle The pattern to search for. + * @return Pointer to the first occurrence of needle in haystack, or nullptr if not found. + */ +const char * +strstr_local(swoc::TextView haystack, swoc::TextView needle) +{ + if (needle.empty() || haystack.size() < needle.size()) { + return nullptr; + } + + auto pos = haystack.find(needle); + if (pos != std::string::npos) { + return haystack.data() + pos; + } + return nullptr; +} + +/** + * @brief Check if the HTTP method matches the rule's method filter. + * + * If the rule has no method restrictions, all methods match. + * + * @param[in] rule The rule containing method restrictions. + * @param[in] bufp The message buffer containing the HTTP headers. + * @param[in] hdr_loc The location of the HTTP header. + * @return true if the method matches or no method restriction exists, false otherwise. 
+ */ +bool +method_matches(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc) +{ + if (rule.methods.empty()) { + return true; + } + + int method_len = 0; + const char *method = TSHttpHdrMethodGet(bufp, hdr_loc, &method_len); + if (method == nullptr) { + return false; + } + + swoc::TextView method_view(method, method_len); + method_view.trim_if(::isspace); + + for (auto const &m : rule.methods) { + if (0 == strcasecmp(method_view, swoc::TextView(m))) { + return true; + } + } + return false; +} + +/** + * @brief Check if the HTTP status code matches the rule's status filter. + * + * For response rules, this checks if the response status code is in the rule's + * allowed status codes list. + * + * @param[in] rule The rule containing the status code filter. + * @param[in] bufp The message buffer containing the HTTP response. + * @param[in] hdr_loc The location of the HTTP response header. + * @return true if the status matches or no status restriction exists, false otherwise. + */ +bool +status_matches(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc) +{ + if (rule.status_codes.empty()) { + return true; // no status restriction + } + + TSHttpStatus status = TSHttpHdrStatusGet(bufp, hdr_loc); + for (int const code : rule.status_codes) { + if (static_cast<int>(status) == code) { + return true; + } + } + return false; +} + +/** + * @brief Check if Content-Length is within the rule's max_content_length limit. + * + * If the rule has no content length limit (max_content_length < 0), all sizes are allowed. + * If the Content-Length header is missing, the check passes. + * + * @param[in] rule The rule containing the content length limit. + * @param[in] bufp The message buffer containing the HTTP headers. + * @param[in] hdr_loc The location of the HTTP header. + * @return true if content length is within limit or no limit exists, false otherwise. 
+ */ +bool +content_length_ok(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc) +{ + if (rule.max_content_length < 0) { + return true; // no limit + } + + TSMLoc field_loc = TSMimeHdrFieldFind(bufp, hdr_loc, TS_MIME_FIELD_CONTENT_LENGTH, TS_MIME_LEN_CONTENT_LENGTH); + if (field_loc == TS_NULL_MLOC) { + return true; // no Content-Length header, allow + } + + int64_t content_length = TSMimeHdrFieldValueInt64Get(bufp, hdr_loc, field_loc, 0); + TSHandleMLocRelease(bufp, hdr_loc, field_loc); + + return content_length <= rule.max_content_length; +} + +/** + * @brief Check if a single header condition matches. + * + * Uses case-insensitive pattern search. Returns true if any pattern in the + * condition matches any value of the specified header (OR logic within header). + * + * @param[in] cond The header condition to check. + * @param[in] bufp The message buffer containing the HTTP headers. + * @param[in] hdr_loc The location of the HTTP header. + * @return true if the header exists and any pattern matches, false otherwise. + */ +bool +header_condition_matches(HeaderCondition const &cond, TSMBuffer bufp, TSMLoc hdr_loc) +{ + TSMLoc field_loc = TSMimeHdrFieldFind(bufp, hdr_loc, cond.name.c_str(), static_cast<int>(cond.name.length())); + if (field_loc == TS_NULL_MLOC) { + return false; + } + + bool matched = false; + int num_values = TSMimeHdrFieldValuesCount(bufp, hdr_loc, field_loc); + for (int i = 0; i < num_values && !matched; ++i) { + int value_len = 0; + const char *value = TSMimeHdrFieldValueStringGet(bufp, hdr_loc, field_loc, i, &value_len); + if (value == nullptr) { + continue; + } + + swoc::TextView value_view(value, value_len); + for (auto const &pattern : cond.patterns) { + if (strcasestr_local(value_view, swoc::TextView(pattern)) != nullptr) { + matched = true; + break; + } + } + } + + TSHandleMLocRelease(bufp, hdr_loc, field_loc); + return matched; +} + +/** + * @brief Check if ALL header conditions in a rule match. 
+ * + * Uses AND logic between headers - all header conditions must match for the + * rule to apply. + * + * @param[in] rule The rule containing header conditions. + * @param[in] bufp The message buffer containing the HTTP headers. + * @param[in] hdr_loc The location of the HTTP header. + * @return true if all header conditions match, false otherwise. + */ +bool +headers_match(Rule const &rule, TSMBuffer bufp, TSMLoc hdr_loc) +{ + for (auto const &cond : rule.headers) { + if (!header_condition_matches(cond, bufp, hdr_loc)) { + return false; + } + } + return true; +} + +/** + * @brief Search for body patterns in the given data. + * + * Searches for any of the rule's body patterns in the data using case-sensitive + * matching. Returns the first matched pattern. + * + * @param[in] rule The rule containing body patterns to search for. + * @param[in] data The data buffer to search within. + * @return Pointer to the matched pattern string, or nullptr if no match. + */ +std::string const * +search_body_patterns(Rule const &rule, swoc::TextView data) +{ + for (auto const &pattern : rule.body_patterns) { + if (strstr_local(data, swoc::TextView(pattern)) != nullptr) { + return &pattern; + } + } + return nullptr; +} + +/** + * @brief Add a header field to an HTTP message. + * + * Creates and appends a new header field with the given name and value. + * + * @param[in] bufp The message buffer to add the header to. + * @param[in] hdr_loc The location of the HTTP header. + * @param[in] name The header field name. + * @param[in] value The header field value. 
+ */ +void +add_header_to_message(TSMBuffer bufp, TSMLoc hdr_loc, std::string const &name, std::string const &value) +{ + TSMLoc field_loc; + if (TSMimeHdrFieldCreateNamed(bufp, hdr_loc, name.c_str(), static_cast<int>(name.length()), &field_loc) != TS_SUCCESS) { + TSError("[%s] Failed to create header field: %s", PLUGIN_NAME, name.c_str()); + return; + } + + if (TSMimeHdrFieldValueStringSet(bufp, hdr_loc, field_loc, -1, value.c_str(), static_cast<int>(value.length())) != TS_SUCCESS) { + TSError("[%s] Failed to set header value: %s", PLUGIN_NAME, name.c_str()); + TSHandleMLocRelease(bufp, hdr_loc, field_loc); + return; + } + + if (TSMimeHdrFieldAppend(bufp, hdr_loc, field_loc) != TS_SUCCESS) { + TSError("[%s] Failed to append header field: %s", PLUGIN_NAME, name.c_str()); + } + + TSHandleMLocRelease(bufp, hdr_loc, field_loc); +} + +/** + * @brief Execute the configured actions for a matched rule. + * + * Performs the actions specified in the rule: log, add_header, and/or block. + * For request rules, headers are added to the server request (proxy request to origin). + * For response rules, headers are added to the client response. + * + * @note Headers are added during body inspection, which occurs after headers may have + * already been sent. For request transforms, the server request headers should + * still be modifiable. For response transforms, headers are added before the + * response is sent to the client. + * + * @param[in,out] data The transform data containing transaction state. + * @param[in] rule The matched rule containing actions to execute. + * @param[in] matched_pattern The pattern that triggered the match (for logging). + */ Review Comment: This doxygen comment looks misplaced: it should be moved further down, directly above `execute_actions`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
