Copilot commented on code in PR #12792:
URL: https://github.com/apache/trafficserver/pull/12792#discussion_r2672969751


##########
plugins/stats_over_http/stats_over_http.cc:
##########
@@ -509,32 +564,265 @@ static
   return sanitized_name;
 }
 
+/** Parse a Prometheus v2 metric name and return the base name and labels.
+ *
+ * @param[in] name The metric name to parse.
+ * @return A prometheus_v2_metric struct containing the base name and labels.
+ */
+static
+// Remove this check when we drop support for pre-13 GCC versions.
+#if defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 
201907L
+// Clang <= 16 doesn't fully support constexpr std::string.
+#if !defined(__clang__) || __clang_major__ > 16
+  constexpr
+#endif
+#endif
+  prometheus_v2_metric
+  parse_metric_v2(std::string_view name)
+{
+  swoc::TextView name_view{name};
+  std::string    labels;
+  std::string    base_name;
+
+  auto escape_label = [](std::string_view val) {
+    std::string escaped;
+    escaped.reserve(val.length());
+    for (char c : val) {
+      if (c == '"' || c == '\\') {
+        escaped += '\\';
+        escaped += c;
+      } else if (c == '\n') {
+        escaped += "\\n";
+      } else {
+        escaped += c;
+      }
+    }
+    return escaped;
+  };
+
+  auto add_label = [&](std::string_view key, std::string_view val) {
+    if (!labels.empty()) {
+      labels += ", ";
+    }
+    labels += std::string(key) + "=\"" + escape_label(val) + "\"";
+  };
+
+  constexpr std::string_view methods[]    = {"get",      "post",      "head",  
        "put",   "delete",           "options",
+                                             "trace",    "connect",   "push",  
        "purge", "extension_method", "incoming",
+                                             "outgoing", "completed", 
"invalid_client"};
+  constexpr std::string_view results[]    = {"hit", "miss", "error", "errors", 
"success", "failure"};
+  constexpr std::string_view categories[] = {"volume", "thread", "interface", 
"net", "host", "port"};
+
+  auto contains = [](const std::string_view *arr, size_t size, 
std::string_view token) {
+    for (size_t i = 0; i < size; ++i) {
+      if (arr[i] == token) {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  while (!name_view.empty()) {
+    // take_prefix_at is not constexpr in some versions of swoc, so we do it 
manually if needed
+    // or just use it and see. swoc::TextView find_first_of is not constexpr.
+    size_t         sep = name_view.find_first_of("._[]");
+    swoc::TextView token;
+    if (sep == swoc::TextView::npos) {
+      token = name_view;
+      name_view.clear();
+    } else {
+      token = name_view.prefix(sep);
+      name_view.remove_prefix(sep + 1);
+    }
+
+    if (token.empty()) {
+      continue;
+    }
+
+    bool token_handled = false;
+
+    // Status codes (200, 4xx, etc.)
+    if (token.length() == 3 && (token[0] >= '0' && token[0] <= '9') && 
((token[1] >= '0' && token[1] <= '9') || token[1] == 'x') &&
+        ((token[2] >= '0' && token[2] <= '9') || token[2] == 'x')) {
+      add_label("status", token);
+      token_handled = true;
+    }
+    // Methods
+    else if (contains(methods, sizeof(methods) / sizeof(methods[0]), token)) {
+      add_label("method", token);
+      token_handled = true;
+    }
+    // Generic Categories + Index (volume, 0, etc.)
+    else if (contains(categories, sizeof(categories) / sizeof(categories[0]), 
token)) {
+      swoc::TextView next   = name_view;
+      size_t         id_sep = next.find_first_of("._[]");
+      swoc::TextView id;
+      if (id_sep == swoc::TextView::npos) {
+        id = next;
+        next.clear();
+      } else {
+        id = next.prefix(id_sep);
+        next.remove_prefix(id_sep + 1);
+      }
+
+      bool is_id = !id.empty();
+      for (char c : id) {
+        if (!(c >= '0' && c <= '9') && c != 'x') {
+          is_id = false;
+          break;
+        }
+      }
+      if (is_id) {
+        add_label(token, id);
+        if (!base_name.empty()) {
+          base_name += ".";
+        }
+        base_name     += token;
+        name_view      = next;
+        token_handled  = true;
+      }
+    }
+    // Results (hit, miss)
+    else if (contains(results, sizeof(results) / sizeof(results[0]), token)) {
+      // 'hit' and 'miss' are almost always labels.
+      if (token == "hit" || token == "miss" || !name_view.empty()) {
+        add_label("result", token);
+        token_handled = true;
+      }
+    }
+    // Buckets (e.g., 10ms)
+    else {
+      constexpr std::string_view units[] = {"ms", "us", "s"};
+      for (const auto &unit : units) {
+        size_t unit_len = unit.length();
+        if (token.length() > unit_len && token.substr(token.length() - 
unit_len) == unit) {
+          bool all_digits = true;
+          for (size_t j = 0; j < token.length() - unit_len; ++j) {
+            if (!(token[j] >= '0' && token[j] <= '9')) {
+              all_digits = false;
+              break;
+            }
+          }
+          if (all_digits && token.length() > unit_len) {
+            add_label("le", token);
+            token_handled = true;
+            break;
+          }
+        }
+      }
+    }
+
+    if (!token_handled) {
+      if (!base_name.empty()) {
+        base_name += ".";
+      }
+      base_name += token;
+    }
+  }
+
+  return {base_name, labels};
+}
+
+static void
+prometheus_v2_out_stat(TSRecordType /* rec_type ATS_UNUSED */, void *edata, 
int /* registered ATS_UNUSED */, const char *name,
+                       TSRecordDataType data_type, TSRecordData *datum)
+{
+  stats_state *my_state = static_cast<stats_state *>(edata);
+
+  if (data_type == TS_RECORDDATATYPE_STRING) {
+    return; // Prometheus does not support string values.
+  }
+
+  auto        v2             = parse_metric_v2(name);
+  std::string sanitized_name = sanitize_metric_name_for_prometheus(v2.name);
+
+  if (sanitized_name.empty()) {
+    return;
+  }
+
+  // Only emit HELP and TYPE once per base name.
+  auto it = my_state->prometheus_v2_emitted.find(sanitized_name);
+  if (it == my_state->prometheus_v2_emitted.end()) {
+    APPEND("# HELP ");
+    APPEND(sanitized_name.c_str());
+    APPEND(" ");
+    APPEND(name);
+    APPEND("\n");
+
+    const char *type_str = (data_type == TS_RECORDDATATYPE_COUNTER) ? 
"counter" : "gauge";
+    APPEND("# TYPE ");
+    APPEND(sanitized_name.c_str());
+    APPEND(" ");
+    APPEND(type_str);
+    APPEND("\n");
+
+    my_state->prometheus_v2_emitted[sanitized_name] = data_type;
+  }

Review Comment:
   The prometheus_v2_emitted map stores TSRecordDataType (an int) but only 
checks for existence. If metrics with the same base name but different data 
types are encountered after the first one, they will use the TYPE line from the 
first metric encountered. This could result in incorrect TYPE metadata (e.g., 
marking an INT as a counter when it should be a gauge). Consider validating 
that all metrics with the same base name have consistent types, or at least 
documenting this behavior.



##########
plugins/stats_over_http/stats_over_http.cc:
##########
@@ -39,6 +39,10 @@
 #include <string_view>
 #include <sys/stat.h>
 #include <ts/ts.h>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+#include <mutex>

Review Comment:
   The mutex header is included but never used in the code. Consider removing 
this unused include to keep dependencies clean.
   ```suggestion
   
   ```



##########
plugins/stats_over_http/stats_over_http.cc:
##########
@@ -509,32 +564,265 @@ static
   return sanitized_name;
 }
 
+/** Parse a Prometheus v2 metric name and return the base name and labels.
+ *
+ * @param[in] name The metric name to parse.
+ * @return A prometheus_v2_metric struct containing the base name and labels.
+ */
+static
+// Remove this check when we drop support for pre-13 GCC versions.
+#if defined(__cpp_lib_constexpr_string) && __cpp_lib_constexpr_string >= 
201907L
+// Clang <= 16 doesn't fully support constexpr std::string.
+#if !defined(__clang__) || __clang_major__ > 16
+  constexpr
+#endif
+#endif
+  prometheus_v2_metric
+  parse_metric_v2(std::string_view name)
+{
+  swoc::TextView name_view{name};
+  std::string    labels;
+  std::string    base_name;
+
+  auto escape_label = [](std::string_view val) {
+    std::string escaped;
+    escaped.reserve(val.length());
+    for (char c : val) {
+      if (c == '"' || c == '\\') {
+        escaped += '\\';
+        escaped += c;
+      } else if (c == '\n') {
+        escaped += "\\n";
+      } else {
+        escaped += c;
+      }
+    }
+    return escaped;
+  };

Review Comment:
   The escape_label function uses repeated string concatenation within a loop, 
which can be inefficient due to repeated memory allocations. Since you already 
reserve space based on val.length(), consider using a more efficient approach 
or pre-calculating the required size accounting for escaped characters.



##########
plugins/stats_over_http/stats_over_http.cc:
##########
@@ -92,6 +96,28 @@ const int BROTLI_LGW               = 16;
 static bool integer_counters = false;
 static bool wrap_counters    = false;
 
+struct prometheus_v2_metric {
+  std::string name;
+  std::string labels;
+
+  bool
+  operator==(const prometheus_v2_metric &other) const
+  {
+    return name == other.name && labels == other.labels;
+  }
+};
+
+namespace std
+{
+template <> struct hash<prometheus_v2_metric> {
+  size_t
+  operator()(const prometheus_v2_metric &m) const
+  {
+    return hash<string>()(m.name) ^ hash<string>()(m.labels);

Review Comment:
   The hash function uses XOR to combine hashes of name and labels, which can 
produce identical hash values when name and labels are swapped or when they 
have the same hash. Consider using a more robust hash combination like 
std::hash with tuple or boost::hash_combine pattern to avoid unnecessary hash 
collisions.
   ```suggestion
       size_t h1 = hash<string>()(m.name);
       size_t h2 = hash<string>()(m.labels);
       // Combine hashes in a non-commutative way to reduce collisions.
       h1 ^= h2 + 0x9e3779b9 + (h1 << 6) + (h1 >> 2);
       return h1;
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to