weiqingy commented on code in PR #609:
URL: https://github.com/apache/flink-agents/pull/609#discussion_r3162076092


##########
runtime/src/main/java/org/apache/flink/agents/runtime/eventlog/JsonTruncator.java:
##########
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.agents.runtime.eventlog;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Truncates a Jackson {@link JsonNode} tree per configurable thresholds.
+ *
+ * <p>Three truncation strategies are applied in a single recursive pass:
+ *
+ * <ul>
+ *   <li><b>String truncation</b>: Strings longer than {@code maxStringLength} are replaced with a
+ *       wrapper: {@code {"truncatedString": "first N chars...", "omittedChars": M}}
+ *   <li><b>Array truncation</b>: Arrays larger than {@code maxArrayElements} are replaced with a
+ *       wrapper: {@code {"truncatedList": [first N elements], "omittedElements": M}}
+ *   <li><b>Depth truncation</b>: At max depth, object nodes retain only scalar fields; nested
+ *       objects/arrays are dropped: {@code {"truncatedObject": {scalars...}, "omittedFields": N}}
+ * </ul>
+ *
+ * <p>Setting any threshold to {@code 0} disables that specific truncation strategy. If all
+ * thresholds are {@code 0}, no truncation occurs.
+ *
+ * <p>Protected fields at the top level of the event node ({@code eventType}, {@code id}, {@code
+ * attributes}) are never truncated.
+ */
+public class JsonTruncator {
+
+    private static final Set<String> PROTECTED_FIELDS =
+            new HashSet<>(Arrays.asList("eventType", "id", "attributes"));
+
+    private final int maxStringLength;
+    private final int maxArrayElements;
+    private final int maxDepth;
+
+    /**
+     * Creates a new truncator with the given thresholds.
+     *
+     * @param maxStringLength maximum character length for string values; 0 to disable
+     * @param maxArrayElements maximum number of array elements retained; 0 to disable
+     * @param maxDepth maximum object nesting depth; 0 to disable
+     */
+    public JsonTruncator(int maxStringLength, int maxArrayElements, int 
maxDepth) {
+        this.maxStringLength = maxStringLength;
+        this.maxArrayElements = maxArrayElements;
+        this.maxDepth = maxDepth;
+    }
+
+    /**
+     * Truncates the given event node in place according to configured thresholds.
+     *
+     * <p>Protected fields ({@code eventType}, {@code id}, {@code attributes}) at the top level of
+     * the event node are never truncated.
+     *
+     * @param eventNode the top-level event JSON object to truncate
+     * @return {@code true} if any field was truncated, {@code false} if the node was unchanged
+     */
+    public boolean truncate(ObjectNode eventNode) {
+        if (eventNode == null) {
+            return false;
+        }
+        return truncateObject(eventNode, 1, true);
+    }
+
+    /**
+     * Recursively truncates an object node.
+     *
+     * @param node the object node to process
+     * @param depth current depth (1 = top-level event node)
+     * @param isTopLevel whether this is the top-level event node (for protected field checks)
+     * @return true if any truncation occurred
+     */
+    private boolean truncateObject(ObjectNode node, int depth, boolean 
isTopLevel) {
+        boolean truncated = false;
+
+        // At max depth, collapse the entire object to retain only scalars
+        if (maxDepth > 0 && depth >= maxDepth) {
+            return collapseAtMaxDepth(node, isTopLevel);
+        }
+
+        List<String> fieldNames = new ArrayList<>();
+        node.fieldNames().forEachRemaining(fieldNames::add);
+
+        for (String fieldName : fieldNames) {
+            if (isTopLevel && PROTECTED_FIELDS.contains(fieldName)) {
+                continue;
+            }
+
+            JsonNode child = node.get(fieldName);
+            if (child == null) {
+                continue;
+            }
+
+            if (child.isTextual()) {
+                JsonNode replacement = truncateString(child.textValue());
+                if (replacement != null) {
+                    node.set(fieldName, replacement);
+                    truncated = true;
+                }
+            } else if (child.isArray()) {
+                // First recurse into retained elements, then truncate the array if needed
+                truncated |= truncateArrayContents((ArrayNode) child, depth + 
1);
+                JsonNode replacement = truncateArray((ArrayNode) child);

Review Comment:
   Good call. In `c4596bc` `EventLogRecord` is reduced to just `(context, 
event)`; `logLevel`, `eventType`, `truncator`, and `truncatedEventsCounter` are 
all gone from the record.
   
   Truncation orchestration and the counter live in `FileEventLogger.append()` 
now, and the `logLevel` field in the on-disk JSON is emitted by the logger at 
write time rather than carried on the record.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to