weiqingy commented on code in PR #609: URL: https://github.com/apache/flink-agents/pull/609#discussion_r3162076092
########## runtime/src/main/java/org/apache/flink/agents/runtime/eventlog/JsonTruncator.java: ########## @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.agents.runtime.eventlog; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.JsonNodeFactory; +import com.fasterxml.jackson.databind.node.ObjectNode; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +/** + * Truncates a Jackson {@link JsonNode} tree per configurable thresholds. + * + * <p>Three truncation strategies are applied in a single recursive pass: + * + * <ul> + * <li><b>String truncation</b>: Strings longer than {@code maxStringLength} are replaced with a + * wrapper: {@code {"truncatedString": "first N chars...", "omittedChars": M}} + * <li><b>Array truncation</b>: Arrays larger than {@code maxArrayElements} are replaced with a + * wrapper: {@code {"truncatedList": [first N elements], "omittedElements": M}} + * <li><b>Depth truncation</b>: At max depth, object nodes retain only scalar fields; nested + * objects/arrays are dropped: {@code {"truncatedObject": {scalars...}, "omittedFields": N}} + * </ul> + * + * <p>Setting any threshold to {@code 0} disables that specific truncation strategy. If all + * thresholds are {@code 0}, no truncation occurs. + * + * <p>Protected fields at the top level of the event node ({@code eventType}, {@code id}, {@code + * attributes}) are never truncated. + */ +public class JsonTruncator { + + private static final Set<String> PROTECTED_FIELDS = + new HashSet<>(Arrays.asList("eventType", "id", "attributes")); + + private final int maxStringLength; + private final int maxArrayElements; + private final int maxDepth; + + /** + * Creates a new truncator with the given thresholds. + * + * @param maxStringLength maximum character length for string values; 0 to disable + * @param maxArrayElements maximum number of array elements retained; 0 to disable + * @param maxDepth maximum object nesting depth; 0 to disable + */ + public JsonTruncator(int maxStringLength, int maxArrayElements, int maxDepth) { + this.maxStringLength = maxStringLength; + this.maxArrayElements = maxArrayElements; + this.maxDepth = maxDepth; + } + + /** + * Truncates the given event node in place according to configured thresholds. + * + * <p>Protected fields ({@code eventType}, {@code id}, {@code attributes}) at the top level of + * the event node are never truncated. + * + * @param eventNode the top-level event JSON object to truncate + * @return {@code true} if any field was truncated, {@code false} if the node was unchanged + */ + public boolean truncate(ObjectNode eventNode) { + if (eventNode == null) { + return false; + } + return truncateObject(eventNode, 1, true); + } + + /** + * Recursively truncates an object node. + * + * @param node the object node to process + * @param depth current depth (1 = top-level event node) + * @param isTopLevel whether this is the top-level event node (for protected field checks) + * @return true if any truncation occurred + */ + private boolean truncateObject(ObjectNode node, int depth, boolean isTopLevel) { + boolean truncated = false; + + // At max depth, collapse the entire object to retain only scalars + if (maxDepth > 0 && depth >= maxDepth) { + return collapseAtMaxDepth(node, isTopLevel); + } + + List<String> fieldNames = new ArrayList<>(); + node.fieldNames().forEachRemaining(fieldNames::add); + + for (String fieldName : fieldNames) { + if (isTopLevel && PROTECTED_FIELDS.contains(fieldName)) { + continue; + } + + JsonNode child = node.get(fieldName); + if (child == null) { + continue; + } + + if (child.isTextual()) { + JsonNode replacement = truncateString(child.textValue()); + if (replacement != null) { + node.set(fieldName, replacement); + truncated = true; + } + } else if (child.isArray()) { + // First recurse into retained elements, then truncate the array if needed + truncated |= truncateArrayContents((ArrayNode) child, depth + 1); + JsonNode replacement = truncateArray((ArrayNode) child); Review Comment: Agreed — applied in `4356979`. Both call sites now truncate the array first and only recurse into the elements that survive: the `truncateObject` array branch and the symmetric path in `truncateArrayContents` for nested arrays. Added `testRecursionSkipsDroppedTailElements`, which holds references to the dropped tail elements and asserts their content remains unmutated, so the invariant is locked in against future regressions. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
