This is an automated email from the ASF dual-hosted git repository.

xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new e16b2d9c5a1 Perf Optimize JSON message decoding by parsing directly to 
Map, avoiding intermediate JsonNode (#17485)
e16b2d9c5a1 is described below

commit e16b2d9c5a12da0a1c91abdac6f58001e2574a43
Author: Arunkumar Saravanan <[email protected]>
AuthorDate: Sun Jan 11 07:30:12 2026 +0530

    Perf Optimize JSON message decoding by parsing directly to Map, avoiding 
intermediate JsonNode (#17485)
    
    * Perf Improvement in JSON Decode - Ingestion Performance
    
    * Build issue fix
    
    * Fix API issue
---
 AGENTS.md                                          |  20 +
 pinot-perf/pom.xml                                 |   4 +
 .../apache/pinot/perf/BenchmarkJsonParsing.java    | 282 ++++++++++++
 .../inputformat/clplog/CLPLogMessageDecoder.java   |   6 +-
 .../inputformat/json/JSONMessageDecoder.java       |   6 +-
 .../java/org/apache/pinot/spi/utils/JsonUtils.java |  14 +
 .../org/apache/pinot/spi/utils/JsonUtilsTest.java  | 496 +++++++++++++++++++++
 7 files changed, 822 insertions(+), 6 deletions(-)

diff --git a/AGENTS.md b/AGENTS.md
index d534135dc9e..9c59eacb726 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,3 +1,23 @@
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+
+-->
 # Apache Pinot - AGENTS Guide
 
 This file provides quick, practical guidance for coding agents working in this
diff --git a/pinot-perf/pom.xml b/pinot-perf/pom.xml
index 59c24b98068..a1295123e05 100644
--- a/pinot-perf/pom.xml
+++ b/pinot-perf/pom.xml
@@ -206,6 +206,10 @@
               
<mainClass>org.apache.pinot.perf.aggregation.SumIntAggregationFunctionBenchmark</mainClass>
               <name>pinot-SumIntAggregationFunctionBenchmark</name>
             </program>
+            <program>
+              <mainClass>org.apache.pinot.perf.BenchmarkJsonParsing</mainClass>
+              <name>pinot-BenchmarkJsonParsing</name>
+            </program>
           </programs>
           <repositoryLayout>flat</repositoryLayout>
           <repositoryName>lib</repositoryName>
diff --git 
a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkJsonParsing.java 
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkJsonParsing.java
new file mode 100644
index 00000000000..dccc089f906
--- /dev/null
+++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkJsonParsing.java
@@ -0,0 +1,282 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.perf;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.apache.pinot.spi.utils.JsonUtils;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+
+/**
+ * Benchmark to compare JSON parsing performance:
+ * - Old approach: bytesToJsonNode() + jsonNodeToMap() (two-step parsing)
+ * - New approach: bytesToMap() (single-step parsing)
+ *
+ * This benchmark simulates the JSON decoding path in 
RealtimeSegmentDataManager.consumeLoop()
+ * where JSONMessageDecoder.decode() is called for each message from the 
stream.
+ *
+ * Run with: mvn exec:exec -Dexec.executable="java" -Dexec.args="-cp 
%classpath org.apache.pinot.perf
+ * .BenchmarkJsonParsing"
+ * Or compile and run: java -jar pinot-perf/target/pinot-perf-*-shaded.jar 
BenchmarkJsonParsing
+ */
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@Fork(1)
+@Warmup(iterations = 3, time = 2)
+@Measurement(iterations = 5, time = 3)
+@State(Scope.Benchmark)
+public class BenchmarkJsonParsing {
+
+  public static void main(String[] args)
+      throws Exception {
+    ChainedOptionsBuilder opt = new OptionsBuilder()
+        .include(BenchmarkJsonParsing.class.getSimpleName())
+        .shouldDoGC(true);
+    new Runner(opt.build()).run();
+  }
+
+  // Simulates different JSON payload sizes
+  @Param({"small", "medium", "large", "nested"})
+  private String _payloadType;
+
+  // Number of JSON messages to pre-generate for the benchmark
+  private static final int NUM_MESSAGES = 1000;
+
+  private List<byte[]> _jsonPayloads;
+  private int _currentIndex = 0;
+
+  @Setup(Level.Trial)
+  public void setUp() {
+    _jsonPayloads = new ArrayList<>(NUM_MESSAGES);
+    Random random = new Random(42);
+
+    for (int i = 0; i < NUM_MESSAGES; i++) {
+      String json = generateJsonPayload(_payloadType, random, i);
+      _jsonPayloads.add(json.getBytes(StandardCharsets.UTF_8));
+    }
+  }
+
+  /**
+   * Generates different types of JSON payloads to simulate real-world 
streaming data.
+   */
+  private String generateJsonPayload(String type, Random random, int index) {
+    switch (type) {
+      case "small":
+        // Small payload: ~100 bytes, typical for simple events
+        return String.format(
+            "{\"id\":%d,\"timestamp\":%d,\"value\":%.2f,\"status\":\"%s\"}",
+            index,
+            System.currentTimeMillis() + random.nextInt(10000),
+            random.nextDouble() * 1000,
+            random.nextBoolean() ? "active" : "inactive"
+        );
+
+      case "medium":
+        // Medium payload: ~500 bytes, typical for user events
+        return String.format(
+            "{\"eventId\":\"%s-%d\",\"userId\":\"%s\",\"sessionId\":\"%s\","
+                + "\"timestamp\":%d,\"eventType\":\"%s\",\"properties\":{"
+                + 
"\"page\":\"/products/%d\",\"referrer\":\"https://example.com/search?q=%s\","
+                + "\"duration\":%d,\"scrollDepth\":%.2f,\"clicks\":%d},"
+                + "\"userAgent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 
10_15_7) AppleWebKit/537.36\","
+                + 
"\"ipAddress\":\"192.168.%d.%d\",\"country\":\"US\",\"city\":\"San 
Francisco\"}",
+            "evt", index,
+            "user-" + random.nextInt(100000),
+            "sess-" + random.nextInt(1000000),
+            System.currentTimeMillis(),
+            randomEventType(random),
+            random.nextInt(10000),
+            randomSearchQuery(random),
+            random.nextInt(300),
+            random.nextDouble() * 100,
+            random.nextInt(20),
+            random.nextInt(256),
+            random.nextInt(256)
+        );
+
+      case "large":
+        // Large payload: ~2KB, typical for detailed analytics events
+        StringBuilder sb = new StringBuilder();
+        
sb.append("{\"eventId\":\"").append("evt-").append(index).append("\",");
+        
sb.append("\"timestamp\":").append(System.currentTimeMillis()).append(",");
+        sb.append("\"data\":{");
+
+        // Add 20 fields to make it large
+        for (int j = 0; j < 20; j++) {
+          if (j > 0) {
+            sb.append(",");
+          }
+          sb.append("\"field").append(j).append("\":\"")
+              .append(randomString(random, 50 + 
random.nextInt(50))).append("\"");
+        }
+        sb.append("},\"metrics\":{");
+
+        // Add 10 numeric metrics
+        for (int j = 0; j < 10; j++) {
+          if (j > 0) {
+            sb.append(",");
+          }
+          
sb.append("\"metric").append(j).append("\":").append(random.nextDouble() * 
1000);
+        }
+        sb.append("},\"tags\":[");
+
+        // Add 5 tags
+        for (int j = 0; j < 5; j++) {
+          if (j > 0) {
+            sb.append(",");
+          }
+          sb.append("\"tag-").append(random.nextInt(100)).append("\"");
+        }
+        sb.append("]}");
+        return sb.toString();
+
+      case "nested":
+        // Nested payload: ~1KB with nested objects and arrays
+        return String.format(
+            
"{\"order\":{\"id\":%d,\"customer\":{\"id\":\"%s\",\"name\":\"%s\",\"email\":\"%s@example.com\"},"
+                + 
"\"items\":[{\"sku\":\"SKU-%d\",\"name\":\"%s\",\"price\":%.2f,\"qty\":%d},"
+                + 
"{\"sku\":\"SKU-%d\",\"name\":\"%s\",\"price\":%.2f,\"qty\":%d},"
+                + 
"{\"sku\":\"SKU-%d\",\"name\":\"%s\",\"price\":%.2f,\"qty\":%d}],"
+                + 
"\"shipping\":{\"method\":\"%s\",\"address\":{\"street\":\"%s\",\"city\":\"%s\","
+                + 
"\"state\":\"%s\",\"zip\":\"%05d\"}},\"total\":%.2f,\"currency\":\"USD\"}}",
+            index,
+            "cust-" + random.nextInt(10000),
+            randomName(random),
+            "user" + random.nextInt(10000),
+            random.nextInt(10000), randomProduct(random), random.nextDouble() 
* 100, random.nextInt(5) + 1,
+            random.nextInt(10000), randomProduct(random), random.nextDouble() 
* 100, random.nextInt(5) + 1,
+            random.nextInt(10000), randomProduct(random), random.nextDouble() 
* 100, random.nextInt(5) + 1,
+            random.nextBoolean() ? "express" : "standard",
+            random.nextInt(9999) + " Main St",
+            randomCity(random),
+            randomState(random),
+            random.nextInt(99999),
+            random.nextDouble() * 500 + 50
+        );
+
+      default:
+        return "{\"id\":" + index + "}";
+    }
+  }
+
+  private byte[] getNextPayload() {
+    byte[] payload = _jsonPayloads.get(_currentIndex);
+    _currentIndex = (_currentIndex + 1) % NUM_MESSAGES;
+    return payload;
+  }
+
+  /**
+   * OLD APPROACH: Two-step parsing (bytesToJsonNode + jsonNodeToMap)
+   * This was the original implementation that caused high CPU usage.
+   */
+  @Benchmark
+  public Map<String, Object> oldApproachTwoStepParsing(Blackhole blackhole)
+      throws IOException {
+    byte[] payload = getNextPayload();
+
+    // Step 1: Parse bytes to JsonNode (intermediate tree representation)
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(payload, 0, payload.length);
+
+    // Step 2: Convert JsonNode to Map
+    Map<String, Object> result = JsonUtils.jsonNodeToMap(jsonNode);
+
+    blackhole.consume(jsonNode);
+    return result;
+  }
+
+  /**
+   * NEW APPROACH: Single-step parsing (bytesToMap)
+   * This is the optimized implementation that parses directly to Map.
+   */
+  @Benchmark
+  public Map<String, Object> newApproachDirectParsing(Blackhole blackhole)
+      throws IOException {
+    byte[] payload = getNextPayload();
+
+    // Direct parsing to Map, skipping the intermediate JsonNode
+    Map<String, Object> result = JsonUtils.bytesToMap(payload, 0, 
payload.length);
+
+    return result;
+  }
+
+  // Helper methods for generating realistic test data
+
+  private static String randomEventType(Random random) {
+    String[] types = {"pageview", "click", "scroll", "purchase", "signup", 
"login", "search"};
+    return types[random.nextInt(types.length)];
+  }
+
+  private static String randomSearchQuery(Random random) {
+    String[] queries = {"laptop", "phone", "headphones", "camera", "tablet", 
"watch", "speaker"};
+    return queries[random.nextInt(queries.length)];
+  }
+
+  private static String randomString(Random random, int length) {
+    StringBuilder sb = new StringBuilder(length);
+    String chars = 
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+    for (int i = 0; i < length; i++) {
+      sb.append(chars.charAt(random.nextInt(chars.length())));
+    }
+    return sb.toString();
+  }
+
+  private static String randomName(Random random) {
+    String[] firstNames = {"John", "Jane", "Bob", "Alice", "Charlie", "Diana", 
"Eve", "Frank"};
+    String[] lastNames = {"Smith", "Johnson", "Williams", "Brown", "Jones", 
"Garcia", "Miller"};
+    return firstNames[random.nextInt(firstNames.length)] + " " + 
lastNames[random.nextInt(lastNames.length)];
+  }
+
+  private static String randomProduct(Random random) {
+    String[] products = {"Widget", "Gadget", "Device", "Tool", "Component", 
"Module", "Unit"};
+    String[] adjectives = {"Premium", "Standard", "Basic", "Pro", "Elite", 
"Ultra"};
+    return adjectives[random.nextInt(adjectives.length)] + " " + 
products[random.nextInt(products.length)];
+  }
+
+  private static String randomCity(Random random) {
+    String[] cities = {"New York", "Los Angeles", "Chicago", "Houston", 
"Phoenix", "San Francisco", "Seattle"};
+    return cities[random.nextInt(cities.length)];
+  }
+
+  private static String randomState(Random random) {
+    String[] states = {"NY", "CA", "IL", "TX", "AZ", "WA", "OR", "NV", "CO", 
"FL"};
+    return states[random.nextInt(states.length)];
+  }
+}
diff --git 
a/pinot-plugins/pinot-input-format/pinot-clp-log/src/main/java/org/apache/pinot/plugin/inputformat/clplog/CLPLogMessageDecoder.java
 
b/pinot-plugins/pinot-input-format/pinot-clp-log/src/main/java/org/apache/pinot/plugin/inputformat/clplog/CLPLogMessageDecoder.java
index 07677dbd62c..dd00b08ef25 100644
--- 
a/pinot-plugins/pinot-input-format/pinot-clp-log/src/main/java/org/apache/pinot/plugin/inputformat/clplog/CLPLogMessageDecoder.java
+++ 
b/pinot-plugins/pinot-input-format/pinot-clp-log/src/main/java/org/apache/pinot/plugin/inputformat/clplog/CLPLogMessageDecoder.java
@@ -18,7 +18,6 @@
  */
 package org.apache.pinot.plugin.inputformat.clplog;
 
-import com.fasterxml.jackson.databind.JsonNode;
 import java.util.Map;
 import java.util.Set;
 import javax.annotation.Nullable;
@@ -99,8 +98,9 @@ public class CLPLogMessageDecoder implements 
StreamMessageDecoder<byte[]> {
   @Override
   public GenericRow decode(byte[] payload, int offset, int length, GenericRow 
destination) {
     try {
-      JsonNode jsonNode = JsonUtils.bytesToJsonNode(payload, offset, length);
-      return _recordExtractor.extract(JsonUtils.jsonNodeToMap(jsonNode), 
destination);
+      // Parse directly to Map, avoiding intermediate JsonNode representation 
for better performance
+      Map<String, Object> jsonMap = JsonUtils.bytesToMap(payload, offset, 
length);
+      return _recordExtractor.extract(jsonMap, destination);
     } catch (Exception e) {
       if (_errorSamplingPeriod != 0) {
         _numErrorsUntilNextPrint--;
diff --git 
a/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONMessageDecoder.java
 
b/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONMessageDecoder.java
index 52b7ff01c7d..08a1122e6c4 100644
--- 
a/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONMessageDecoder.java
+++ 
b/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONMessageDecoder.java
@@ -18,7 +18,6 @@
  */
 package org.apache.pinot.plugin.inputformat.json;
 
-import com.fasterxml.jackson.databind.JsonNode;
 import java.util.Map;
 import java.util.Set;
 import org.apache.pinot.spi.data.readers.GenericRow;
@@ -59,8 +58,9 @@ public class JSONMessageDecoder implements 
StreamMessageDecoder<byte[]> {
   @Override
   public GenericRow decode(byte[] payload, int offset, int length, GenericRow 
destination) {
     try {
-      JsonNode message = JsonUtils.bytesToJsonNode(payload, offset, length);
-      return _jsonRecordExtractor.extract(JsonUtils.jsonNodeToMap(message), 
destination);
+      // Parse directly to Map, avoiding intermediate JsonNode representation 
for better performance
+      Map<String, Object> jsonMap = JsonUtils.bytesToMap(payload, offset, 
length);
+      return _jsonRecordExtractor.extract(jsonMap, destination);
     } catch (Exception e) {
       throw new RuntimeException(
           "Caught exception while decoding JSON record with payload: " + new 
String(payload, offset, length), e);
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java 
b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
index 46d5607406c..16bc44c29df 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
@@ -247,6 +247,20 @@ public class JsonUtils {
     return DEFAULT_READER.forType(MAP_TYPE_REFERENCE).readValue(jsonNode);
   }
 
+  /**
+   * Parses JSON bytes directly to a Map, avoiding the intermediate JsonNode 
representation.
+   * This is more efficient than calling bytesToJsonNode followed by 
jsonNodeToMap.
+   */
+  public static Map<String, Object> bytesToMap(byte[] jsonBytes)
+      throws IOException {
+    return DEFAULT_READER.forType(MAP_TYPE_REFERENCE).readValue(jsonBytes);
+  }
+
+  public static Map<String, Object> bytesToMap(byte[] jsonBytes, int offset, 
int length)
+      throws IOException {
+    return DEFAULT_READER.forType(MAP_TYPE_REFERENCE).readValue(jsonBytes, 
offset, length);
+  }
+
   public static String objectToString(Object object)
       throws JsonProcessingException {
     return DEFAULT_WRITER.writeValueAsString(object);
diff --git 
a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java 
b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
index bc975adff9d..f06c19675f2 100644
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
@@ -22,7 +22,9 @@ import com.fasterxml.jackson.databind.JsonNode;
 import com.google.common.collect.ImmutableSet;
 import java.io.File;
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -34,9 +36,11 @@ import 
org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.Schema;
 import org.testng.Assert;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
 import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
 import static org.testng.Assert.assertTrue;
 
 
@@ -763,4 +767,496 @@ public class JsonUtilsTest {
       assertEquals(flattenedRecord1.get(".addresses..street"), "second st");
     }
   }
+
+  // ==================== Tests for bytesToMap optimization 
====================
+  // These tests verify that bytesToMap produces identical results to the old
+  // two-step approach (bytesToJsonNode + jsonNodeToMap)
+
+  /**
+   * Data provider for various JSON payloads to test bytesToMap equivalence
+   */
+  @DataProvider(name = "jsonPayloads")
+  public Object[][] jsonPayloads() {
+    return new Object[][]{
+        // Empty and null cases
+        {"{}"},
+        {"{\"key\":null}"},
+
+        // Simple primitives
+        {"{\"string\":\"value\"}"},
+        {"{\"integer\":42}"},
+        {"{\"negative\":-123}"},
+        {"{\"float\":3.14159}"},
+        {"{\"negative_float\":-2.718}"},
+        {"{\"boolean_true\":true}"},
+        {"{\"boolean_false\":false}"},
+        {"{\"zero\":0}"},
+        {"{\"zero_float\":0.0}"},
+
+        // Large numbers
+        {"{\"large_int\":9223372036854775807}"},
+        {"{\"large_negative\":-9223372036854775808}"},
+        {"{\"scientific\":1.23e10}"},
+        {"{\"scientific_negative\":1.23e-10}"},
+
+        // String edge cases
+        {"{\"empty_string\":\"\"}"},
+        {"{\"whitespace\":\"   \"}"},
+        {"{\"with_quotes\":\"she said \\\"hello\\\"\"}"},
+        {"{\"with_backslash\":\"path\\\\to\\\\file\"}"},
+        {"{\"with_newline\":\"line1\\nline2\"}"},
+        {"{\"with_tab\":\"col1\\tcol2\"}"},
+        {"{\"with_unicode\":\"ζ—₯本θͺž\"}"},
+        {"{\"emoji\":\"Hello πŸ‘‹ World 🌍\"}"},
+        {"{\"special_chars\":\"<>&'\\\"\"}"},
+
+        // Arrays
+        {"{\"empty_array\":[]}"},
+        {"{\"int_array\":[1,2,3,4,5]}"},
+        {"{\"string_array\":[\"a\",\"b\",\"c\"]}"},
+        {"{\"mixed_array\":[1,\"two\",3.0,true,null]}"},
+        {"{\"nested_array\":[[1,2],[3,4],[5,6]]}"},
+        {"{\"array_of_objects\":[{\"a\":1},{\"b\":2}]}"},
+
+        // Nested objects
+        {"{\"nested\":{\"level1\":{\"level2\":{\"level3\":\"deep\"}}}}"},
+        
{"{\"person\":{\"name\":\"John\",\"age\":30,\"address\":{\"city\":\"NYC\",\"zip\":\"10001\"}}}"},
+
+        // Complex realistic payloads
+        {
+            
"{\"event\":{\"id\":123,\"type\":\"click\",\"timestamp\":1609459200000,"
+                + "\"user\":{\"id\":\"user123\",\"session\":\"sess456\"},"
+                + 
"\"data\":{\"page\":\"/products\",\"element\":\"button\",\"coordinates\":{\"x\":100,\"y\":200}}}}"
+        },
+
+        // Multiple fields with same type
+        
{"{\"a\":1,\"b\":2,\"c\":3,\"d\":4,\"e\":5,\"f\":6,\"g\":7,\"h\":8,\"i\":9,\"j\":10}"},
+
+        // Field names with special characters
+        {"{\"field-with-dash\":1}"},
+        {"{\"field.with.dots\":2}"},
+        {"{\"field_with_underscore\":3}"},
+        {"{\"123numeric_start\":4}"},
+
+        // Boolean and null combinations
+        {"{\"flags\":{\"active\":true,\"deleted\":false,\"pending\":null}}"},
+
+        // Array with nulls
+        {"{\"array_with_nulls\":[1,null,3,null,5]}"},
+
+        // Unicode field names
+        {"{\"ζ—₯本θͺžγ‚­γƒΌ\":\"value\"}"},
+
+        // Very long string value
+        {"{\"long_string\":\"" + "a".repeat(1000) + "\"}"},
+
+        // Deeply nested structure
+        {"{\"l1\":{\"l2\":{\"l3\":{\"l4\":{\"l5\":{\"value\":\"deep\"}}}}}}"},
+
+        // Real-world like Kafka message
+        {
+            
"{\"topic\":\"events\",\"partition\":0,\"offset\":12345,\"timestamp\":1609459200000,"
+                + "\"key\":\"user123\",\"value\":{\"event_type\":\"purchase\","
+                + "\"items\":[{\"sku\":\"ABC123\",\"qty\":2,\"price\":29.99},"
+                + "{\"sku\":\"XYZ789\",\"qty\":1,\"price\":49.99}],"
+                + "\"total\":109.97,\"currency\":\"USD\"}}"
+        }
+    };
+  }
+
+  /**
+   * Test that bytesToMap produces identical results to bytesToJsonNode + 
jsonNodeToMap
+   * for all payload types
+   */
+  @Test(dataProvider = "jsonPayloads")
+  public void testBytesToMapEquivalence(String jsonString)
+      throws IOException {
+    byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+    // Old approach: bytesToJsonNode + jsonNodeToMap
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes, 0, 
jsonBytes.length);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+
+    // New approach: bytesToMap directly
+    Map<String, Object> newResult = JsonUtils.bytesToMap(jsonBytes, 0, 
jsonBytes.length);
+
+    // Verify they are equal
+    assertEquals(newResult, oldResult,
+        "bytesToMap should produce identical result to bytesToJsonNode + 
jsonNodeToMap for: " + jsonString);
+  }
+
+  /**
+   * Test bytesToMap with offset and length parameters (partial array reading)
+   */
+  @Test
+  public void testBytesToMapWithOffset()
+      throws IOException {
+    String prefix = "GARBAGE";
+    String jsonString = "{\"key\":\"value\",\"num\":42}";
+    String suffix = "MORE_GARBAGE";
+    String combined = prefix + jsonString + suffix;
+
+    byte[] fullBytes = combined.getBytes(StandardCharsets.UTF_8);
+    int offset = prefix.length();
+    int length = jsonString.length();
+
+    // Old approach
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(fullBytes, offset, length);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+
+    // New approach
+    Map<String, Object> newResult = JsonUtils.bytesToMap(fullBytes, offset, 
length);
+
+    assertEquals(newResult, oldResult);
+    assertEquals(newResult.get("key"), "value");
+    assertEquals(newResult.get("num"), 42);
+  }
+
+  /**
+   * Test bytesToMap with offset where offset + length would exceed array 
bounds.
+   * This test ensures we're passing length (not offset+length) to Jackson API.
+   */
+  @Test
+  public void testBytesToMapWithOffsetEdgeCase()
+      throws IOException {
+    // Create a case where offset + length > array.length would fail if we 
incorrectly
+    // passed offset+length as the third parameter to Jackson's readValue
+    String prefix = "XX";  // 2 bytes
+    String jsonString = "{\"a\":1}";  // 7 bytes
+    // Total: 9 bytes
+    // offset=2, length=7
+    // If buggy (passing offset+length=9), would try to read 9 bytes from 
position 2,
+    // but only 7 bytes available from that position
+
+    String combined = prefix + jsonString;  // No suffix - exactly at the 
boundary
+    byte[] fullBytes = combined.getBytes(StandardCharsets.UTF_8);
+    int offset = prefix.length();
+    int length = jsonString.length();
+
+    // Verify array bounds: offset + length should equal array length exactly
+    assertEquals(fullBytes.length, offset + length,
+        "Test setup: offset + length should equal array length");
+
+    // This would fail with the bug: "Invalid 'offset' and/or 'len' arguments"
+    Map<String, Object> result = JsonUtils.bytesToMap(fullBytes, offset, 
length);
+    assertEquals(result.get("a"), 1);
+
+    // Verify equivalence with old approach
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(fullBytes, offset, length);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+    assertEquals(result, oldResult);
+  }
+
+  /**
+   * Test bytesToMap with various offset positions to ensure correct slicing.
+   */
+  @Test
+  public void testBytesToMapWithVariousOffsets()
+      throws IOException {
+    String json = "{\"x\":\"y\"}";
+    byte[] jsonBytes = json.getBytes(StandardCharsets.UTF_8);
+
+    // Test with offset=0 (should work like no offset)
+    Map<String, Object> result0 = JsonUtils.bytesToMap(jsonBytes, 0, 
jsonBytes.length);
+    assertEquals(result0.get("x"), "y");
+
+    // Test with padding on both sides at different positions
+    for (int prefixLen = 1; prefixLen <= 10; prefixLen++) {
+      String prefix = "P".repeat(prefixLen);
+      String suffix = "S".repeat(5);
+      String combined = prefix + json + suffix;
+      byte[] fullBytes = combined.getBytes(StandardCharsets.UTF_8);
+
+      Map<String, Object> result = JsonUtils.bytesToMap(fullBytes, prefixLen, 
json.length());
+      assertEquals(result.get("x"), "y",
+          "Failed with prefix length " + prefixLen);
+
+      // Verify equivalence
+      JsonNode node = JsonUtils.bytesToJsonNode(fullBytes, prefixLen, 
json.length());
+      assertEquals(result, JsonUtils.jsonNodeToMap(node));
+    }
+  }
+
+  /**
+   * Test bytesToMap without offset (full array)
+   */
+  @Test
+  public void testBytesToMapFullArray()
+      throws IOException {
+    String jsonString = "{\"name\":\"test\",\"values\":[1,2,3]}";
+    byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+    // Old approach using full array methods
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+
+    // New approach
+    Map<String, Object> newResult = JsonUtils.bytesToMap(jsonBytes);
+
+    assertEquals(newResult, oldResult);
+  }
+
+  /**
+   * Test that numeric types are preserved correctly
+   */
+  @Test
+  public void testBytesToMapNumericTypes()
+      throws IOException {
+    String jsonString = 
"{\"int\":42,\"long\":9223372036854775807,\"double\":3.14159,\"float\":1.5}";
+    byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+    Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+    // Verify the types and values
+    assertEquals(result.get("int"), 42);
+    assertEquals(result.get("long"), 9223372036854775807L);
+    assertEquals(result.get("double"), 3.14159);
+    assertEquals(result.get("float"), 1.5);
+
+    // Also verify equivalence with old approach
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+    assertEquals(result, oldResult);
+  }
+
+  /**
+   * Test nested object access after parsing
+   */
+  @Test
+  @SuppressWarnings("unchecked")
+  public void testBytesToMapNestedAccess()
+      throws IOException {
+    String jsonString = 
"{\"outer\":{\"inner\":{\"value\":\"deep\"},\"array\":[1,2,3]}}";
+    byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+    Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+    Map<String, Object> outer = (Map<String, Object>) result.get("outer");
+    Map<String, Object> inner = (Map<String, Object>) outer.get("inner");
+    List<Object> array = (List<Object>) outer.get("array");
+
+    assertEquals(inner.get("value"), "deep");
+    assertEquals(array, Arrays.asList(1, 2, 3));
+
+    // Verify equivalence
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+    assertEquals(result, oldResult);
+  }
+
+  /**
+   * Test null handling in bytesToMap
+   */
+  @Test
+  public void testBytesToMapNullHandling()
+      throws IOException {
+    String jsonString = "{\"nullField\":null,\"nested\":{\"alsoNull\":null}}";
+    byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+    Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+    assertTrue(result.containsKey("nullField"));
+    assertNull(result.get("nullField"));
+
+    @SuppressWarnings("unchecked")
+    Map<String, Object> nested = (Map<String, Object>) result.get("nested");
+    assertTrue(nested.containsKey("alsoNull"));
+    assertNull(nested.get("alsoNull"));
+
+    // Verify equivalence
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+    assertEquals(result, oldResult);
+  }
+
+  /**
+   * Checks that the JSON literals {@code true} and {@code false} are returned as the matching boolean values,
+   * and that the result matches the JsonNode-based conversion.
+   *
+   * @throws IOException if parsing the JSON bytes fails
+   */
+  @Test
+  public void testBytesToMapBooleanHandling()
+      throws IOException {
+    byte[] payload = "{\"trueVal\":true,\"falseVal\":false}".getBytes(StandardCharsets.UTF_8);
+
+    Map<String, Object> parsed = JsonUtils.bytesToMap(payload);
+
+    assertEquals(parsed.get("trueVal"), Boolean.TRUE);
+    assertEquals(parsed.get("falseVal"), Boolean.FALSE);
+
+    // The direct parse must equal the result of the JsonNode-based path
+    Map<String, Object> viaJsonNode = JsonUtils.jsonNodeToMap(JsonUtils.bytesToJsonNode(payload));
+    assertEquals(parsed, viaJsonNode);
+  }
+
+  /**
+   * Verifies that an empty JSON object and an empty JSON array are returned as an empty {@code Map} and an
+   * empty {@code List}, and that the result equals the JsonNode-based conversion.
+   *
+   * @throws IOException if parsing the JSON bytes fails
+   */
+  @Test
+  public void testBytesToMapEmptyStructures()
+      throws IOException {
+    String jsonString = "{\"emptyObj\":{},\"emptyArr\":[]}";
+    byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+    Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+    @SuppressWarnings("unchecked")
+    Map<String, Object> emptyObj = (Map<String, Object>) result.get("emptyObj");
+    assertTrue(emptyObj.isEmpty());
+
+    @SuppressWarnings("unchecked")
+    List<Object> emptyArr = (List<Object>) result.get("emptyArr");
+    assertTrue(emptyArr.isEmpty());
+
+    // Verify equivalence with the JsonNode-based conversion
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+    assertEquals(result, oldResult);
+  }
+
+  /**
+   * Verifies that multi-byte UTF-8 content (CJK text and emoji outside the Basic Multilingual Plane) survives
+   * {@code JsonUtils.bytesToMap} unchanged, and that the result equals the JsonNode-based conversion.
+   *
+   * @throws IOException if parsing the JSON bytes fails
+   */
+  @Test
+  public void testBytesToMapUtf8Encoding()
+      throws IOException {
+    // Test various UTF-8 characters: 3-byte CJK code points and 4-byte emoji
+    String jsonString = "{\"chinese\":\"中文\",\"japanese\":\"日本語\",\"korean\":\"한국어\","
+        + "\"emoji\":\"🎉🎊🎁\",\"mixed\":\"Hello世界\"}";
+    byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+    Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+    assertEquals(result.get("chinese"), "中文");
+    assertEquals(result.get("japanese"), "日本語");
+    assertEquals(result.get("korean"), "한국어");
+    assertEquals(result.get("emoji"), "🎉🎊🎁");
+    assertEquals(result.get("mixed"), "Hello世界");
+
+    // Verify equivalence with the JsonNode-based conversion
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+    assertEquals(result, oldResult);
+  }
+
+  /**
+   * Verifies that JSON string escapes (escaped quote, escaped backslash, newline, tab and four-digit hex
+   * code-point escapes) are decoded by {@code JsonUtils.bytesToMap}, and that the result equals the
+   * JsonNode-based conversion.
+   *
+   * @throws IOException if parsing the JSON bytes fails
+   */
+  @Test
+  public void testBytesToMapEscapeSequences()
+      throws IOException {
+    // Each value below is still escaped at the JSON level; the parser is expected to decode it
+    String jsonString = "{\"quotes\":\"\\\"quoted\\\"\",\"backslash\":\"a\\\\b\","
+        + "\"newline\":\"line1\\nline2\",\"tab\":\"col1\\tcol2\","
+        + "\"unicode\":\"\\u0048\\u0065\\u006C\\u006C\\u006F\"}";
+    byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+    Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+    assertEquals(result.get("quotes"), "\"quoted\"");
+    assertEquals(result.get("backslash"), "a\\b");
+    assertEquals(result.get("newline"), "line1\nline2");
+    assertEquals(result.get("tab"), "col1\tcol2");
+    assertEquals(result.get("unicode"), "Hello");
+
+    // Verify equivalence with the JsonNode-based conversion
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+    assertEquals(result, oldResult);
+  }
+
+  /**
+   * Verifies that an array mixing integer, string, floating point, boolean, null, object and array elements is
+   * returned element by element with the expected Java values, and that the result equals the JsonNode-based
+   * conversion.
+   *
+   * @throws IOException if parsing the JSON bytes fails
+   */
+  @Test
+  public void testBytesToMapMixedArrays()
+      throws IOException {
+    String jsonString = "{\"mixed\":[1,\"two\",3.0,true,null,{\"nested\":\"obj\"},[1,2]]}";
+    byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+    Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+    @SuppressWarnings("unchecked")
+    List<Object> mixed = (List<Object>) result.get("mixed");
+    assertEquals(mixed.size(), 7);
+    assertEquals(mixed.get(0), 1);
+    assertEquals(mixed.get(1), "two");
+    assertEquals(mixed.get(2), 3.0);
+    assertEquals(mixed.get(3), true);
+    assertNull(mixed.get(4));
+
+    // The last two elements are containers: check their contents as well, not just the list size
+    @SuppressWarnings("unchecked")
+    Map<String, Object> nestedObj = (Map<String, Object>) mixed.get(5);
+    assertEquals(nestedObj.size(), 1);
+    assertEquals(nestedObj.get("nested"), "obj");
+    assertEquals(mixed.get(6), Arrays.asList(1, 2));
+
+    // Verify equivalence with the JsonNode-based conversion
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+    assertEquals(result, oldResult);
+  }
+
+  /**
+   * Error path: a bare, unquoted value is not valid JSON, so the call is expected to throw an
+   * {@code IOException}.
+   *
+   * @throws IOException expected; the test passes only when it is thrown
+   */
+  @Test(expectedExceptions = IOException.class)
+  public void testBytesToMapMalformedJson()
+      throws IOException {
+    // The value token has no surrounding quotes
+    byte[] invalidPayload = "{\"key\":value}".getBytes(StandardCharsets.UTF_8);
+    JsonUtils.bytesToMap(invalidPayload);
+  }
+
+  /**
+   * Error path: a document that ends before its closing brace is expected to make the call throw an
+   * {@code IOException}.
+   *
+   * @throws IOException expected; the test passes only when it is thrown
+   */
+  @Test(expectedExceptions = IOException.class)
+  public void testBytesToMapIncompleteJson()
+      throws IOException {
+    // The object is opened but never closed
+    byte[] truncatedPayload = "{\"key\":\"value\"".getBytes(StandardCharsets.UTF_8);
+    JsonUtils.bytesToMap(truncatedPayload);
+  }
+
+  /**
+   * Parses a flat object with 100 integer fields ({@code field0} .. {@code field99}) and checks that the direct
+   * parse equals the JsonNode-based conversion, has the expected size, and maps every field to its index.
+   *
+   * @throws IOException if parsing the JSON bytes fails
+   */
+  @Test
+  public void testBytesToMapManyFields()
+      throws IOException {
+    int fieldCount = 100;
+
+    // Build {"field0":0,"field1":1,...}; the first entry opens the object, later ones add a separator
+    StringBuilder json = new StringBuilder();
+    for (int idx = 0; idx < fieldCount; idx++) {
+      json.append(idx == 0 ? "{" : ",");
+      json.append("\"field").append(idx).append("\":").append(idx);
+    }
+    json.append("}");
+    byte[] payload = json.toString().getBytes(StandardCharsets.UTF_8);
+
+    // Reference result obtained through the intermediate JsonNode
+    Map<String, Object> viaJsonNode = JsonUtils.jsonNodeToMap(JsonUtils.bytesToJsonNode(payload));
+
+    // Result of parsing straight to a Map
+    Map<String, Object> direct = JsonUtils.bytesToMap(payload);
+
+    assertEquals(direct, viaJsonNode);
+    assertEquals(direct.size(), fieldCount);
+
+    for (int idx = 0; idx < fieldCount; idx++) {
+      assertEquals(direct.get("field" + idx), idx);
+    }
+  }
+
+  /**
+   * Verifies that a string wrapped in five levels of single-element arrays is returned as the same nesting of
+   * {@code List}s, and that the result equals the JsonNode-based conversion.
+   *
+   * @throws IOException if parsing the JSON bytes fails
+   */
+  @Test
+  public void testBytesToMapDeeplyNestedArrays()
+      throws IOException {
+    String jsonString = "{\"data\":[[[[[\"deep\"]]]]]}";
+    byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+    Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+    // Pin the actual structure so a defect shared by both code paths cannot pass unnoticed:
+    // five nested single-element lists around "deep", one per opening bracket in the input
+    List<Object> expected =
+        Arrays.asList(Arrays.asList(Arrays.asList(Arrays.asList(Arrays.asList("deep")))));
+    assertEquals(result.get("data"), expected);
+
+    // Verify equivalence with the JsonNode-based conversion
+    JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+    Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+    assertEquals(result, oldResult);
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to