This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new e16b2d9c5a1 Perf Optimize JSON message decoding by parsing directly to
Map, avoiding intermediate JsonNode (#17485)
e16b2d9c5a1 is described below
commit e16b2d9c5a12da0a1c91abdac6f58001e2574a43
Author: Arunkumar Saravanan <[email protected]>
AuthorDate: Sun Jan 11 07:30:12 2026 +0530
Perf Optimize JSON message decoding by parsing directly to Map, avoiding
intermediate JsonNode (#17485)
* Perf Improvement in JSON Decode - Ingestion Performance
* Build issue fix
* Fix API issue
---
AGENTS.md | 20 +
pinot-perf/pom.xml | 4 +
.../apache/pinot/perf/BenchmarkJsonParsing.java | 282 ++++++++++++
.../inputformat/clplog/CLPLogMessageDecoder.java | 6 +-
.../inputformat/json/JSONMessageDecoder.java | 6 +-
.../java/org/apache/pinot/spi/utils/JsonUtils.java | 14 +
.../org/apache/pinot/spi/utils/JsonUtilsTest.java | 496 +++++++++++++++++++++
7 files changed, 822 insertions(+), 6 deletions(-)
diff --git a/AGENTS.md b/AGENTS.md
index d534135dc9e..9c59eacb726 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,3 +1,23 @@
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+
+-->
# Apache Pinot - AGENTS Guide
This file provides quick, practical guidance for coding agents working in this
diff --git a/pinot-perf/pom.xml b/pinot-perf/pom.xml
index 59c24b98068..a1295123e05 100644
--- a/pinot-perf/pom.xml
+++ b/pinot-perf/pom.xml
@@ -206,6 +206,10 @@
<mainClass>org.apache.pinot.perf.aggregation.SumIntAggregationFunctionBenchmark</mainClass>
<name>pinot-SumIntAggregationFunctionBenchmark</name>
</program>
+ <program>
+ <mainClass>org.apache.pinot.perf.BenchmarkJsonParsing</mainClass>
+ <name>pinot-BenchmarkJsonParsing</name>
+ </program>
</programs>
<repositoryLayout>flat</repositoryLayout>
<repositoryName>lib</repositoryName>
diff --git
a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkJsonParsing.java
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkJsonParsing.java
new file mode 100644
index 00000000000..dccc089f906
--- /dev/null
+++ b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkJsonParsing.java
@@ -0,0 +1,282 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.perf;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.apache.pinot.spi.utils.JsonUtils;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.ChainedOptionsBuilder;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+
+
+/**
+ * Benchmark to compare JSON parsing performance:
+ * - Old approach: bytesToJsonNode() + jsonNodeToMap() (two-step parsing)
+ * - New approach: bytesToMap() (single-step parsing)
+ *
+ * This benchmark simulates the JSON decoding path in RealtimeSegmentDataManager.consumeLoop()
+ * where JSONMessageDecoder.decode() is called for each message from the stream.
+ *
+ * Run with: mvn exec:exec -Dexec.executable="java" -Dexec.args="-cp %classpath org.apache.pinot.perf
+ * .BenchmarkJsonParsing"
+ * Or compile and run: java -jar pinot-perf/target/pinot-perf-*-shaded.jar BenchmarkJsonParsing
+ */
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@Fork(1)
+@Warmup(iterations = 3, time = 2)
+@Measurement(iterations = 5, time = 3)
+@State(Scope.Benchmark)
+public class BenchmarkJsonParsing {
+
+ public static void main(String[] args)
+ throws Exception {
+ ChainedOptionsBuilder opt = new OptionsBuilder()
+ .include(BenchmarkJsonParsing.class.getSimpleName())
+ .shouldDoGC(true);
+ new Runner(opt.build()).run();
+ }
+
+ // Simulates different JSON payload sizes
+ @Param({"small", "medium", "large", "nested"})
+ private String _payloadType;
+
+ // Number of JSON messages to pre-generate for the benchmark
+ private static final int NUM_MESSAGES = 1000;
+
+ private List<byte[]> _jsonPayloads;
+ private int _currentIndex = 0;
+
+ @Setup(Level.Trial)
+ public void setUp() {
+ _jsonPayloads = new ArrayList<>(NUM_MESSAGES);
+ Random random = new Random(42);
+
+ for (int i = 0; i < NUM_MESSAGES; i++) {
+ String json = generateJsonPayload(_payloadType, random, i);
+ _jsonPayloads.add(json.getBytes(StandardCharsets.UTF_8));
+ }
+ }
+
+ /**
+ * Generates different types of JSON payloads to simulate real-world streaming data.
+ */
+ private String generateJsonPayload(String type, Random random, int index) {
+ switch (type) {
+ case "small":
+ // Small payload: ~100 bytes, typical for simple events
+ return String.format(
+ "{\"id\":%d,\"timestamp\":%d,\"value\":%.2f,\"status\":\"%s\"}",
+ index,
+ System.currentTimeMillis() + random.nextInt(10000),
+ random.nextDouble() * 1000,
+ random.nextBoolean() ? "active" : "inactive"
+ );
+
+ case "medium":
+ // Medium payload: ~500 bytes, typical for user events
+ return String.format(
+ "{\"eventId\":\"%s-%d\",\"userId\":\"%s\",\"sessionId\":\"%s\","
+ + "\"timestamp\":%d,\"eventType\":\"%s\",\"properties\":{"
+ + "\"page\":\"/products/%d\",\"referrer\":\"https://example.com/search?q=%s\","
+ + "\"duration\":%d,\"scrollDepth\":%.2f,\"clicks\":%d},"
+ + "\"userAgent\":\"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36\","
+ + "\"ipAddress\":\"192.168.%d.%d\",\"country\":\"US\",\"city\":\"San Francisco\"}",
+ "evt", index,
+ "user-" + random.nextInt(100000),
+ "sess-" + random.nextInt(1000000),
+ System.currentTimeMillis(),
+ randomEventType(random),
+ random.nextInt(10000),
+ randomSearchQuery(random),
+ random.nextInt(300),
+ random.nextDouble() * 100,
+ random.nextInt(20),
+ random.nextInt(256),
+ random.nextInt(256)
+ );
+
+ case "large":
+ // Large payload: ~2KB, typical for detailed analytics events
+ StringBuilder sb = new StringBuilder();
+
sb.append("{\"eventId\":\"").append("evt-").append(index).append("\",");
+
sb.append("\"timestamp\":").append(System.currentTimeMillis()).append(",");
+ sb.append("\"data\":{");
+
+ // Add 20 fields to make it large
+ for (int j = 0; j < 20; j++) {
+ if (j > 0) {
+ sb.append(",");
+ }
+ sb.append("\"field").append(j).append("\":\"")
+ .append(randomString(random, 50 +
random.nextInt(50))).append("\"");
+ }
+ sb.append("},\"metrics\":{");
+
+ // Add 10 numeric metrics
+ for (int j = 0; j < 10; j++) {
+ if (j > 0) {
+ sb.append(",");
+ }
+
sb.append("\"metric").append(j).append("\":").append(random.nextDouble() *
1000);
+ }
+ sb.append("},\"tags\":[");
+
+ // Add 5 tags
+ for (int j = 0; j < 5; j++) {
+ if (j > 0) {
+ sb.append(",");
+ }
+ sb.append("\"tag-").append(random.nextInt(100)).append("\"");
+ }
+ sb.append("]}");
+ return sb.toString();
+
+ case "nested":
+ // Nested payload: ~1KB with nested objects and arrays
+ return String.format(
+ "{\"order\":{\"id\":%d,\"customer\":{\"id\":\"%s\",\"name\":\"%s\",\"email\":\"%s@example.com\"},"
+ +
"\"items\":[{\"sku\":\"SKU-%d\",\"name\":\"%s\",\"price\":%.2f,\"qty\":%d},"
+ +
"{\"sku\":\"SKU-%d\",\"name\":\"%s\",\"price\":%.2f,\"qty\":%d},"
+ +
"{\"sku\":\"SKU-%d\",\"name\":\"%s\",\"price\":%.2f,\"qty\":%d}],"
+ +
"\"shipping\":{\"method\":\"%s\",\"address\":{\"street\":\"%s\",\"city\":\"%s\","
+ +
"\"state\":\"%s\",\"zip\":\"%05d\"}},\"total\":%.2f,\"currency\":\"USD\"}}",
+ index,
+ "cust-" + random.nextInt(10000),
+ randomName(random),
+ "user" + random.nextInt(10000),
+ random.nextInt(10000), randomProduct(random), random.nextDouble()
* 100, random.nextInt(5) + 1,
+ random.nextInt(10000), randomProduct(random), random.nextDouble()
* 100, random.nextInt(5) + 1,
+ random.nextInt(10000), randomProduct(random), random.nextDouble()
* 100, random.nextInt(5) + 1,
+ random.nextBoolean() ? "express" : "standard",
+ random.nextInt(9999) + " Main St",
+ randomCity(random),
+ randomState(random),
+ random.nextInt(99999),
+ random.nextDouble() * 500 + 50
+ );
+
+ default:
+ return "{\"id\":" + index + "}";
+ }
+ }
+
+ private byte[] getNextPayload() {
+ byte[] payload = _jsonPayloads.get(_currentIndex);
+ _currentIndex = (_currentIndex + 1) % NUM_MESSAGES;
+ return payload;
+ }
+
+ /**
+ * OLD APPROACH: Two-step parsing (bytesToJsonNode + jsonNodeToMap)
+ * This was the original implementation that caused high CPU usage.
+ */
+ @Benchmark
+ public Map<String, Object> oldApproachTwoStepParsing(Blackhole blackhole)
+ throws IOException {
+ byte[] payload = getNextPayload();
+
+ // Step 1: Parse bytes to JsonNode (intermediate tree representation)
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(payload, 0, payload.length);
+
+ // Step 2: Convert JsonNode to Map
+ Map<String, Object> result = JsonUtils.jsonNodeToMap(jsonNode);
+
+ blackhole.consume(jsonNode);
+ return result;
+ }
+
+ /**
+ * NEW APPROACH: Single-step parsing (bytesToMap)
+ * This is the optimized implementation that parses directly to Map.
+ */
+ @Benchmark
+ public Map<String, Object> newApproachDirectParsing(Blackhole blackhole)
+ throws IOException {
+ byte[] payload = getNextPayload();
+
+ // Direct parsing to Map, skipping the intermediate JsonNode
+ Map<String, Object> result = JsonUtils.bytesToMap(payload, 0,
payload.length);
+
+ return result;
+ }
+
+ // Helper methods for generating realistic test data
+
+ private static String randomEventType(Random random) {
+ String[] types = {"pageview", "click", "scroll", "purchase", "signup",
"login", "search"};
+ return types[random.nextInt(types.length)];
+ }
+
+ private static String randomSearchQuery(Random random) {
+ String[] queries = {"laptop", "phone", "headphones", "camera", "tablet",
"watch", "speaker"};
+ return queries[random.nextInt(queries.length)];
+ }
+
+ private static String randomString(Random random, int length) {
+ StringBuilder sb = new StringBuilder(length);
+ String chars =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
+ for (int i = 0; i < length; i++) {
+ sb.append(chars.charAt(random.nextInt(chars.length())));
+ }
+ return sb.toString();
+ }
+
+ private static String randomName(Random random) {
+ String[] firstNames = {"John", "Jane", "Bob", "Alice", "Charlie", "Diana",
"Eve", "Frank"};
+ String[] lastNames = {"Smith", "Johnson", "Williams", "Brown", "Jones",
"Garcia", "Miller"};
+ return firstNames[random.nextInt(firstNames.length)] + " " +
lastNames[random.nextInt(lastNames.length)];
+ }
+
+ private static String randomProduct(Random random) {
+ String[] products = {"Widget", "Gadget", "Device", "Tool", "Component",
"Module", "Unit"};
+ String[] adjectives = {"Premium", "Standard", "Basic", "Pro", "Elite",
"Ultra"};
+ return adjectives[random.nextInt(adjectives.length)] + " " +
products[random.nextInt(products.length)];
+ }
+
+ private static String randomCity(Random random) {
+ String[] cities = {"New York", "Los Angeles", "Chicago", "Houston",
"Phoenix", "San Francisco", "Seattle"};
+ return cities[random.nextInt(cities.length)];
+ }
+
+ private static String randomState(Random random) {
+ String[] states = {"NY", "CA", "IL", "TX", "AZ", "WA", "OR", "NV", "CO",
"FL"};
+ return states[random.nextInt(states.length)];
+ }
+}
diff --git
a/pinot-plugins/pinot-input-format/pinot-clp-log/src/main/java/org/apache/pinot/plugin/inputformat/clplog/CLPLogMessageDecoder.java
b/pinot-plugins/pinot-input-format/pinot-clp-log/src/main/java/org/apache/pinot/plugin/inputformat/clplog/CLPLogMessageDecoder.java
index 07677dbd62c..dd00b08ef25 100644
---
a/pinot-plugins/pinot-input-format/pinot-clp-log/src/main/java/org/apache/pinot/plugin/inputformat/clplog/CLPLogMessageDecoder.java
+++
b/pinot-plugins/pinot-input-format/pinot-clp-log/src/main/java/org/apache/pinot/plugin/inputformat/clplog/CLPLogMessageDecoder.java
@@ -18,7 +18,6 @@
*/
package org.apache.pinot.plugin.inputformat.clplog;
-import com.fasterxml.jackson.databind.JsonNode;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
@@ -99,8 +98,9 @@ public class CLPLogMessageDecoder implements
StreamMessageDecoder<byte[]> {
@Override
public GenericRow decode(byte[] payload, int offset, int length, GenericRow
destination) {
try {
- JsonNode jsonNode = JsonUtils.bytesToJsonNode(payload, offset, length);
- return _recordExtractor.extract(JsonUtils.jsonNodeToMap(jsonNode),
destination);
+ // Parse directly to Map, avoiding intermediate JsonNode representation
for better performance
+ Map<String, Object> jsonMap = JsonUtils.bytesToMap(payload, offset,
length);
+ return _recordExtractor.extract(jsonMap, destination);
} catch (Exception e) {
if (_errorSamplingPeriod != 0) {
_numErrorsUntilNextPrint--;
diff --git
a/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONMessageDecoder.java
b/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONMessageDecoder.java
index 52b7ff01c7d..08a1122e6c4 100644
---
a/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONMessageDecoder.java
+++
b/pinot-plugins/pinot-input-format/pinot-json/src/main/java/org/apache/pinot/plugin/inputformat/json/JSONMessageDecoder.java
@@ -18,7 +18,6 @@
*/
package org.apache.pinot.plugin.inputformat.json;
-import com.fasterxml.jackson.databind.JsonNode;
import java.util.Map;
import java.util.Set;
import org.apache.pinot.spi.data.readers.GenericRow;
@@ -59,8 +58,9 @@ public class JSONMessageDecoder implements
StreamMessageDecoder<byte[]> {
@Override
public GenericRow decode(byte[] payload, int offset, int length, GenericRow
destination) {
try {
- JsonNode message = JsonUtils.bytesToJsonNode(payload, offset, length);
- return _jsonRecordExtractor.extract(JsonUtils.jsonNodeToMap(message),
destination);
+ // Parse directly to Map, avoiding intermediate JsonNode representation
for better performance
+ Map<String, Object> jsonMap = JsonUtils.bytesToMap(payload, offset,
length);
+ return _jsonRecordExtractor.extract(jsonMap, destination);
} catch (Exception e) {
throw new RuntimeException(
"Caught exception while decoding JSON record with payload: " + new
String(payload, offset, length), e);
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
index 46d5607406c..16bc44c29df 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
@@ -247,6 +247,20 @@ public class JsonUtils {
return DEFAULT_READER.forType(MAP_TYPE_REFERENCE).readValue(jsonNode);
}
+ /**
+ * Parses JSON bytes directly to a Map, avoiding the intermediate JsonNode
representation.
+ * This is more efficient than calling bytesToJsonNode followed by
jsonNodeToMap.
+ */
+ public static Map<String, Object> bytesToMap(byte[] jsonBytes)
+ throws IOException {
+ return DEFAULT_READER.forType(MAP_TYPE_REFERENCE).readValue(jsonBytes);
+ }
+
+ public static Map<String, Object> bytesToMap(byte[] jsonBytes, int offset,
int length)
+ throws IOException {
+ return DEFAULT_READER.forType(MAP_TYPE_REFERENCE).readValue(jsonBytes,
offset, length);
+ }
+
public static String objectToString(Object object)
throws JsonProcessingException {
return DEFAULT_WRITER.writeValueAsString(object);
diff --git
a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
index bc975adff9d..f06c19675f2 100644
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/utils/JsonUtilsTest.java
@@ -22,7 +22,9 @@ import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.collect.ImmutableSet;
import java.io.File;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -34,9 +36,11 @@ import
org.apache.pinot.spi.config.table.ingestion.ComplexTypeConfig;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.Schema;
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
import static org.testng.Assert.assertTrue;
@@ -763,4 +767,496 @@ public class JsonUtilsTest {
assertEquals(flattenedRecord1.get(".addresses..street"), "second st");
}
}
+
+ // ==================== Tests for bytesToMap optimization ====================
+ // These tests verify that bytesToMap produces identical results to the old
+ // two-step approach (bytesToJsonNode + jsonNodeToMap)
+
+ /**
+ * Data provider for various JSON payloads to test bytesToMap equivalence
+ */
+ @DataProvider(name = "jsonPayloads")
+ public Object[][] jsonPayloads() {
+ return new Object[][]{
+ // Empty and null cases
+ {"{}"},
+ {"{\"key\":null}"},
+
+ // Simple primitives
+ {"{\"string\":\"value\"}"},
+ {"{\"integer\":42}"},
+ {"{\"negative\":-123}"},
+ {"{\"float\":3.14159}"},
+ {"{\"negative_float\":-2.718}"},
+ {"{\"boolean_true\":true}"},
+ {"{\"boolean_false\":false}"},
+ {"{\"zero\":0}"},
+ {"{\"zero_float\":0.0}"},
+
+ // Large numbers
+ {"{\"large_int\":9223372036854775807}"},
+ {"{\"large_negative\":-9223372036854775808}"},
+ {"{\"scientific\":1.23e10}"},
+ {"{\"scientific_negative\":1.23e-10}"},
+
+ // String edge cases
+ {"{\"empty_string\":\"\"}"},
+ {"{\"whitespace\":\" \"}"},
+ {"{\"with_quotes\":\"she said \\\"hello\\\"\"}"},
+ {"{\"with_backslash\":\"path\\\\to\\\\file\"}"},
+ {"{\"with_newline\":\"line1\\nline2\"}"},
+ {"{\"with_tab\":\"col1\\tcol2\"}"},
+ {"{\"with_unicode\":\"日本語\"}"},
+ {"{\"emoji\":\"Hello 👋 World 🌍\"}"},
+ {"{\"special_chars\":\"<>&'\\\"\"}"},
+
+ // Arrays
+ {"{\"empty_array\":[]}"},
+ {"{\"int_array\":[1,2,3,4,5]}"},
+ {"{\"string_array\":[\"a\",\"b\",\"c\"]}"},
+ {"{\"mixed_array\":[1,\"two\",3.0,true,null]}"},
+ {"{\"nested_array\":[[1,2],[3,4],[5,6]]}"},
+ {"{\"array_of_objects\":[{\"a\":1},{\"b\":2}]}"},
+
+ // Nested objects
+ {"{\"nested\":{\"level1\":{\"level2\":{\"level3\":\"deep\"}}}}"},
+
{"{\"person\":{\"name\":\"John\",\"age\":30,\"address\":{\"city\":\"NYC\",\"zip\":\"10001\"}}}"},
+
+ // Complex realistic payloads
+ {
+
"{\"event\":{\"id\":123,\"type\":\"click\",\"timestamp\":1609459200000,"
+ + "\"user\":{\"id\":\"user123\",\"session\":\"sess456\"},"
+ +
"\"data\":{\"page\":\"/products\",\"element\":\"button\",\"coordinates\":{\"x\":100,\"y\":200}}}}"
+ },
+
+ // Multiple fields with same type
+
{"{\"a\":1,\"b\":2,\"c\":3,\"d\":4,\"e\":5,\"f\":6,\"g\":7,\"h\":8,\"i\":9,\"j\":10}"},
+
+ // Field names with special characters
+ {"{\"field-with-dash\":1}"},
+ {"{\"field.with.dots\":2}"},
+ {"{\"field_with_underscore\":3}"},
+ {"{\"123numeric_start\":4}"},
+
+ // Boolean and null combinations
+ {"{\"flags\":{\"active\":true,\"deleted\":false,\"pending\":null}}"},
+
+ // Array with nulls
+ {"{\"array_with_nulls\":[1,null,3,null,5]}"},
+
+ // Unicode field names
+ {"{\"日本語キー\":\"value\"}"},
+
+ // Very long string value
+ {"{\"long_string\":\"" + "a".repeat(1000) + "\"}"},
+
+ // Deeply nested structure
+ {"{\"l1\":{\"l2\":{\"l3\":{\"l4\":{\"l5\":{\"value\":\"deep\"}}}}}}"},
+
+ // Real-world like Kafka message
+ {
+
"{\"topic\":\"events\",\"partition\":0,\"offset\":12345,\"timestamp\":1609459200000,"
+ + "\"key\":\"user123\",\"value\":{\"event_type\":\"purchase\","
+ + "\"items\":[{\"sku\":\"ABC123\",\"qty\":2,\"price\":29.99},"
+ + "{\"sku\":\"XYZ789\",\"qty\":1,\"price\":49.99}],"
+ + "\"total\":109.97,\"currency\":\"USD\"}}"
+ }
+ };
+ }
+
+ /**
+ * Test that bytesToMap produces identical results to bytesToJsonNode + jsonNodeToMap
+ * for all payload types
+ */
+ @Test(dataProvider = "jsonPayloads")
+ public void testBytesToMapEquivalence(String jsonString)
+ throws IOException {
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ // Old approach: bytesToJsonNode + jsonNodeToMap
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes, 0,
jsonBytes.length);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+
+ // New approach: bytesToMap directly
+ Map<String, Object> newResult = JsonUtils.bytesToMap(jsonBytes, 0,
jsonBytes.length);
+
+ // Verify they are equal
+ assertEquals(newResult, oldResult,
+ "bytesToMap should produce identical result to bytesToJsonNode + jsonNodeToMap for: " + jsonString);
+ }
+
+ /**
+ * Test bytesToMap with offset and length parameters (partial array reading)
+ */
+ @Test
+ public void testBytesToMapWithOffset()
+ throws IOException {
+ String prefix = "GARBAGE";
+ String jsonString = "{\"key\":\"value\",\"num\":42}";
+ String suffix = "MORE_GARBAGE";
+ String combined = prefix + jsonString + suffix;
+
+ byte[] fullBytes = combined.getBytes(StandardCharsets.UTF_8);
+ int offset = prefix.length();
+ int length = jsonString.length();
+
+ // Old approach
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(fullBytes, offset, length);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+
+ // New approach
+ Map<String, Object> newResult = JsonUtils.bytesToMap(fullBytes, offset,
length);
+
+ assertEquals(newResult, oldResult);
+ assertEquals(newResult.get("key"), "value");
+ assertEquals(newResult.get("num"), 42);
+ }
+
+ /**
+ * Test bytesToMap with a non-zero offset where the slice ends exactly at the array
+ * boundary (offset + length == array.length).
+ * This test ensures we're passing length (not offset+length) to Jackson API.
+ */
+ @Test
+ public void testBytesToMapWithOffsetEdgeCase()
+ throws IOException {
+ // Create a case that would fail if we incorrectly passed offset+length
+ // (instead of length) as the third parameter to Jackson's readValue
+ String prefix = "XX"; // 2 bytes
+ String jsonString = "{\"a\":1}"; // 7 bytes
+ // Total: 9 bytes
+ // offset=2, length=7
+ // If buggy (passing offset+length=9), would try to read 9 bytes from position 2,
+ // but only 7 bytes are available from that position
+
+ String combined = prefix + jsonString; // No suffix - exactly at the boundary
+ byte[] fullBytes = combined.getBytes(StandardCharsets.UTF_8);
+ int offset = prefix.length();
+ int length = jsonString.length();
+
+ // Verify array bounds: offset + length should equal array length exactly
+ assertEquals(fullBytes.length, offset + length,
+ "Test setup: offset + length should equal array length");
+
+ // This would fail with the bug: "Invalid 'offset' and/or 'len' arguments"
+ Map<String, Object> result = JsonUtils.bytesToMap(fullBytes, offset,
length);
+ assertEquals(result.get("a"), 1);
+
+ // Verify equivalence with old approach
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(fullBytes, offset, length);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+ assertEquals(result, oldResult);
+ }
+
+ /**
+ * Test bytesToMap with various offset positions to ensure correct slicing.
+ */
+ @Test
+ public void testBytesToMapWithVariousOffsets()
+ throws IOException {
+ String json = "{\"x\":\"y\"}";
+ byte[] jsonBytes = json.getBytes(StandardCharsets.UTF_8);
+
+ // Test with offset=0 (should work like no offset)
+ Map<String, Object> result0 = JsonUtils.bytesToMap(jsonBytes, 0,
jsonBytes.length);
+ assertEquals(result0.get("x"), "y");
+
+ // Test with padding on both sides at different positions
+ for (int prefixLen = 1; prefixLen <= 10; prefixLen++) {
+ String prefix = "P".repeat(prefixLen);
+ String suffix = "S".repeat(5);
+ String combined = prefix + json + suffix;
+ byte[] fullBytes = combined.getBytes(StandardCharsets.UTF_8);
+
+ Map<String, Object> result = JsonUtils.bytesToMap(fullBytes, prefixLen,
json.length());
+ assertEquals(result.get("x"), "y",
+ "Failed with prefix length " + prefixLen);
+
+ // Verify equivalence
+ JsonNode node = JsonUtils.bytesToJsonNode(fullBytes, prefixLen,
json.length());
+ assertEquals(result, JsonUtils.jsonNodeToMap(node));
+ }
+ }
+
+ /**
+ * Test bytesToMap without offset (full array)
+ */
+ @Test
+ public void testBytesToMapFullArray()
+ throws IOException {
+ String jsonString = "{\"name\":\"test\",\"values\":[1,2,3]}";
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ // Old approach using full array methods
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+
+ // New approach
+ Map<String, Object> newResult = JsonUtils.bytesToMap(jsonBytes);
+
+ assertEquals(newResult, oldResult);
+ }
+
+ /**
+ * Test that numeric types are preserved correctly
+ */
+ @Test
+ public void testBytesToMapNumericTypes()
+ throws IOException {
+ String jsonString =
"{\"int\":42,\"long\":9223372036854775807,\"double\":3.14159,\"float\":1.5}";
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+ // Verify the types and values
+ assertEquals(result.get("int"), 42);
+ assertEquals(result.get("long"), 9223372036854775807L);
+ assertEquals(result.get("double"), 3.14159);
+ assertEquals(result.get("float"), 1.5);
+
+ // Also verify equivalence with old approach
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+ assertEquals(result, oldResult);
+ }
+
+ /**
+ * Test nested object access after parsing
+ */
+ @Test
+ @SuppressWarnings("unchecked")
+ public void testBytesToMapNestedAccess()
+ throws IOException {
+ String jsonString =
"{\"outer\":{\"inner\":{\"value\":\"deep\"},\"array\":[1,2,3]}}";
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+ Map<String, Object> outer = (Map<String, Object>) result.get("outer");
+ Map<String, Object> inner = (Map<String, Object>) outer.get("inner");
+ List<Object> array = (List<Object>) outer.get("array");
+
+ assertEquals(inner.get("value"), "deep");
+ assertEquals(array, Arrays.asList(1, 2, 3));
+
+ // Verify equivalence
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+ assertEquals(result, oldResult);
+ }
+
+ /**
+ * Test null handling in bytesToMap
+ */
+ @Test
+ public void testBytesToMapNullHandling()
+ throws IOException {
+ String jsonString = "{\"nullField\":null,\"nested\":{\"alsoNull\":null}}";
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+ assertTrue(result.containsKey("nullField"));
+ assertNull(result.get("nullField"));
+
+ @SuppressWarnings("unchecked")
+ Map<String, Object> nested = (Map<String, Object>) result.get("nested");
+ assertTrue(nested.containsKey("alsoNull"));
+ assertNull(nested.get("alsoNull"));
+
+ // Verify equivalence
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+ assertEquals(result, oldResult);
+ }
+
+ /**
+ * Test boolean handling
+ */
+ @Test
+ public void testBytesToMapBooleanHandling()
+ throws IOException {
+ String jsonString = "{\"trueVal\":true,\"falseVal\":false}";
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+ assertEquals(result.get("trueVal"), true);
+ assertEquals(result.get("falseVal"), false);
+
+ // Verify equivalence
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+ assertEquals(result, oldResult);
+ }
+
+ /**
+ * Test empty object and array
+ */
+ @Test
+ public void testBytesToMapEmptyStructures()
+ throws IOException {
+ String jsonString = "{\"emptyObj\":{},\"emptyArr\":[]}";
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+ @SuppressWarnings("unchecked")
+ Map<String, Object> emptyObj = (Map<String, Object>)
result.get("emptyObj");
+ assertTrue(emptyObj.isEmpty());
+
+ @SuppressWarnings("unchecked")
+ List<Object> emptyArr = (List<Object>) result.get("emptyArr");
+ assertTrue(emptyArr.isEmpty());
+
+ // Verify equivalence
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+ assertEquals(result, oldResult);
+ }
+
+ /**
+ * Test that the method correctly handles UTF-8 encoded bytes
+ */
+ @Test
+ public void testBytesToMapUtf8Encoding()
+ throws IOException {
+ // Test various UTF-8 characters
+ String jsonString =
"{\"chinese\":\"δΈζ\",\"japanese\":\"ζ₯ζ¬θͺ\",\"korean\":\"νκ΅μ΄\","
+ + "\"emoji\":\"πππ\",\"mixed\":\"HelloδΈη\"}";
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+ assertEquals(result.get("chinese"), "δΈζ");
+ assertEquals(result.get("japanese"), "ζ₯ζ¬θͺ");
+ assertEquals(result.get("korean"), "νκ΅μ΄");
+ assertEquals(result.get("emoji"), "πππ");
+ assertEquals(result.get("mixed"), "HelloδΈη");
+
+ // Verify equivalence
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+ assertEquals(result, oldResult);
+ }
+
+ /**
+ * Test escape sequences in strings
+ */
+ @Test
+ public void testBytesToMapEscapeSequences()
+ throws IOException {
+ String jsonString =
"{\"quotes\":\"\\\"quoted\\\"\",\"backslash\":\"a\\\\b\","
+ + "\"newline\":\"line1\\nline2\",\"tab\":\"col1\\tcol2\","
+ + "\"unicode\":\"\\u0048\\u0065\\u006C\\u006C\\u006F\"}";
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+ assertEquals(result.get("quotes"), "\"quoted\"");
+ assertEquals(result.get("backslash"), "a\\b");
+ assertEquals(result.get("newline"), "line1\nline2");
+ assertEquals(result.get("tab"), "col1\tcol2");
+ assertEquals(result.get("unicode"), "Hello");
+
+ // Verify equivalence
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+ assertEquals(result, oldResult);
+ }
+
+ /**
+ * Test mixed type arrays
+ */
+ @Test
+ public void testBytesToMapMixedArrays()
+ throws IOException {
+ String jsonString =
"{\"mixed\":[1,\"two\",3.0,true,null,{\"nested\":\"obj\"},[1,2]]}";
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+ @SuppressWarnings("unchecked")
+ List<Object> mixed = (List<Object>) result.get("mixed");
+ assertEquals(mixed.size(), 7);
+ assertEquals(mixed.get(0), 1);
+ assertEquals(mixed.get(1), "two");
+ assertEquals(mixed.get(2), 3.0);
+ assertEquals(mixed.get(3), true);
+ assertNull(mixed.get(4));
+
+ // Verify equivalence
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+ assertEquals(result, oldResult);
+ }
+
+ /**
+ * Test error handling - malformed JSON should throw IOException
+ */
+ @Test(expectedExceptions = IOException.class)
+ public void testBytesToMapMalformedJson()
+ throws IOException {
+ String malformedJson = "{\"key\":value}"; // missing quotes around value
+ byte[] jsonBytes = malformedJson.getBytes(StandardCharsets.UTF_8);
+ JsonUtils.bytesToMap(jsonBytes);
+ }
+
+ /**
+ * Test error handling - incomplete JSON
+ */
+ @Test(expectedExceptions = IOException.class)
+ public void testBytesToMapIncompleteJson()
+ throws IOException {
+ String incompleteJson = "{\"key\":\"value\""; // missing closing brace
+ byte[] jsonBytes = incompleteJson.getBytes(StandardCharsets.UTF_8);
+ JsonUtils.bytesToMap(jsonBytes);
+ }
+
+ /**
+ * Stress test with a large number of fields
+ */
+ @Test
+ public void testBytesToMapManyFields()
+ throws IOException {
+ StringBuilder sb = new StringBuilder("{");
+ for (int i = 0; i < 100; i++) {
+ if (i > 0) {
+ sb.append(",");
+ }
+ sb.append("\"field").append(i).append("\":").append(i);
+ }
+ sb.append("}");
+ String jsonString = sb.toString();
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ // Old approach
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+
+ // New approach
+ Map<String, Object> newResult = JsonUtils.bytesToMap(jsonBytes);
+
+ assertEquals(newResult, oldResult);
+ assertEquals(newResult.size(), 100);
+
+ for (int i = 0; i < 100; i++) {
+ assertEquals(newResult.get("field" + i), i);
+ }
+ }
+
+ /**
+ * Test with a deeply nested array structure
+ */
+ @Test
+ public void testBytesToMapDeeplyNestedArrays()
+ throws IOException {
+ String jsonString = "{\"data\":[[[[[\"deep\"]]]]]}";
+ byte[] jsonBytes = jsonString.getBytes(StandardCharsets.UTF_8);
+
+ Map<String, Object> result = JsonUtils.bytesToMap(jsonBytes);
+
+ // Verify equivalence
+ JsonNode jsonNode = JsonUtils.bytesToJsonNode(jsonBytes);
+ Map<String, Object> oldResult = JsonUtils.jsonNodeToMap(jsonNode);
+ assertEquals(result, oldResult);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org