Copilot commented on code in PR #17534:
URL: https://github.com/apache/pinot/pull/17534#discussion_r2707926854
##########
pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java:
##########
@@ -350,9 +350,58 @@ public static String getStringValue(Object value) {
if (value instanceof byte[]) {
return BytesUtils.toHexString((byte[]) value);
}
+ // Handle Java collections (Map, List)
+ if (value instanceof Map || value instanceof List) {
+ try {
+ return JsonUtils.objectToString(value);
+ } catch (JsonProcessingException e) {
+ throw new RuntimeException(
+ String.format("Failed to serialize %s to JSON: %s", value.getClass().getName(), e.getMessage()), e);
+ }
+ }
+ // Handle Scala collections - they don't implement java.util.Map/List
+ // JsonUtils doesn't have Scala module, so we handle empty collections directly
+ final String className = value.getClass().getName();
+ if (className.startsWith("scala.collection")) {
+ return serializeScalaCollection(value, className);
+ }
return value.toString();
}
+ /**
+ * Serialize a Scala collection to JSON string.
+ * For empty collections, returns "{}" or "[]". For non-empty, throws an error.
+ */
+ private static String serializeScalaCollection(final Object scalaCollection, final String className) {
+ // Check if empty using reflection (all Scala collections have isEmpty method)
+ try {
+ final boolean isEmpty = (boolean) scalaCollection.getClass().getMethod("isEmpty").invoke(scalaCollection);
+ if (isEmpty) {
+ return getEmptyJsonForScalaCollection(className);
+ }
+ } catch (Exception e) {
+ // If we can't call isEmpty, fall through to error
+ }
+ throw new IllegalArgumentException(
+ String.format("Cannot serialize non-empty Scala collection %s. Use
JSON string instead.", className));
+ }
+
+ /**
+ * Returns the appropriate empty JSON representation for a Scala collection based on its class name.
+ */
+ private static String getEmptyJsonForScalaCollection(final String className) {
+ if (className.contains("Map")) {
+ return "{}";
+ }
+ // Nil$ is Scala's empty list singleton
+ if (className.contains("List") || className.contains("Seq") ||
className.contains("Set")
+ || className.contains("Vector") || className.contains("Array") ||
className.contains("Buffer")) {
Review Comment:
Using `contains()` for class name matching can produce false positives. For example, a class named
`MyArrayList` or `CustomListProcessor` would incorrectly match. Consider more precise pattern matching,
such as `className.matches(".*\\.(List|Seq|Set|Vector|Array|Buffer)(\\$|$)")`, so these terms only match
when they are the final class name component.
```suggestion
if (className.matches(".*\\.(Map)(\\$|$)")) {
return "{}";
}
// Nil$ is Scala's empty list singleton
if (className.matches(".*\\.(List|Seq|Set|Vector|Array|Buffer)(\\$|$)"))
{
```
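
For illustration, here is how the two checks differ once the pattern is anchored. This is only a demo: `scala.collection.immutable.List` is a real Scala class name, while the helper class name is made up for the example.

```java
public class ClassNameMatchDemo {
  // Anchored pattern from the suggestion above: the keyword must be the last
  // class-name component, optionally followed by '$'.
  private static final String LIST_LIKE = ".*\\.(List|Seq|Set|Vector|Array|Buffer)(\\$|$)";

  public static void main(String[] args) {
    String scalaList = "scala.collection.immutable.List";          // real Scala class name
    System.out.println(scalaList.contains("List"));                // true
    System.out.println(scalaList.matches(LIST_LIKE));              // true

    String helper = "scala.collection.internal.MyArrayListHelper"; // hypothetical name
    System.out.println(helper.contains("List"));                   // true -> false positive
    System.out.println(helper.matches(LIST_LIKE));                 // false -> correctly rejected
  }
}
```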
##########
pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java:
##########
@@ -350,9 +350,58 @@ public static String getStringValue(Object value) {
if (value instanceof byte[]) {
return BytesUtils.toHexString((byte[]) value);
}
+ // Handle Java collections (Map, List)
+ if (value instanceof Map || value instanceof List) {
+ try {
+ return JsonUtils.objectToString(value);
+ } catch (JsonProcessingException e) {
+ throw new RuntimeException(
+ String.format("Failed to serialize %s to JSON: %s", value.getClass().getName(), e.getMessage()), e);
+ }
+ }
+ // Handle Scala collections - they don't implement java.util.Map/List
+ // JsonUtils doesn't have Scala module, so we handle empty collections directly
+ final String className = value.getClass().getName();
+ if (className.startsWith("scala.collection")) {
+ return serializeScalaCollection(value, className);
+ }
return value.toString();
}
+ /**
+ * Serialize a Scala collection to JSON string.
+ * For empty collections, returns "{}" or "[]". For non-empty, throws an error.
+ */
+ private static String serializeScalaCollection(final Object scalaCollection, final String className) {
+ // Check if empty using reflection (all Scala collections have isEmpty method)
+ try {
+ final boolean isEmpty = (boolean) scalaCollection.getClass().getMethod("isEmpty").invoke(scalaCollection);
Review Comment:
Reflection is used on every Scala collection serialization. Consider caching
the `isEmpty` Method object in a static map keyed by class name to avoid
repeated reflection lookups for the same collection types.
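
One possible shape for such a cache (a minimal sketch, not the PR's code; it keys on the `Class` object rather than the class name, and the error handling here is only illustrative):

```java
import java.lang.reflect.Method;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

final class ScalaIsEmptyCache {
  // Resolve isEmpty() once per concrete collection class instead of once per call.
  private static final Map<Class<?>, Method> IS_EMPTY_CACHE = new ConcurrentHashMap<>();

  static boolean isEmpty(Object scalaCollection) {
    Method isEmptyMethod = IS_EMPTY_CACHE.computeIfAbsent(scalaCollection.getClass(), clazz -> {
      try {
        return clazz.getMethod("isEmpty");
      } catch (NoSuchMethodException e) {
        throw new IllegalStateException("No isEmpty() method on " + clazz.getName(), e);
      }
    });
    try {
      return (boolean) isEmptyMethod.invoke(scalaCollection);
    } catch (ReflectiveOperationException e) {
      throw new IllegalStateException(
          "Failed to invoke isEmpty() on " + scalaCollection.getClass().getName(), e);
    }
  }

  private ScalaIsEmptyCache() {
  }
}
```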
##########
pinot-spi/src/test/java/org/apache/pinot/spi/data/FieldSpecTest.java:
##########
@@ -501,4 +505,397 @@ public void testJsonSerializationExcludesIgnoredFields()
throws Exception {
Assert.assertTrue(metricJson.contains("\"singleValueField\":true"),
"JSON should contain singleValueField field: " + metricJson);
}
+
+
+ /**
+ * DataProvider for testing getDefaultNullValue with all valid FieldType and
DataType combinations.
+ * Each entry contains: FieldType, DataType, expected default null value
+ */
+ @DataProvider(name = "defaultNullValueCases")
+ public Object[][] defaultNullValueCases() {
+ return new Object[][]{
+ // DIMENSION field type (ordinal 0)
+ {FieldType.DIMENSION, DataType.INT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT},
+ {FieldType.DIMENSION, DataType.LONG,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG},
+ {FieldType.DIMENSION, DataType.FLOAT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT},
+ {FieldType.DIMENSION, DataType.DOUBLE,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE},
+ {FieldType.DIMENSION, DataType.BIG_DECIMAL,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BIG_DECIMAL},
+ {FieldType.DIMENSION, DataType.BOOLEAN,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN},
+ {FieldType.DIMENSION, DataType.TIMESTAMP,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP},
+ {FieldType.DIMENSION, DataType.STRING,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_STRING},
+ {FieldType.DIMENSION, DataType.JSON,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON},
+ {FieldType.DIMENSION, DataType.BYTES,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES},
+
+ // TIME field type (ordinal 2) - same as DIMENSION
+ {FieldType.TIME, DataType.INT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT},
+ {FieldType.TIME, DataType.LONG,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG},
+ {FieldType.TIME, DataType.FLOAT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT},
+ {FieldType.TIME, DataType.DOUBLE,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE},
+ {FieldType.TIME, DataType.BIG_DECIMAL,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BIG_DECIMAL},
+ {FieldType.TIME, DataType.BOOLEAN,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN},
+ {FieldType.TIME, DataType.TIMESTAMP,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP},
+ {FieldType.TIME, DataType.STRING,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_STRING},
+ {FieldType.TIME, DataType.JSON,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON},
+ {FieldType.TIME, DataType.BYTES,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES},
+
+ // DATE_TIME field type (ordinal 3) - same as DIMENSION
+ {FieldType.DATE_TIME, DataType.INT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT},
+ {FieldType.DATE_TIME, DataType.LONG,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG},
+ {FieldType.DATE_TIME, DataType.FLOAT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT},
+ {FieldType.DATE_TIME, DataType.DOUBLE,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE},
+ {FieldType.DATE_TIME, DataType.BIG_DECIMAL,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BIG_DECIMAL},
+ {FieldType.DATE_TIME, DataType.BOOLEAN,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN},
+ {FieldType.DATE_TIME, DataType.TIMESTAMP,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP},
+ {FieldType.DATE_TIME, DataType.STRING,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_STRING},
+ {FieldType.DATE_TIME, DataType.JSON,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON},
+ {FieldType.DATE_TIME, DataType.BYTES,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES},
+
+ // METRIC field type (ordinal 1)
+ {FieldType.METRIC, DataType.INT,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_INT},
+ {FieldType.METRIC, DataType.LONG,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_LONG},
+ {FieldType.METRIC, DataType.FLOAT,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_FLOAT},
+ {FieldType.METRIC, DataType.DOUBLE,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_DOUBLE},
+ {FieldType.METRIC, DataType.BIG_DECIMAL,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_BIG_DECIMAL},
+ {FieldType.METRIC, DataType.STRING,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_STRING},
+ {FieldType.METRIC, DataType.BYTES,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_BYTES},
+
+ // COMPLEX field type (ordinal 4)
+ {FieldType.COMPLEX, DataType.MAP,
FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_MAP},
+ {FieldType.COMPLEX, DataType.LIST,
FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_LIST},
+ };
+ }
+
+ /**
+ * Test that getDefaultNullValue returns correct default values for all
valid FieldType + DataType combinations.
+ */
+ @Test(dataProvider = "defaultNullValueCases")
+ public void testGetDefaultNullValue(final FieldType fieldType, final
DataType dataType, final Object expectedValue) {
+ final Object actualValue = FieldSpec.getDefaultNullValue(fieldType,
dataType, null);
+ Assert.assertEquals(actualValue, expectedValue,
+ String.format("Default null value mismatch for fieldType=%s,
dataType=%s", fieldType, dataType));
+ }
+
+ /**
+ * Test that default null values can be converted to string and back for all
types.
+ * This validates the round-trip: defaultValue -> getStringValue ->
dataType.convert -> original value
+ */
+ @DataProvider(name = "stringConversionCases")
+ public Object[][] stringConversionCases() {
+ return new Object[][]{
+ // Primitive types
+ {DataType.INT, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT},
+ {DataType.LONG, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG},
+ {DataType.FLOAT, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT},
+ {DataType.DOUBLE, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE},
+ {DataType.BIG_DECIMAL,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BIG_DECIMAL},
+ {DataType.BOOLEAN, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN},
+ {DataType.TIMESTAMP,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP},
+ {DataType.STRING, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_STRING},
+ {DataType.JSON, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON},
+ {DataType.BYTES, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES},
+ // Complex types
+ {DataType.MAP, FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_MAP},
+ {DataType.LIST, FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_LIST},
+ };
+ }
+
+ @Test(dataProvider = "stringConversionCases")
+ public void testDefaultNullValueStringConversion(final DataType dataType,
final Object defaultValue) {
+ // Convert to string
+ final String stringValue = FieldSpec.getStringValue(defaultValue);
+ Assert.assertNotNull(stringValue,
+ String.format("String value should not be null for dataType=%s",
dataType));
+
+ // Convert back using dataType.convert
+ final Object convertedValue = dataType.convert(stringValue);
+ Assert.assertEquals(convertedValue, defaultValue,
+ String.format("Round-trip conversion failed for dataType=%s:
original=%s, string=%s, converted=%s",
+ dataType, defaultValue, stringValue, convertedValue));
+ }
+
+ /**
+ * Test that MAP default null value (empty Map) can be converted to string
and back.
+ * This specifically tests the bug where Scala Map.toString() produces
"Map()" instead of "{}".
+ */
+ @Test
+ public void testMapDefaultNullValueStringConversion() {
+ final Map<?, ?> defaultMapValue =
FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_MAP;
+
+ // Verify the default is an empty map
+ Assert.assertTrue(defaultMapValue.isEmpty(), "Default MAP null value
should be an empty map");
+
+ // Convert to string using getStringValue
+ final String stringValue = FieldSpec.getStringValue(defaultMapValue);
+
+ // The string should be valid JSON that can be parsed back
+ // A Java HashMap.toString() returns "{}" for empty map
+ // A Scala Map.toString() returns "Map()" which would break JSON parsing
+ Assert.assertEquals(stringValue, "{}",
+ "Empty Map should serialize to '{}', not 'Map()'. "
+ + "This can happen if Scala Map is used instead of Java Map");
+
+ // Verify it can be converted back
+ final Object convertedValue = DataType.MAP.convert(stringValue);
+ Assert.assertNotNull(convertedValue, "Converted value should not be null");
+ Assert.assertTrue(convertedValue instanceof Map, "Converted value should
be a Map");
+ Assert.assertTrue(((Map<?, ?>) convertedValue).isEmpty(), "Converted Map
should be empty");
+ }
+
+ /**
+ * Test that verifies getStringValue produces valid JSON for Map/List types.
+ * The bug occurs when getStringValue() uses toString() which produces
+ * non-JSON output for certain Map/List implementations (e.g., Scala
collections).
+ *
+ * This test verifies that Java standard collections work correctly.
+ * The actual bug manifests when Scala collections are used (via Jackson
Scala module).
+ */
+ @Test
+ public void testGetStringValueProducesValidJsonForMap() {
+ // Test with various Java Map implementations
+ final Map<String, Object> hashMap = new HashMap<>();
+ final Map<String, Object> emptyMap = Map.of();
+
+ final String hashMapString = FieldSpec.getStringValue(hashMap);
+ final String emptyMapString = FieldSpec.getStringValue(emptyMap);
+
+ // Verify the string is valid JSON (can be parsed back)
+ // Note: Java HashMap.toString() returns "{}" for empty map, which is
valid JSON
+ // But this test documents the expected behavior
+ Assert.assertNotNull(hashMapString, "String value for HashMap should not
be null");
+ Assert.assertNotNull(emptyMapString, "String value for empty Map should
not be null");
+
+ // The string should be parseable by DataType.MAP.convert()
+ // If getStringValue returns something like "Map()" (Scala style), this
would fail
+ try {
+ DataType.MAP.convert(emptyMapString);
+ } catch (IllegalArgumentException e) {
+ Assert.fail("getStringValue() produced invalid JSON for Map: '" +
emptyMapString
+ + "'. Error: " + e.getMessage());
+ }
+ }
+
+ @Test
+ public void testGetStringValueProducesValidJsonForList() {
+ final List<Object> arrayList = new ArrayList<>();
+ final List<Object> emptyList = List.of();
+
+ final String arrayListString = FieldSpec.getStringValue(arrayList);
+ final String emptyListString = FieldSpec.getStringValue(emptyList);
+
+ Assert.assertNotNull(arrayListString, "String value for ArrayList should
not be null");
+ Assert.assertNotNull(emptyListString, "String value for empty List should
not be null");
+
+ // The string should be parseable by DataType.LIST.convert()
+ try {
+ DataType.LIST.convert(emptyListString);
+ } catch (IllegalArgumentException e) {
+ Assert.fail("getStringValue() produced invalid JSON for List: '" +
emptyListString
+ + "'. Error: " + e.getMessage());
+ }
+ }
+
+ /**
+ * Test that LIST default null value (empty List) can be converted to string
and back.
+ */
+ @Test
+ public void testListDefaultNullValueStringConversion() {
+ final List<?> defaultListValue =
FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_LIST;
+
+ // Verify the default is an empty list
+ Assert.assertTrue(defaultListValue.isEmpty(), "Default LIST null value
should be an empty list");
+
+ // Convert to string using getStringValue
+ final String stringValue = FieldSpec.getStringValue(defaultListValue);
+
+ // The string should be valid JSON that can be parsed back
+ Assert.assertEquals(stringValue, "[]",
+ "Empty List should serialize to '[]', not 'List()'");
+
+ // Verify it can be converted back
+ final Object convertedValue = DataType.LIST.convert(stringValue);
+ Assert.assertNotNull(convertedValue, "Converted value should not be null");
+ Assert.assertTrue(convertedValue instanceof List, "Converted value should
be a List");
+ Assert.assertTrue(((List<?>) convertedValue).isEmpty(), "Converted List
should be empty");
+ }
+
+ /**
+ * Test custom string default null values are correctly converted by
dataType.convert().
+ */
+ @DataProvider(name = "customStringDefaultNullValueCases")
+ public Object[][] customStringDefaultNullValueCases() {
+ return new Object[][]{
+ {FieldType.DIMENSION, DataType.INT, "42", 42},
+ {FieldType.DIMENSION, DataType.LONG, "123456789", 123456789L},
+ {FieldType.DIMENSION, DataType.FLOAT, "3.14", 3.14f},
+ {FieldType.DIMENSION, DataType.DOUBLE, "2.718281828", 2.718281828},
+ {FieldType.DIMENSION, DataType.BIG_DECIMAL, "99999.99999", new
BigDecimal("99999.99999")},
+ {FieldType.DIMENSION, DataType.STRING, "custom_default",
"custom_default"},
+ {FieldType.DIMENSION, DataType.JSON, "{\"key\":\"value\"}",
"{\"key\":\"value\"}"},
+ {FieldType.COMPLEX, DataType.MAP, "{\"a\":1}", Map.of("a", 1)},
+ {FieldType.COMPLEX, DataType.LIST, "[1,2,3]", List.of(1, 2, 3)},
+ };
+ }
+
+ @Test(dataProvider = "customStringDefaultNullValueCases")
+ public void testCustomStringDefaultNullValue(FieldType fieldType, DataType
dataType,
+ String stringDefaultNullValue, Object expectedValue) {
+ final Object actualValue = FieldSpec.getDefaultNullValue(fieldType,
dataType, stringDefaultNullValue);
+ Assert.assertEquals(actualValue, expectedValue,
+ String.format("Custom default null value conversion failed for
fieldType=%s, dataType=%s, input=%s",
+ fieldType, dataType, stringDefaultNullValue));
+ }
+
+ /**
+ * Test that invalid string default null values throw
IllegalArgumentException.
+ * This validates the error message format: "Cannot convert value: 'X' to
type: Y"
+ */
+ @Test
+ public void testInvalidMapStringDefaultNullValueThrowsException() {
+ // "Map()" is what Scala Map.toString() produces, which is NOT valid JSON
+ final String invalidMapString = "Map()";
+
+ try {
+ FieldSpec.getDefaultNullValue(FieldType.COMPLEX, DataType.MAP,
invalidMapString);
+ Assert.fail("Expected IllegalArgumentException for invalid MAP string: "
+ invalidMapString);
+ } catch (IllegalArgumentException e) {
+ // Verify the error message contains expected information
+ Assert.assertTrue(e.getMessage().contains("Cannot convert value"),
+ "Error message should contain 'Cannot convert value': " +
e.getMessage());
+ Assert.assertTrue(e.getMessage().contains("Map()"),
+ "Error message should contain the invalid value 'Map()': " +
e.getMessage());
+ Assert.assertTrue(e.getMessage().contains("MAP"),
+ "Error message should contain the target type 'MAP': " +
e.getMessage());
+ }
+ }
+
+ /**
+ * Test that getStringValue handles objects with Scala collection-like class
names.
+ * Since Scala collections don't implement java.util.Map/List, we detect
them by class name.
+ * This test uses a mock object to simulate Scala collection behavior.
+ */
+ @Test
+ public void testGetStringValueHandlesScalaStyleCollections() {
+ // Create a mock object that simulates Scala Map behavior:
+ // - Its class name starts with "scala.collection" pattern
+ // - Its toString() returns "Map()" instead of valid JSON
+ // We can't test with actual Scala classes without adding Scala dependency,
+ // so we verify the fix using a custom HashMap that overrides toString()
+ final Object scalaStyleMap = new HashMap<String, Object>() {
+ @Override
+ public String toString() {
+ return "Map()"; // This is what Scala Map.toString() returns
+ }
+ };
+
+ // With the fix, getStringValue should serialize to JSON "{}" instead of
using toString()
+ final String result = FieldSpec.getStringValue(scalaStyleMap);
+
+ // The result should be valid JSON, not "Map()"
+ Assert.assertEquals(result, "{}",
+ "getStringValue should serialize Map to JSON '{}', not '" + result +
"'");
+
+ // Verify the result can be parsed back
+ try {
+ DataType.MAP.convert(result);
+ } catch (IllegalArgumentException e) {
+ Assert.fail("getStringValue produced invalid JSON: " + result);
+ }
+ }
+
+ /**
+ * Test that invalid string default null values throw
IllegalArgumentException for LIST.
+ */
+ @Test
+ public void testInvalidListStringDefaultNullValueThrowsException() {
+ // "List()" is what Scala List.toString() produces, which is NOT valid JSON
+ final String invalidListString = "List()";
+
+ try {
+ FieldSpec.getDefaultNullValue(FieldType.COMPLEX, DataType.LIST,
invalidListString);
+ Assert.fail("Expected IllegalArgumentException for invalid LIST string:
" + invalidListString);
+ } catch (IllegalArgumentException e) {
+ Assert.assertTrue(e.getMessage().contains("Cannot convert value"),
+ "Error message should contain 'Cannot convert value': " +
e.getMessage());
+ Assert.assertTrue(e.getMessage().contains("List()"),
+ "Error message should contain the invalid value 'List()': " +
e.getMessage());
+ }
+ }
+
+ /**
+ * Test that getStringValue throws RuntimeException when JSON serialization
fails.
+ * This covers the catch block for JsonProcessingException.
+ */
+ @Test
+ public void testGetStringValueThrowsRuntimeExceptionOnSerializationFailure()
{
+ // Create a Map with a circular reference which cannot be serialized to
JSON
+ Map<String, Object> circularMap = new HashMap<>();
+ circularMap.put("self", circularMap); // Circular reference
Review Comment:
This test covers the `JsonProcessingException` catch block for Java
collections, but the similar error path in `serializeScalaCollection()` (lines
377-384 where reflection fails) is not tested. Add a test that triggers the
reflection exception branch to ensure complete coverage of error handling.
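
One way to hit that branch without adding a Scala dependency is a test-only stub whose package name starts with `scala.collection` but which has no public `isEmpty()` method. This is only a sketch; the stub class, its placement under `src/test/java`, and the test name are assumptions, not existing code:

```java
// --- src/test/java/scala/collection/FakeCollectionWithoutIsEmpty.java (test-only stub) ---
package scala.collection;

public class FakeCollectionWithoutIsEmpty {
  // Intentionally no isEmpty() method, so the reflective lookup in serializeScalaCollection() fails.
}

// --- added to FieldSpecTest.java ---
@Test
public void testGetStringValueScalaReflectionFailureThrows() {
  final Object fake = new scala.collection.FakeCollectionWithoutIsEmpty();
  try {
    FieldSpec.getStringValue(fake);
    Assert.fail("Expected IllegalArgumentException when isEmpty() cannot be resolved via reflection");
  } catch (IllegalArgumentException e) {
    Assert.assertTrue(e.getMessage().contains("scala.collection"),
        "Error message should mention the collection class: " + e.getMessage());
  }
}
```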
##########
pinot-spi/src/test/java/org/apache/pinot/spi/data/FieldSpecTest.java:
##########
@@ -501,4 +505,397 @@ public void testJsonSerializationExcludesIgnoredFields()
throws Exception {
Assert.assertTrue(metricJson.contains("\"singleValueField\":true"),
"JSON should contain singleValueField field: " + metricJson);
}
+
+
+ /**
+ * DataProvider for testing getDefaultNullValue with all valid FieldType and
DataType combinations.
+ * Each entry contains: FieldType, DataType, expected default null value
+ */
+ @DataProvider(name = "defaultNullValueCases")
+ public Object[][] defaultNullValueCases() {
+ return new Object[][]{
+ // DIMENSION field type (ordinal 0)
+ {FieldType.DIMENSION, DataType.INT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT},
+ {FieldType.DIMENSION, DataType.LONG,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG},
+ {FieldType.DIMENSION, DataType.FLOAT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT},
+ {FieldType.DIMENSION, DataType.DOUBLE,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE},
+ {FieldType.DIMENSION, DataType.BIG_DECIMAL,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BIG_DECIMAL},
+ {FieldType.DIMENSION, DataType.BOOLEAN,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN},
+ {FieldType.DIMENSION, DataType.TIMESTAMP,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP},
+ {FieldType.DIMENSION, DataType.STRING,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_STRING},
+ {FieldType.DIMENSION, DataType.JSON,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON},
+ {FieldType.DIMENSION, DataType.BYTES,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES},
+
+ // TIME field type (ordinal 2) - same as DIMENSION
+ {FieldType.TIME, DataType.INT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT},
+ {FieldType.TIME, DataType.LONG,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG},
+ {FieldType.TIME, DataType.FLOAT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT},
+ {FieldType.TIME, DataType.DOUBLE,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE},
+ {FieldType.TIME, DataType.BIG_DECIMAL,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BIG_DECIMAL},
+ {FieldType.TIME, DataType.BOOLEAN,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN},
+ {FieldType.TIME, DataType.TIMESTAMP,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP},
+ {FieldType.TIME, DataType.STRING,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_STRING},
+ {FieldType.TIME, DataType.JSON,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON},
+ {FieldType.TIME, DataType.BYTES,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES},
+
+ // DATE_TIME field type (ordinal 3) - same as DIMENSION
+ {FieldType.DATE_TIME, DataType.INT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT},
+ {FieldType.DATE_TIME, DataType.LONG,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG},
+ {FieldType.DATE_TIME, DataType.FLOAT,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT},
+ {FieldType.DATE_TIME, DataType.DOUBLE,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE},
+ {FieldType.DATE_TIME, DataType.BIG_DECIMAL,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BIG_DECIMAL},
+ {FieldType.DATE_TIME, DataType.BOOLEAN,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN},
+ {FieldType.DATE_TIME, DataType.TIMESTAMP,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP},
+ {FieldType.DATE_TIME, DataType.STRING,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_STRING},
+ {FieldType.DATE_TIME, DataType.JSON,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON},
+ {FieldType.DATE_TIME, DataType.BYTES,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES},
+
+ // METRIC field type (ordinal 1)
+ {FieldType.METRIC, DataType.INT,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_INT},
+ {FieldType.METRIC, DataType.LONG,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_LONG},
+ {FieldType.METRIC, DataType.FLOAT,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_FLOAT},
+ {FieldType.METRIC, DataType.DOUBLE,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_DOUBLE},
+ {FieldType.METRIC, DataType.BIG_DECIMAL,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_BIG_DECIMAL},
+ {FieldType.METRIC, DataType.STRING,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_STRING},
+ {FieldType.METRIC, DataType.BYTES,
FieldSpec.DEFAULT_METRIC_NULL_VALUE_OF_BYTES},
+
+ // COMPLEX field type (ordinal 4)
+ {FieldType.COMPLEX, DataType.MAP,
FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_MAP},
+ {FieldType.COMPLEX, DataType.LIST,
FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_LIST},
+ };
+ }
+
+ /**
+ * Test that getDefaultNullValue returns correct default values for all
valid FieldType + DataType combinations.
+ */
+ @Test(dataProvider = "defaultNullValueCases")
+ public void testGetDefaultNullValue(final FieldType fieldType, final
DataType dataType, final Object expectedValue) {
+ final Object actualValue = FieldSpec.getDefaultNullValue(fieldType,
dataType, null);
+ Assert.assertEquals(actualValue, expectedValue,
+ String.format("Default null value mismatch for fieldType=%s,
dataType=%s", fieldType, dataType));
+ }
+
+ /**
+ * Test that default null values can be converted to string and back for all
types.
+ * This validates the round-trip: defaultValue -> getStringValue ->
dataType.convert -> original value
+ */
+ @DataProvider(name = "stringConversionCases")
+ public Object[][] stringConversionCases() {
+ return new Object[][]{
+ // Primitive types
+ {DataType.INT, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT},
+ {DataType.LONG, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG},
+ {DataType.FLOAT, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT},
+ {DataType.DOUBLE, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE},
+ {DataType.BIG_DECIMAL,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BIG_DECIMAL},
+ {DataType.BOOLEAN, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN},
+ {DataType.TIMESTAMP,
FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP},
+ {DataType.STRING, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_STRING},
+ {DataType.JSON, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON},
+ {DataType.BYTES, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES},
+ // Complex types
+ {DataType.MAP, FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_MAP},
+ {DataType.LIST, FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_LIST},
+ };
+ }
+
+ @Test(dataProvider = "stringConversionCases")
+ public void testDefaultNullValueStringConversion(final DataType dataType,
final Object defaultValue) {
+ // Convert to string
+ final String stringValue = FieldSpec.getStringValue(defaultValue);
+ Assert.assertNotNull(stringValue,
+ String.format("String value should not be null for dataType=%s",
dataType));
+
+ // Convert back using dataType.convert
+ final Object convertedValue = dataType.convert(stringValue);
+ Assert.assertEquals(convertedValue, defaultValue,
+ String.format("Round-trip conversion failed for dataType=%s:
original=%s, string=%s, converted=%s",
+ dataType, defaultValue, stringValue, convertedValue));
+ }
+
+ /**
+ * Test that MAP default null value (empty Map) can be converted to string
and back.
+ * This specifically tests the bug where Scala Map.toString() produces
"Map()" instead of "{}".
+ */
+ @Test
+ public void testMapDefaultNullValueStringConversion() {
+ final Map<?, ?> defaultMapValue =
FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_MAP;
+
+ // Verify the default is an empty map
+ Assert.assertTrue(defaultMapValue.isEmpty(), "Default MAP null value
should be an empty map");
+
+ // Convert to string using getStringValue
+ final String stringValue = FieldSpec.getStringValue(defaultMapValue);
+
+ // The string should be valid JSON that can be parsed back
+ // A Java HashMap.toString() returns "{}" for empty map
+ // A Scala Map.toString() returns "Map()" which would break JSON parsing
+ Assert.assertEquals(stringValue, "{}",
+ "Empty Map should serialize to '{}', not 'Map()'. "
+ + "This can happen if Scala Map is used instead of Java Map");
+
+ // Verify it can be converted back
+ final Object convertedValue = DataType.MAP.convert(stringValue);
+ Assert.assertNotNull(convertedValue, "Converted value should not be null");
+ Assert.assertTrue(convertedValue instanceof Map, "Converted value should
be a Map");
+ Assert.assertTrue(((Map<?, ?>) convertedValue).isEmpty(), "Converted Map
should be empty");
+ }
+
+ /**
+ * Test that verifies getStringValue produces valid JSON for Map/List types.
+ * The bug occurs when getStringValue() uses toString() which produces
+ * non-JSON output for certain Map/List implementations (e.g., Scala
collections).
+ *
+ * This test verifies that Java standard collections work correctly.
+ * The actual bug manifests when Scala collections are used (via Jackson
Scala module).
+ */
+ @Test
+ public void testGetStringValueProducesValidJsonForMap() {
+ // Test with various Java Map implementations
+ final Map<String, Object> hashMap = new HashMap<>();
+ final Map<String, Object> emptyMap = Map.of();
+
+ final String hashMapString = FieldSpec.getStringValue(hashMap);
+ final String emptyMapString = FieldSpec.getStringValue(emptyMap);
+
+ // Verify the string is valid JSON (can be parsed back)
+ // Note: Java HashMap.toString() returns "{}" for empty map, which is
valid JSON
+ // But this test documents the expected behavior
+ Assert.assertNotNull(hashMapString, "String value for HashMap should not
be null");
+ Assert.assertNotNull(emptyMapString, "String value for empty Map should
not be null");
+
+ // The string should be parseable by DataType.MAP.convert()
+ // If getStringValue returns something like "Map()" (Scala style), this
would fail
+ try {
+ DataType.MAP.convert(emptyMapString);
+ } catch (IllegalArgumentException e) {
+ Assert.fail("getStringValue() produced invalid JSON for Map: '" +
emptyMapString
+ + "'. Error: " + e.getMessage());
+ }
+ }
+
+ @Test
+ public void testGetStringValueProducesValidJsonForList() {
+ final List<Object> arrayList = new ArrayList<>();
+ final List<Object> emptyList = List.of();
+
+ final String arrayListString = FieldSpec.getStringValue(arrayList);
+ final String emptyListString = FieldSpec.getStringValue(emptyList);
+
+ Assert.assertNotNull(arrayListString, "String value for ArrayList should
not be null");
+ Assert.assertNotNull(emptyListString, "String value for empty List should
not be null");
+
+ // The string should be parseable by DataType.LIST.convert()
+ try {
+ DataType.LIST.convert(emptyListString);
+ } catch (IllegalArgumentException e) {
+ Assert.fail("getStringValue() produced invalid JSON for List: '" +
emptyListString
+ + "'. Error: " + e.getMessage());
+ }
+ }
+
+ /**
+ * Test that LIST default null value (empty List) can be converted to string
and back.
+ */
+ @Test
+ public void testListDefaultNullValueStringConversion() {
+ final List<?> defaultListValue =
FieldSpec.DEFAULT_COMPLEX_NULL_VALUE_OF_LIST;
+
+ // Verify the default is an empty list
+ Assert.assertTrue(defaultListValue.isEmpty(), "Default LIST null value
should be an empty list");
+
+ // Convert to string using getStringValue
+ final String stringValue = FieldSpec.getStringValue(defaultListValue);
+
+ // The string should be valid JSON that can be parsed back
+ Assert.assertEquals(stringValue, "[]",
+ "Empty List should serialize to '[]', not 'List()'");
+
+ // Verify it can be converted back
+ final Object convertedValue = DataType.LIST.convert(stringValue);
+ Assert.assertNotNull(convertedValue, "Converted value should not be null");
+ Assert.assertTrue(convertedValue instanceof List, "Converted value should
be a List");
+ Assert.assertTrue(((List<?>) convertedValue).isEmpty(), "Converted List
should be empty");
+ }
+
+ /**
+ * Test custom string default null values are correctly converted by
dataType.convert().
+ */
+ @DataProvider(name = "customStringDefaultNullValueCases")
+ public Object[][] customStringDefaultNullValueCases() {
+ return new Object[][]{
+ {FieldType.DIMENSION, DataType.INT, "42", 42},
+ {FieldType.DIMENSION, DataType.LONG, "123456789", 123456789L},
+ {FieldType.DIMENSION, DataType.FLOAT, "3.14", 3.14f},
+ {FieldType.DIMENSION, DataType.DOUBLE, "2.718281828", 2.718281828},
+ {FieldType.DIMENSION, DataType.BIG_DECIMAL, "99999.99999", new
BigDecimal("99999.99999")},
+ {FieldType.DIMENSION, DataType.STRING, "custom_default",
"custom_default"},
+ {FieldType.DIMENSION, DataType.JSON, "{\"key\":\"value\"}",
"{\"key\":\"value\"}"},
+ {FieldType.COMPLEX, DataType.MAP, "{\"a\":1}", Map.of("a", 1)},
+ {FieldType.COMPLEX, DataType.LIST, "[1,2,3]", List.of(1, 2, 3)},
+ };
+ }
+
+ @Test(dataProvider = "customStringDefaultNullValueCases")
+ public void testCustomStringDefaultNullValue(FieldType fieldType, DataType
dataType,
+ String stringDefaultNullValue, Object expectedValue) {
+ final Object actualValue = FieldSpec.getDefaultNullValue(fieldType,
dataType, stringDefaultNullValue);
+ Assert.assertEquals(actualValue, expectedValue,
+ String.format("Custom default null value conversion failed for
fieldType=%s, dataType=%s, input=%s",
+ fieldType, dataType, stringDefaultNullValue));
+ }
+
+ /**
+ * Test that invalid string default null values throw
IllegalArgumentException.
+ * This validates the error message format: "Cannot convert value: 'X' to
type: Y"
+ */
+ @Test
+ public void testInvalidMapStringDefaultNullValueThrowsException() {
+ // "Map()" is what Scala Map.toString() produces, which is NOT valid JSON
+ final String invalidMapString = "Map()";
+
+ try {
+ FieldSpec.getDefaultNullValue(FieldType.COMPLEX, DataType.MAP,
invalidMapString);
+ Assert.fail("Expected IllegalArgumentException for invalid MAP string: "
+ invalidMapString);
+ } catch (IllegalArgumentException e) {
+ // Verify the error message contains expected information
+ Assert.assertTrue(e.getMessage().contains("Cannot convert value"),
+ "Error message should contain 'Cannot convert value': " +
e.getMessage());
+ Assert.assertTrue(e.getMessage().contains("Map()"),
+ "Error message should contain the invalid value 'Map()': " +
e.getMessage());
+ Assert.assertTrue(e.getMessage().contains("MAP"),
+ "Error message should contain the target type 'MAP': " +
e.getMessage());
+ }
+ }
+
+ /**
+ * Test that getStringValue handles objects with Scala collection-like class
names.
+ * Since Scala collections don't implement java.util.Map/List, we detect
them by class name.
+ * This test uses a mock object to simulate Scala collection behavior.
+ */
+ @Test
+ public void testGetStringValueHandlesScalaStyleCollections() {
+ // Create a mock object that simulates Scala Map behavior:
+ // - Its class name starts with "scala.collection" pattern
+ // - Its toString() returns "Map()" instead of valid JSON
+ // We can't test with actual Scala classes without adding Scala dependency,
+ // so we verify the fix using a custom HashMap that overrides toString()
+ final Object scalaStyleMap = new HashMap<String, Object>() {
+ @Override
+ public String toString() {
+ return "Map()"; // This is what Scala Map.toString() returns
+ }
+ };
Review Comment:
This test verifies behavior for a Java HashMap with an overridden `toString()`, but it doesn't actually
test the Scala collection detection logic: the anonymous subclass is named after the enclosing test class
(e.g. `FieldSpecTest$1`), not `scala.collection.*`, so the value is handled by the Java `Map` branch. The
Scala detection code path at line 365 (`className.startsWith("scala.collection")`) is never exercised.
Consider adding a test with a mock class whose name actually starts with `scala.collection` to cover this
branch.
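
A sketch of what such a test could look like, using a test-only stub whose fully qualified name starts with `scala.collection` (the package, class name, and placement are assumptions, not existing code). The stub's simple name is deliberately `Map` so it is classified as a map by either the current `contains("Map")` check or the stricter regex suggested elsewhere in this review:

```java
// --- src/test/java/scala/collection/testfixtures/Map.java (test-only stub) ---
package scala.collection.testfixtures;

public class Map {
  // Mirrors the only part of the Scala collection API that getStringValue() relies on.
  public boolean isEmpty() {
    return true;
  }
}

// --- added to FieldSpecTest.java ---
@Test
public void testGetStringValueDetectsScalaCollectionClassName() {
  final Object fakeScalaMap = new scala.collection.testfixtures.Map();
  // Class name starts with "scala.collection" and ends in ".Map", so the empty-Map branch returns "{}".
  Assert.assertEquals(FieldSpec.getStringValue(fakeScalaMap), "{}",
      "Empty scala.collection Map should serialize to '{}'");
}
```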
##########
pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java:
##########
@@ -350,9 +350,58 @@ public static String getStringValue(Object value) {
if (value instanceof byte[]) {
return BytesUtils.toHexString((byte[]) value);
}
+ // Handle Java collections (Map, List)
+ if (value instanceof Map || value instanceof List) {
+ try {
+ return JsonUtils.objectToString(value);
+ } catch (JsonProcessingException e) {
+ throw new RuntimeException(
+ String.format("Failed to serialize %s to JSON: %s", value.getClass().getName(), e.getMessage()), e);
+ }
+ }
+ // Handle Scala collections - they don't implement java.util.Map/List
+ // JsonUtils doesn't have Scala module, so we handle empty collections directly
+ final String className = value.getClass().getName();
+ if (className.startsWith("scala.collection")) {
+ return serializeScalaCollection(value, className);
+ }
return value.toString();
}
+ /**
+ * Serialize a Scala collection to JSON string.
+ * For empty collections, returns "{}" or "[]". For non-empty, throws an error.
+ */
+ private static String serializeScalaCollection(final Object scalaCollection, final String className) {
+ // Check if empty using reflection (all Scala collections have isEmpty method)
+ try {
+ final boolean isEmpty = (boolean) scalaCollection.getClass().getMethod("isEmpty").invoke(scalaCollection);
+ if (isEmpty) {
+ return getEmptyJsonForScalaCollection(className);
+ }
+ } catch (Exception e) {
+ // If we can't call isEmpty, fall through to error
+ }
+ throw new IllegalArgumentException(
+ String.format("Cannot serialize non-empty Scala collection %s. Use
JSON string instead.", className));
+ }
+
+ /**
+ * Returns the appropriate empty JSON representation for a Scala collection based on its class name.
+ */
+ private static String getEmptyJsonForScalaCollection(final String className) {
+ if (className.contains("Map")) {
Review Comment:
Using `contains("Map")` for class name matching can produce false positives. A class like `MyMapper` or
`EmailMapperService` would incorrectly be identified as a Map type. Use more precise pattern matching,
such as `className.matches(".*\\.Map(\\$|$)")`, to ensure `Map` appears as an actual class name component.
```suggestion
if (className.matches(".*\\.Map(\\$|$)")) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]