This is an automated email from the ASF dual-hosted git repository.

gortiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 30b5490db15 [spi] Fix Asymmetric Serialization in TextIndexConfig 
(#17628)
30b5490db15 is described below

commit 30b5490db1538f39ab60a375060272bbbe233d8f
Author: Anshul Singh <[email protected]>
AuthorDate: Wed Feb 4 16:39:29 2026 +0530

    [spi] Fix Asymmetric Serialization in TextIndexConfig (#17628)
---
 .../pinot/segment/spi/index/TextIndexConfig.java   |  45 +++++++-
 .../segment/spi/index/TextIndexConfigTest.java     | 115 +++++++++++++++++++++
 2 files changed, 155 insertions(+), 5 deletions(-)

diff --git 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java
 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java
index b3d17f3a0a9..471fd2c9894 100644
--- 
a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java
+++ 
b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/TextIndexConfig.java
@@ -153,8 +153,8 @@ public class TextIndexConfig extends IndexConfig {
       @JsonProperty("luceneUseCompoundFile") Boolean luceneUseCompoundFile,
       @JsonProperty("luceneMaxBufferSizeMB") Integer luceneMaxBufferSizeMB,
       @JsonProperty("luceneAnalyzerClass") String luceneAnalyzerClass,
-      @JsonProperty("luceneAnalyzerClassArgs") String luceneAnalyzerClassArgs,
-      @JsonProperty("luceneAnalyzerClassArgTypes") String 
luceneAnalyzerClassArgTypes,
+      @JsonProperty("luceneAnalyzerClassArgs") Object luceneAnalyzerClassArgs,
+      @JsonProperty("luceneAnalyzerClassArgTypes") Object 
luceneAnalyzerClassArgTypes,
       @JsonProperty("luceneQueryParserClass") String luceneQueryParserClass,
       @JsonProperty("enablePrefixSuffixMatchingInPhraseQueries") Boolean 
enablePrefixSuffixMatchingInPhraseQueries,
       @JsonProperty("reuseMutableIndex") Boolean reuseMutableIndex,
@@ -179,9 +179,11 @@ public class TextIndexConfig extends IndexConfig {
 
     // Note that we cannot depend on jackson's default behavior to 
automatically coerce the comma delimited args to
     // List<String>. This is because the args may contain comma and other 
special characters such as space. Therefore,
-    // we use our own csv parser to parse the values directly.
-    _luceneAnalyzerClassArgs = CsvParser.parse(luceneAnalyzerClassArgs, true, 
false);
-    _luceneAnalyzerClassArgTypes = 
CsvParser.parse(luceneAnalyzerClassArgTypes, false, true);
+    // we use our own csv parser to parse the values directly when the input 
is a String.
+    // However, when round-tripping (serializing then deserializing), Jackson 
may produce a List<String> directly,
+    // so we also handle that case.
+    _luceneAnalyzerClassArgs = parseToList(luceneAnalyzerClassArgs, true, 
false);
+    _luceneAnalyzerClassArgTypes = parseToList(luceneAnalyzerClassArgTypes, 
false, true);
     _luceneQueryParserClass = luceneQueryParserClass == null
         ? FieldConfig.TEXT_INDEX_DEFAULT_LUCENE_QUERY_PARSER_CLASS : 
luceneQueryParserClass;
     _enablePrefixSuffixMatchingInPhraseQueries =
@@ -198,6 +200,39 @@ public class TextIndexConfig extends IndexConfig {
     _storeInSegmentFile = storeInSegmentFile == null ? 
LUCENE_INDEX_DEFAULT_STORE_IN_SEGMENT_FILE : storeInSegmentFile;
   }
 
+  /**
+   * Parses the input value to a List of Strings.
+   * Handles both String (CSV format) and List<String> (from JSON array) 
inputs.
+   * This enables proper round-trip serialization/deserialization since the 
getter returns List<String>
+   * which Jackson serializes as a JSON array, but the original input format 
was CSV string.
+   *
+   * @param value the input value (can be String, List, or null)
+   * @param escapeComma if true, don't split on escaped commas when parsing 
String
+   * @param trim whether to trim each value
+   * @return parsed list of strings, empty list if input is null or empty
+   */
+  @SuppressWarnings("unchecked")
+  private static List<String> parseToList(final @Nullable Object value, final 
boolean escapeComma, final boolean trim) {
+    if (value == null) {
+      return Collections.emptyList();
+    }
+    if (value instanceof List) {
+      final List<?> list = (List<?>) value;
+      if (list.isEmpty()) {
+        return Collections.emptyList();
+      }
+      // Convert each element to String and optionally trim
+      final List<String> result = new ArrayList<>();
+      for (final Object item : list) {
+        final String strItem = item == null ? "" : item.toString();
+        result.add(trim ? strItem.trim() : strItem);
+      }
+      return result;
+    }
+    // String or other types - use CSV parser
+    return CsvParser.parse(value.toString(), escapeComma, trim);
+  }
+
   public FSTType getFstType() {
     return _fstType;
   }
diff --git 
a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/TextIndexConfigTest.java
 
b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/TextIndexConfigTest.java
index 96f57dcccf9..0bc0b28a46e 100644
--- 
a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/TextIndexConfigTest.java
+++ 
b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/index/TextIndexConfigTest.java
@@ -121,4 +121,119 @@ public class TextIndexConfigTest {
     assertFalse(config.isLuceneUseCompoundFile(), "Unexpected 
luceneUseCompoundFile");
     assertEquals(config.getLuceneMaxBufferSizeMB(), 1024, "Unexpected 
luceneMaxBufferSize");
   }
+
+  @Test
+  public void testRoundTripSerializationWithArrayValues()
+      throws JsonProcessingException {
+    // This test verifies that TextIndexConfig can be round-tripped through 
JSON serialization.
+    // When serialized, luceneAnalyzerClassArgs/ArgTypes are output as arrays 
(from List<String> getters).
+    // When deserialized, the constructor must accept both String (CSV) and 
Array (List) formats.
+    final String confStrWithArrays = "{\n"
+        + "        \"disabled\": false,\n"
+        + "        \"luceneUseCompoundFile\": true,\n"
+        + "        \"luceneMaxBufferSizeMB\": 500,\n"
+        + "        \"luceneAnalyzerClass\": 
\"org.apache.lucene.analysis.standard.StandardAnalyzer\",\n"
+        + "        \"luceneAnalyzerClassArgs\": [],\n"
+        + "        \"luceneAnalyzerClassArgTypes\": []\n"
+        + "}";
+    final TextIndexConfig config = JsonUtils.stringToObject(confStrWithArrays, 
TextIndexConfig.class);
+
+    assertFalse(config.isDisabled(), "Unexpected disabled");
+    assertTrue(config.isLuceneUseCompoundFile(), "Unexpected 
luceneUseCompoundFile");
+    assertEquals(config.getLuceneMaxBufferSizeMB(), 500, "Unexpected 
luceneMaxBufferSizeMB");
+    assertEquals(config.getLuceneAnalyzerClass(),
+        "org.apache.lucene.analysis.standard.StandardAnalyzer", "Unexpected 
luceneAnalyzerClass");
+    assertTrue(config.getLuceneAnalyzerClassArgs().isEmpty(), "Unexpected 
luceneAnalyzerClassArgs");
+    assertTrue(config.getLuceneAnalyzerClassArgTypes().isEmpty(), "Unexpected 
luceneAnalyzerClassArgTypes");
+
+    // Now test with non-empty arrays
+    final String confStrWithNonEmptyArrays = "{\n"
+        + "        \"luceneAnalyzerClassArgs\": [\"arg1\", \"arg2\"],\n"
+        + "        \"luceneAnalyzerClassArgTypes\": [\"java.lang.String\", 
\"java.lang.Integer\"]\n"
+        + "}";
+    final TextIndexConfig configWithArgs = 
JsonUtils.stringToObject(confStrWithNonEmptyArrays, TextIndexConfig.class);
+
+    assertEquals(configWithArgs.getLuceneAnalyzerClassArgs(), 
Lists.newArrayList("arg1", "arg2"),
+        "Unexpected luceneAnalyzerClassArgs");
+    assertEquals(configWithArgs.getLuceneAnalyzerClassArgTypes(),
+        Lists.newArrayList("java.lang.String", "java.lang.Integer"),
+        "Unexpected luceneAnalyzerClassArgTypes");
+  }
+
+  @Test
+  public void testRoundTripSerializationWithStringValues()
+      throws JsonProcessingException {
+    // Test backward compatibility - original CSV string format should still 
work
+    final String confStrWithStrings = "{\n"
+        + "        \"luceneAnalyzerClassArgs\": \"arg1,arg2\",\n"
+        + "        \"luceneAnalyzerClassArgTypes\": 
\"java.lang.String,java.lang.Integer\"\n"
+        + "}";
+    final TextIndexConfig config = 
JsonUtils.stringToObject(confStrWithStrings, TextIndexConfig.class);
+
+    assertEquals(config.getLuceneAnalyzerClassArgs(), 
Lists.newArrayList("arg1", "arg2"),
+        "Unexpected luceneAnalyzerClassArgs");
+    assertEquals(config.getLuceneAnalyzerClassArgTypes(),
+        Lists.newArrayList("java.lang.String", "java.lang.Integer"),
+        "Unexpected luceneAnalyzerClassArgTypes");
+  }
+
+  @Test
+  public void testFullRoundTrip()
+      throws JsonProcessingException {
+    // Create config, serialize to JSON, deserialize back - should work
+    final String originalConfStr = "{\n"
+        + "        \"luceneAnalyzerClassArgs\": \"arg1,arg2\",\n"
+        + "        \"luceneAnalyzerClassArgTypes\": 
\"java.lang.String,java.lang.Integer\"\n"
+        + "}";
+    final TextIndexConfig originalConfig = 
JsonUtils.stringToObject(originalConfStr, TextIndexConfig.class);
+
+    // Serialize to JSON (getters return List<String>, so it becomes JSON 
arrays)
+    final String serialized = JsonUtils.objectToString(originalConfig);
+
+    // Deserialize back - this was the failing case before the fix
+    final TextIndexConfig deserializedConfig = 
JsonUtils.stringToObject(serialized, TextIndexConfig.class);
+
+    assertEquals(deserializedConfig.getLuceneAnalyzerClassArgs(), 
originalConfig.getLuceneAnalyzerClassArgs(),
+        "Round-trip failed for luceneAnalyzerClassArgs");
+    assertEquals(deserializedConfig.getLuceneAnalyzerClassArgTypes(), 
originalConfig.getLuceneAnalyzerClassArgTypes(),
+        "Round-trip failed for luceneAnalyzerClassArgTypes");
+  }
+
+  @Test
+  public void testEmptyTextConfigRoundTrip()
+      throws JsonProcessingException {
+    // When an empty text config is created from "{}", it gets default values.
+    // When serialized, luceneAnalyzerClassArgs becomes [] (empty array).
+    // When deserialized, this was failing with:
+    // "Cannot deserialize value of type `java.lang.String` from Array value"
+    final String emptyConfig = "{}";
+    final TextIndexConfig originalConfig = 
JsonUtils.stringToObject(emptyConfig, TextIndexConfig.class);
+
+    // Verify defaults are applied
+    assertFalse(originalConfig.isDisabled());
+    assertEquals(originalConfig.getLuceneAnalyzerClass(),
+        "org.apache.lucene.analysis.standard.StandardAnalyzer");
+    assertTrue(originalConfig.getLuceneAnalyzerClassArgs().isEmpty());
+    assertTrue(originalConfig.getLuceneAnalyzerClassArgTypes().isEmpty());
+
+    // Serialize to JSON - this produces the problematic format with empty 
arrays
+    final String serialized = JsonUtils.objectToString(originalConfig);
+
+    // Verify serialized JSON contains the array format that was causing the 
bug
+    assertTrue(serialized.contains("\"luceneAnalyzerClassArgs\":[]"),
+        "Serialized JSON should contain luceneAnalyzerClassArgs as empty 
array");
+    assertTrue(serialized.contains("\"luceneAnalyzerClassArgTypes\":[]"),
+        "Serialized JSON should contain luceneAnalyzerClassArgTypes as empty 
array");
+
+    // Deserialize back
+    final TextIndexConfig deserializedConfig = 
JsonUtils.stringToObject(serialized, TextIndexConfig.class);
+
+    // Verify round-trip preserves the config
+    assertEquals(deserializedConfig.isDisabled(), originalConfig.isDisabled());
+    assertEquals(deserializedConfig.getLuceneAnalyzerClass(), 
originalConfig.getLuceneAnalyzerClass());
+    assertEquals(deserializedConfig.getLuceneAnalyzerClassArgs(), 
originalConfig.getLuceneAnalyzerClassArgs());
+    assertEquals(deserializedConfig.getLuceneAnalyzerClassArgTypes(), 
originalConfig.getLuceneAnalyzerClassArgTypes());
+    assertEquals(deserializedConfig.isLuceneUseCompoundFile(), 
originalConfig.isLuceneUseCompoundFile());
+    assertEquals(deserializedConfig.getLuceneMaxBufferSizeMB(), 
originalConfig.getLuceneMaxBufferSizeMB());
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to