This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new ec3d6ed387 add support to differentiate null and emptyLists for multi-value columns in avro decoder (#13572)
ec3d6ed387 is described below

commit ec3d6ed387332e802634bb87e13c5dff55ff6087
Author: Qiaochu Liu <[email protected]>
AuthorDate: Fri Sep 13 12:25:34 2024 -0700

    add support to differentiate null and emptyLists for multi-value columns in avro decoder (#13572)
---
 .../inputformat/avro/AvroRecordExtractor.java      |  1 +
 .../avro/AvroRecordExtractorConfig.java            | 10 ++++
 .../avro/AvroRecordToPinotRowGeneratorTest.java    | 62 ++++++++++++++++++++++
 .../spi/data/readers/BaseRecordExtractor.java      |  4 +-
 .../spi/data/readers/RecordExtractorConfig.java    |  3 +-
 5 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordExtractor.java b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordExtractor.java
index d1a4dea0dc..770029b3e3 100644
--- a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordExtractor.java
+++ b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordExtractor.java
@@ -48,6 +48,7 @@ public class AvroRecordExtractor extends 
BaseRecordExtractor<GenericRecord> {
     AvroRecordExtractorConfig config = (AvroRecordExtractorConfig) 
recordExtractorConfig;
     if (config != null) {
       _applyLogicalTypes = config.isEnableLogicalTypes();
+      _differentiateNullAndEmptyForMV = 
config.isDifferentiateNullAndEmptyForMV();
     }
     if (fields == null || fields.isEmpty()) {
       _extractAll = true;
diff --git a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordExtractorConfig.java b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordExtractorConfig.java
index da26b1048a..1882196777 100644
--- a/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordExtractorConfig.java
+++ b/pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordExtractorConfig.java
@@ -27,10 +27,12 @@ import 
org.apache.pinot.spi.data.readers.RecordExtractorConfig;
  */
 public class AvroRecordExtractorConfig implements RecordExtractorConfig {
   private boolean _enableLogicalTypes = false;
+  private boolean _differentiateNullAndEmptyForMV = false;
 
   @Override
   public void init(Map<String, String> props) {
     _enableLogicalTypes = 
Boolean.parseBoolean(props.get("enableLogicalTypes"));
+    _differentiateNullAndEmptyForMV = 
Boolean.parseBoolean(props.get("differentiateNullAndEmptyForMV"));
   }
 
   public boolean isEnableLogicalTypes() {
@@ -40,4 +42,12 @@ public class AvroRecordExtractorConfig implements 
RecordExtractorConfig {
   public void setEnableLogicalTypes(boolean enableLogicalTypes) {
     _enableLogicalTypes = enableLogicalTypes;
   }
+
+  public boolean isDifferentiateNullAndEmptyForMV() {
+    return _differentiateNullAndEmptyForMV;
+  }
+
+  public void setDifferentiateNullAndEmptyForMV(boolean 
differentiateNullAndEmptyForMV) {
+    _differentiateNullAndEmptyForMV = differentiateNullAndEmptyForMV;
+  }
 }
diff --git a/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordToPinotRowGeneratorTest.java b/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordToPinotRowGeneratorTest.java
index 45cf943411..17402272a2 100644
--- a/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordToPinotRowGeneratorTest.java
+++ b/pinot-plugins/pinot-input-format/pinot-avro-base/src/test/java/org/apache/pinot/plugin/inputformat/avro/AvroRecordToPinotRowGeneratorTest.java
@@ -19,12 +19,15 @@
 package org.apache.pinot.plugin.inputformat.avro;
 
 import com.google.common.collect.Sets;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Set;
 import org.apache.avro.Schema;
+import org.apache.avro.SchemaBuilder;
 import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericRecord;
 import org.apache.pinot.spi.data.readers.GenericRow;
 import org.testng.Assert;
 import org.testng.annotations.Test;
@@ -52,4 +55,63 @@ public class AvroRecordToPinotRowGeneratorTest {
         
genericRow.getFieldToValueMap().keySet().containsAll(Arrays.asList("incomingTime",
 "outgoingTime")));
     Assert.assertEquals(genericRow.getValue("incomingTime"), 12345L);
   }
+
+  @Test
+  public void testNoDifferentiateNullAndEmptyForMultiValueFields() {
+    AvroRecordExtractorConfig config = new AvroRecordExtractorConfig();
+    AvroRecordExtractor avroRecordExtractor = new AvroRecordExtractor();
+    avroRecordExtractor.init(null, config);
+
+    Schema schema =
+        
SchemaBuilder.record("GenericRow").fields().name("arrField1").type().array().items().stringType().noDefault()
+            
.name("arrField2").type().array().items().stringType().noDefault().name("arrField3").type().array().items()
+            .stringType().noDefault().endRecord();
+
+    GenericRecord genericRecord = new GenericData.Record(schema);
+    List<String> arrayData1 = Arrays.asList("value1", "value2", "value3");
+    List<String> arrayData2 = null;
+    List<String> arrayData3 = new ArrayList<>();
+
+    GenericRow genericRow = new GenericRow();
+    genericRecord.put("arrField1", arrayData1);
+    genericRecord.put("arrField2", arrayData2);
+    genericRecord.put("arrField3", arrayData3);
+
+    avroRecordExtractor.extract(genericRecord, genericRow);
+    Assert.assertTrue(
+        
genericRow.getFieldToValueMap().keySet().containsAll(Arrays.asList("arrField1", 
"arrField2", "arrField3")));
+    Assert.assertEquals(genericRow.getValue("arrField1"), 
arrayData1.toArray());
+    Assert.assertEquals(genericRow.getValue("arrField2"), null);
+    Assert.assertEquals(genericRow.getValue("arrField3"), null);
+  }
+
+  @Test
+  public void testDifferentiateNullAndEmptyForMultiValueFields() {
+    AvroRecordExtractorConfig config = new AvroRecordExtractorConfig();
+    config.setDifferentiateNullAndEmptyForMV(true);
+    AvroRecordExtractor avroRecordExtractor = new AvroRecordExtractor();
+    avroRecordExtractor.init(null, config);
+
+    Schema schema =
+        
SchemaBuilder.record("GenericRow").fields().name("arrField1").type().array().items().stringType().noDefault()
+            
.name("arrField2").type().array().items().stringType().noDefault().name("arrField3").type().array().items()
+            .stringType().noDefault().endRecord();
+
+    GenericRecord genericRecord = new GenericData.Record(schema);
+    List<String> arrayData1 = Arrays.asList("value1", "value2", "value3");
+    List<String> arrayData2 = null;
+    List<String> arrayData3 = new ArrayList<>();
+
+    GenericRow genericRow = new GenericRow();
+    genericRecord.put("arrField1", arrayData1);
+    genericRecord.put("arrField2", arrayData2);
+    genericRecord.put("arrField3", arrayData3);
+
+    avroRecordExtractor.extract(genericRecord, genericRow);
+    Assert.assertTrue(
+        
genericRow.getFieldToValueMap().keySet().containsAll(Arrays.asList("arrField1", 
"arrField2", "arrField3")));
+    Assert.assertEquals(genericRow.getValue("arrField1"), 
arrayData1.toArray());
+    Assert.assertEquals(genericRow.getValue("arrField2"), null);
+    Assert.assertEquals(genericRow.getValue("arrField3"), new String[0]);
+  }
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/BaseRecordExtractor.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/BaseRecordExtractor.java
index f4efdb6d2d..39c6a6da7c 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/BaseRecordExtractor.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/BaseRecordExtractor.java
@@ -33,6 +33,8 @@ import javax.annotation.Nullable;
  */
 public abstract class BaseRecordExtractor<T> implements RecordExtractor<T> {
 
+  protected boolean _differentiateNullAndEmptyForMV = false;
+
   /**
    * Converts the field value to either a single value (string, number, 
byte[]), multi value (Object[]) or a Map.
    * Returns {@code null} if the value is an empty array/collection/map.
@@ -107,7 +109,7 @@ public abstract class BaseRecordExtractor<T> implements 
RecordExtractor<T> {
   protected Object convertMultiValue(Object value) {
     Collection collection = (Collection) value;
     if (collection.isEmpty()) {
-      return null;
+      return _differentiateNullAndEmptyForMV ? new Object[0] : null;
     }
 
     int numValues = collection.size();
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/RecordExtractorConfig.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/RecordExtractorConfig.java
index f33288de76..b4c265e351 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/RecordExtractorConfig.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/RecordExtractorConfig.java
@@ -25,5 +25,6 @@ import java.util.Map;
  * Interface for configs of {@link RecordExtractor}
  */
 public interface RecordExtractorConfig {
-  default void init(Map<String, String> props) { }
+  default void init(Map<String, String> props) {
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to