Copilot commented on code in PR #18378:
URL: https://github.com/apache/pinot/pull/18378#discussion_r3165699445


##########
pinot-plugins/pinot-input-format/pinot-parquet/src/test/java/org/apache/pinot/plugin/inputformat/parquet/ParquetCollectionAndEquivalenceTest.java:
##########
@@ -302,32 +295,24 @@ public void 
testAvroAndNativeReadersAgreeOnAllTestResources()
     int compared = 0;
     for (File file : parquetFiles) {
       String fileName = file.getName();
-      if (KNOWN_TIMESTAMP_DIVERGENCE_FILES.contains(fileName)) {
-        continue;
-      }
       List<GenericRow> rowsAvro;
-      List<GenericRow> rowsNative;
       try {
         rowsAvro = readAll(new ParquetAvroRecordReader(), file);
       } catch (Throwable t) {
         // Avro reader rejects some legacy schemas; only the native reader 
handles those. Skip rather than
         // fail since this test verifies cross-reader agreement, not 
single-reader coverage.
         continue;
       }
-      try {
-        rowsNative = readAll(new ParquetNativeRecordReader(), file);
-      } catch (Throwable t) {
-        continue;
-      }
+      // All files readable by Avro reader should also be readable by native 
reader
+      List<GenericRow> rowsNative = readAll(new ParquetNativeRecordReader(), 
file);
       try {
         assertReadersAgree(fileName, rowsAvro, rowsNative);
         compared++;
       } catch (AssertionError e) {
-        Map<String, Object> firstAvro = rowsAvro.isEmpty() ? 
Collections.emptyMap()
-            : canonicalize(rowsAvro.get(0).getFieldToValueMap());
-        Map<String, Object> firstNative = rowsNative.isEmpty() ? 
Collections.emptyMap()
-            : canonicalize(rowsNative.get(0).getFieldToValueMap());
-        failures.add(fileName + ": " + e.getMessage() + "\n      avro  =" + 
firstAvro
+        Map<String, Object> firstAvro = 
wrapArray(rowsAvro.get(0).getFieldToValueMap());
+        Map<String, Object> firstNative = 
wrapArray(rowsNative.get(0).getFieldToValueMap());

Review Comment:
   In the failure reporting path, `wrapArray(rowsAvro.get(0)...)` / 
`wrapArray(rowsNative.get(0)...)` will throw `IndexOutOfBoundsException` if one 
reader returns 0 rows (e.g., the assertion fails on row-count mismatch where 
one side is empty), which can mask the real assertion failure. Guard these with 
`isEmpty()` and use an empty map when there is no first row.
   



##########
pinot-plugins/pinot-input-format/pinot-thrift/src/test/java/org/apache/pinot/plugin/inputformat/thrift/ThriftRecordExtractorTest.java:
##########
@@ -18,197 +18,234 @@
  */
 package org.apache.pinot.plugin.inputformat.thrift;
 
-import com.google.common.collect.Sets;
-import java.io.BufferedOutputStream;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
-import org.apache.pinot.spi.data.readers.AbstractRecordExtractorTest;
 import org.apache.pinot.spi.data.readers.GenericRow;
-import org.apache.pinot.spi.data.readers.RecordReader;
-import org.apache.thrift.TException;
-import org.apache.thrift.protocol.TBinaryProtocol;
-import org.apache.thrift.transport.TIOStreamTransport;
+import org.apache.thrift.TBase;
+import org.apache.thrift.TFieldIdEnum;
+import org.apache.thrift.meta_data.FieldMetaData;
 import org.testng.annotations.Test;
 
-import static org.testng.Assert.assertSame;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
 
 
-/**
- * Tests for the {@link ThriftRecordExtractor}
- */
-public class ThriftRecordExtractorTest extends AbstractRecordExtractorTest {
-
-  private File _tempFile = new File(_tempDir, "test_complex_thrift.data");
-
-  private static final String INT_FIELD = "intField";
-  private static final String LONG_FIELD = "longField";
-  private static final String BOOL_FIELD = "booleanField";
-  private static final String DOUBLE_FIELD = "doubleField";
-  private static final String STRING_FIELD = "stringField";
-  private static final String ENUM_FIELD = "enumField";
-  private static final String OPTIONAL_STRING_FIELD = "optionalStringField";
-  private static final String NESTED_STRUCT_FIELD = "nestedStructField";
-  private static final String SIMPLE_LIST = "simpleListField";
-  private static final String COMPLEX_LIST = "complexListField";
-  private static final String SIMPLE_MAP = "simpleMapField";
-  private static final String COMPLEX_MAP = "complexMapField";
-  private static final String NESTED_STRING_FIELD = "nestedStringField";
-  private static final String NESTED_INT_FIELD = "nestedIntField";
-
-  @Override
-  protected List<Map<String, Object>> getInputRecords() {
-    return Arrays.asList(createRecord1(), createRecord2());
-  }
-
-  /// Verify that thrift primitive types come back as their native Java boxed 
types — Boolean stays Boolean, ints stay
-  /// Integer, etc. — rather than being stringified by `convertSingleValue`.
+/// Tests [ThriftRecordExtractor] — see its class Javadoc for the thrift 
source type → Java output type
+/// matrix. Out of scope: thrift `i8` / `binary` are not declared in 
`complex_types.thrift`; they follow the
+/// same `convert` contract — mirror the pattern below if added.
+public class ThriftRecordExtractorTest {
+
+  // === Single value — order follows the type list in the class Javadoc ===
+
   @Test
-  public void testPrimitiveTypePreservation()
-      throws IOException {
-    try (RecordReader reader = createRecordReader(_sourceFieldNames)) {
-      GenericRow row = new GenericRow();
-      reader.next(row);
-      assertSame(row.getValue(BOOL_FIELD).getClass(), Boolean.class, "thrift 
bool → Boolean");
-      assertSame(row.getValue(INT_FIELD).getClass(), Integer.class, "thrift 
i32 → Integer");
-      assertSame(row.getValue(LONG_FIELD).getClass(), Long.class, "thrift i64 
→ Long");
-      assertSame(row.getValue(DOUBLE_FIELD).getClass(), Double.class, "thrift 
double → Double");
-    }
+  public void testBooleanPreserved() {
+    ComplexTypes record = baseRecord();
+    record.setBooleanField(true);
+    Object result = extract(record, "booleanField");
+    assertEquals(result, true);
   }
 
-  @Override
-  protected Set<String> getSourceFields() {
-    return Sets
-        .newHashSet(INT_FIELD, LONG_FIELD, BOOL_FIELD, DOUBLE_FIELD, 
STRING_FIELD, ENUM_FIELD, OPTIONAL_STRING_FIELD,
-            NESTED_STRUCT_FIELD, SIMPLE_LIST, COMPLEX_LIST, SIMPLE_MAP, 
COMPLEX_MAP);
-  }
-
-  /**
-   * Creates a ThriftRecordReader
-   */
-  @Override
-  protected RecordReader createRecordReader(Set<String> fieldsToRead)
-      throws IOException {
-    ThriftRecordReader recordReader = new ThriftRecordReader();
-    recordReader.init(_tempFile, getSourceFields(), 
getThriftRecordReaderConfig());
-    return recordReader;
-  }
-
-  private ThriftRecordReaderConfig getThriftRecordReaderConfig() {
-    ThriftRecordReaderConfig config = new ThriftRecordReaderConfig();
-    
config.setThriftClass("org.apache.pinot.plugin.inputformat.thrift.ComplexTypes");
-    return config;
-  }
-
-  /**
-   * Create a data input file using input records containing various Thrift 
record types
-   */
-  @Override
-  protected void createInputFile()
-      throws IOException {
-    List<ComplexTypes> thriftRecords = new ArrayList<>(2);
-
-    for (Map<String, Object> inputRecord : _inputRecords) {
-      ComplexTypes thriftRecord = new ComplexTypes();
-      thriftRecord.setIntField((int) inputRecord.get(INT_FIELD));
-      thriftRecord.setLongField((long) inputRecord.get(LONG_FIELD));
-
-      Map<String, Object> nestedStructValues = (Map<String, Object>) 
inputRecord.get(NESTED_STRUCT_FIELD);
-      thriftRecord.setNestedStructField(createNestedType((String) 
nestedStructValues.get(NESTED_STRING_FIELD),
-          (int) nestedStructValues.get(NESTED_INT_FIELD)));
-
-      thriftRecord.setSimpleListField((List<String>) 
inputRecord.get(SIMPLE_LIST));
-
-      List<NestedType> nestedTypeList = new ArrayList<>();
-      for (Map element : (List<Map>) inputRecord.get(COMPLEX_LIST)) {
-        nestedTypeList
-            .add(createNestedType((String) element.get(NESTED_STRING_FIELD), 
(Integer) element.get(NESTED_INT_FIELD)));
-      }
-
-      thriftRecord.setComplexListField(nestedTypeList);
-      thriftRecord.setBooleanField((Boolean) inputRecord.get(BOOL_FIELD));
-      thriftRecord.setDoubleField((Double) inputRecord.get(DOUBLE_FIELD));
-      thriftRecord.setStringField((String) inputRecord.get(STRING_FIELD));
-      thriftRecord.setEnumField(TestEnum.valueOf((String) 
inputRecord.get(ENUM_FIELD)));
-      thriftRecord.setSimpleMapField((Map<String, Integer>) 
inputRecord.get(SIMPLE_MAP));
-
-      Map<String, NestedType> complexMap = new HashMap<>();
-      for (Map.Entry<String, Map<String, Object>> entry : ((Map<String, 
Map<String, Object>>) inputRecord
-          .get(COMPLEX_MAP)).entrySet()) {
-        complexMap.put(entry.getKey(), createNestedType((String) 
entry.getValue().get(NESTED_STRING_FIELD),
-            (int) entry.getValue().get(NESTED_INT_FIELD)));
-      }
-      thriftRecord.setComplexMapField(complexMap);
-      thriftRecords.add(thriftRecord);
-    }
+  @Test
+  public void testShortWidenedToInteger() {
+    // ComplexTypes has no top-level i16 field; ThriftSampleData uses i16 as 
list element type. The extractor
+    // widens `Short` → `Integer` so all small-int thrift types unify behind 
`Integer`.
+    ThriftSampleData record = new ThriftSampleData();
+    record.setActive(false);
+    record.setCreated_at(0L);
+    record.setId(0);
+    record.setGroups(List.of((short) 1, (short) 4));
+    Object[] result = (Object[]) extract(record, "groups");
+    assertEquals(result.length, 2);
+    assertEquals(result, new Object[]{1, 4});
+  }
 
-    try (BufferedOutputStream bufferedOut = new BufferedOutputStream(new 
FileOutputStream(_tempFile))) {
-      TBinaryProtocol binaryOut = new TBinaryProtocol(new 
TIOStreamTransport(bufferedOut));
-      for (ComplexTypes record : thriftRecords) {
-        record.write(binaryOut);
-      }
-    } catch (TException e) {
-      throw new IOException(e);
-    }
+  @Test
+  public void testIntegerPreserved() {
+    ComplexTypes record = baseRecord();
+    record.setIntField(42);
+    Object result = extract(record, "intField");
+    assertEquals(result, 42);
   }
 
-  private Map<String, Object> createRecord1() {
-    Map<String, Object> record = new HashMap<>();
-    record.put(STRING_FIELD, "hello");
-    record.put(INT_FIELD, 10);
-    record.put(LONG_FIELD, 1000L);
-    record.put(DOUBLE_FIELD, 1.0);
-    record.put(BOOL_FIELD, false);
-    record.put(ENUM_FIELD, TestEnum.DELTA.toString());
-    record.put(NESTED_STRUCT_FIELD, createNestedMap(NESTED_STRING_FIELD, "ice 
cream", NESTED_INT_FIELD, 5));
-    record.put(SIMPLE_LIST, Arrays.asList("aaa", "bbb", "ccc"));
-    record.put(COMPLEX_LIST, 
Arrays.asList(createNestedMap(NESTED_STRING_FIELD, "hows", NESTED_INT_FIELD, 
10),
-        createNestedMap(NESTED_STRING_FIELD, "it", NESTED_INT_FIELD, 20),
-        createNestedMap(NESTED_STRING_FIELD, "going", NESTED_INT_FIELD, 30)));
-    record.put(SIMPLE_MAP, createNestedMap("Tuesday", 3, "Wednesday", 4));
-    record.put(COMPLEX_MAP,
-        createNestedMap("fruit1", createNestedMap(NESTED_STRING_FIELD, 
"apple", NESTED_INT_FIELD, 1), "fruit2",
-            createNestedMap(NESTED_STRING_FIELD, "orange", NESTED_INT_FIELD, 
2)));
-    return record;
+  @Test
+  public void testLongPreserved() {
+    ComplexTypes record = baseRecord();
+    record.setLongField(1_588_469_340_000L);
+    Object result = extract(record, "longField");
+    assertEquals(result, 1_588_469_340_000L);
   }
 
-  private Map<String, Object> createRecord2() {
-    Map<String, Object> record = new HashMap<>();
-    record.put(STRING_FIELD, "world");
-    record.put(INT_FIELD, 20);
-    record.put(LONG_FIELD, 2000L);
-    record.put(DOUBLE_FIELD, 2.0);
-    record.put(BOOL_FIELD, false);
-    record.put(ENUM_FIELD, TestEnum.GAMMA.toString());
-    record.put(NESTED_STRUCT_FIELD, createNestedMap(NESTED_STRING_FIELD, "ice 
cream", NESTED_INT_FIELD, 5));
-    record.put(SIMPLE_LIST, Arrays.asList("aaa", "bbb", "ccc"));
-    record.put(COMPLEX_LIST, 
Arrays.asList(createNestedMap(NESTED_STRING_FIELD, "hows", NESTED_INT_FIELD, 
10),
-        createNestedMap(NESTED_STRING_FIELD, "it", NESTED_INT_FIELD, 20),
-        createNestedMap(NESTED_STRING_FIELD, "going", NESTED_INT_FIELD, 30)));
-    record.put(SIMPLE_MAP, createNestedMap("Tuesday", 3, "Wednesday", 4));
-    record.put(COMPLEX_MAP,
-        createNestedMap("fruit1", createNestedMap(NESTED_STRING_FIELD, 
"apple", NESTED_INT_FIELD, 1), "fruit2",
-            createNestedMap(NESTED_STRING_FIELD, "orange", NESTED_INT_FIELD, 
2)));
-    return record;
+  @Test
+  public void testDoublePreserved() {
+    ComplexTypes record = baseRecord();
+    record.setDoubleField(1.5d);
+    Object result = extract(record, "doubleField");
+    assertEquals(result, 1.5d);
+  }
+
+  @Test
+  public void testStringPreserved() {
+    ComplexTypes record = baseRecord();
+    record.setStringField("hello");
+    Object result = extract(record, "stringField");
+    assertEquals(result, "hello");
+  }
+
+  @Test
+  public void testEnumExtractedAsString() {
+    // BaseRecordExtractor.convertSingleValue falls back to toString() for 
unknown types; TestEnum.toString()
+    // returns the enum constant name.
+    ComplexTypes record = baseRecord();
+    record.setEnumField(TestEnum.GAMMA);
+    Object result = extract(record, "enumField");
+    assertEquals(result, "GAMMA");
+  }
+
+  @Test
+  public void testNullForUnsetOptionalField() {
+    // optionalStringField is left unset; thrift returns null for unset 
optional fields.
+    assertNull(extract(baseRecord(), "optionalStringField"));
+  }
+
+  // === Nested struct → Map ===
+
+  @Test
+  public void testNestedStructExtractedAsMap() {
+    ComplexTypes record = baseRecord();
+    record.setNestedStructField(nestedOf("hello", 42));
+    Map<?, ?> result = (Map<?, ?>) extract(record, "nestedStructField");
+    assertEquals(result.get("nestedStringField"), "hello");
+    assertEquals(result.get("nestedIntField"), 42);
   }
 
-  private Map<String, Object> createNestedMap(String key1, Object value1, 
String key2, Object value2) {
-    Map<String, Object> nestedMap = new HashMap<>(2);
-    nestedMap.put(key1, value1);
-    nestedMap.put(key2, value2);
-    return nestedMap;
+  // === List / Set (thrift list / set) → Object[] ===
+
+  @Test
+  public void testListOfStrings() {
+    ComplexTypes record = baseRecord();
+    record.setSimpleListField(List.of("a", "b", "c"));
+    Object[] result = (Object[]) extract(record, "simpleListField");
+    assertEquals(result, new Object[]{"a", "b", "c"});
+  }
+
+  @Test
+  public void testListOfStructs() {
+    ComplexTypes record = baseRecord();
+    record.setComplexListField(List.of(
+        nestedOf("a", 1),
+        nestedOf("b", 2)
+    ));
+    Object[] result = (Object[]) extract(record, "complexListField");
+    assertEquals(result.length, 2);
+    Map<?, ?> r0 = (Map<?, ?>) result[0];
+    assertEquals(r0.get("nestedStringField"), "a");
+    assertEquals(r0.get("nestedIntField"), 1);
+    Map<?, ?> r1 = (Map<?, ?>) result[1];
+    assertEquals(r1.get("nestedStringField"), "b");
+    assertEquals(r1.get("nestedIntField"), 2);
+  }
+
+  @Test
+  public void testSetOfStrings() {
+    // ComplexTypes has no set<> field; ThriftSampleData has set<string>. Sets 
convert to Object[] like lists.
+    ThriftSampleData record = new ThriftSampleData();
+    record.setActive(false);
+    record.setCreated_at(0L);
+    record.setId(0);
+    record.setSet_values(new HashSet<>(List.of("alpha", "beta")));
+    Object[] result = (Object[]) extract(record, "set_values");
+    assertEquals(result.length, 2);
+    // Set has no defined order — assert content via a HashSet round-trip.
+    assertEquals(new HashSet<>(List.of(result)), new 
HashSet<>(List.of("alpha", "beta")));

Review Comment:
   In `testSetOfStrings`, `new HashSet<>(List.of(result))` treats the whole 
`Object[]` as a single element (because `List.of(result)` creates a 1-element 
list containing the array), so this assertion will never equal a `HashSet` of 
the array’s string elements. Convert the extracted `Object[]` to a collection 
of its elements (e.g., via `Arrays.asList(result)`) before wrapping in a 
`HashSet`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to