pvary commented on code in PR #15675:
URL: https://github.com/apache/iceberg/pull/15675#discussion_r2975111982


##########
data/src/test/java/org/apache/iceberg/data/BaseFormatModelTests.java:
##########
@@ -628,6 +638,390 @@ void 
testReaderBuilderRecordsPerBatchNotSupported(FileFormat fileFormat) throws
         .isInstanceOf(UnsupportedOperationException.class);
   }
 
+  @ParameterizedTest
+  @FieldSource("FILE_FORMATS")
+  void testReadMetadataColumnsFilePathAndSpecId(FileFormat fileFormat) throws 
IOException {
+
+    DataGenerator dataGenerator = new DataGenerators.DefaultSchema();
+    Schema schema = dataGenerator.schema();
+    List<Record> genericRecords = dataGenerator.generateRecords();
+    writeGenericRecords(fileFormat, schema, genericRecords);
+
+    String filePath = "test-data-file.parquet";
+    int specId = 0;
+    Schema projectionSchema = new Schema(MetadataColumns.FILE_PATH, 
MetadataColumns.SPEC_ID);
+
+    Map<Integer, Object> idToConstant =
+        ImmutableMap.of(
+            MetadataColumns.FILE_PATH.fieldId(), filePath,
+            MetadataColumns.SPEC_ID.fieldId(), specId);
+
+    InputFile inputFile = encryptedFile.encryptingOutputFile().toInputFile();
+    List<T> readRecords;
+    try (CloseableIterable<T> reader =
+        FormatModelRegistry.readBuilder(fileFormat, engineType(), inputFile)
+            .project(projectionSchema)
+            .engineProjection(engineSchema(projectionSchema))
+            .idToConstant(convertConstantsToEngine(projectionSchema, 
idToConstant))
+            .build()) {
+      readRecords = ImmutableList.copyOf(reader);
+    }
+
+    List<Record> expected =
+        IntStream.range(0, genericRecords.size())
+            .mapToObj(
+                i ->
+                    GenericRecord.create(projectionSchema)
+                        .copy(
+                            MetadataColumns.FILE_PATH.name(), filePath,
+                            MetadataColumns.SPEC_ID.name(), specId))
+            .toList();
+
+    assertThat(readRecords).hasSize(genericRecords.size());
+    assertEquals(projectionSchema, convertToEngineRecords(expected, 
projectionSchema), readRecords);
+  }
+
+  @ParameterizedTest
+  @FieldSource("FILE_FORMATS")
+  void testReadMetadataColumnRowPosition(FileFormat fileFormat) throws 
IOException {
+
+    DataGenerator dataGenerator = new DataGenerators.DefaultSchema();
+    Schema schema = dataGenerator.schema();
+    List<Record> genericRecords = dataGenerator.generateRecords();
+    writeGenericRecords(fileFormat, schema, genericRecords);
+
+    Schema projectionSchema = new Schema(MetadataColumns.ROW_POSITION);
+
+    InputFile inputFile = encryptedFile.encryptingOutputFile().toInputFile();
+    List<T> readRecords;
+    try (CloseableIterable<T> reader =
+        FormatModelRegistry.readBuilder(fileFormat, engineType(), inputFile)
+            .project(projectionSchema)
+            .engineProjection(engineSchema(projectionSchema))
+            .build()) {
+      readRecords = ImmutableList.copyOf(reader);
+    }
+
+    List<Record> expected =
+        IntStream.range(0, genericRecords.size())
+            .mapToObj(
+                i ->
+                    GenericRecord.create(projectionSchema)
+                        .copy(MetadataColumns.ROW_POSITION.name(), (long) i))
+            .toList();
+
+    assertThat(readRecords).hasSize(genericRecords.size());
+    assertEquals(projectionSchema, convertToEngineRecords(expected, 
projectionSchema), readRecords);
+  }
+
+  @ParameterizedTest
+  @FieldSource("FILE_FORMATS")
+  void testReadMetadataColumnIsDeleted(FileFormat fileFormat) throws 
IOException {
+
+    DataGenerator dataGenerator = new DataGenerators.DefaultSchema();
+    Schema schema = dataGenerator.schema();
+    List<Record> genericRecords = dataGenerator.generateRecords();
+    writeGenericRecords(fileFormat, schema, genericRecords);
+
+    Schema projectionSchema = new Schema(MetadataColumns.IS_DELETED);
+
+    InputFile inputFile = encryptedFile.encryptingOutputFile().toInputFile();
+    List<T> readRecords;
+    try (CloseableIterable<T> reader =
+        FormatModelRegistry.readBuilder(fileFormat, engineType(), inputFile)
+            .project(projectionSchema)
+            .engineProjection(engineSchema(projectionSchema))
+            .build()) {
+      readRecords = ImmutableList.copyOf(reader);
+    }
+
+    List<Record> expected =
+        IntStream.range(0, genericRecords.size())
+            .mapToObj(
+                i ->
+                    GenericRecord.create(projectionSchema)
+                        .copy(MetadataColumns.IS_DELETED.name(), false))
+            .toList();
+
+    assertThat(readRecords).hasSize(genericRecords.size());
+    assertEquals(projectionSchema, convertToEngineRecords(expected, 
projectionSchema), readRecords);
+  }
+
+  @ParameterizedTest
+  @FieldSource("FILE_FORMATS")
+  void testReadMetadataColumnRowLinage(FileFormat fileFormat) throws 
IOException {
+    assumeSupports(fileFormat, FEATURE_META_ROW_LINEAGE);
+
+    DataGenerator dataGenerator = new DataGenerators.DefaultSchema();
+    Schema schema = dataGenerator.schema();
+    List<Record> genericRecords = dataGenerator.generateRecords();
+    writeGenericRecords(fileFormat, schema, genericRecords);
+
+    long baseRowId = 100L;
+    long fileSeqNumber = 5L;
+    Schema projectionSchema =
+        new Schema(MetadataColumns.ROW_ID, 
MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER);
+
+    Map<Integer, Object> idToConstant =
+        ImmutableMap.of(
+            MetadataColumns.ROW_ID.fieldId(), baseRowId,
+            MetadataColumns.LAST_UPDATED_SEQUENCE_NUMBER.fieldId(), 
fileSeqNumber);

Review Comment:
   How is the null handling done here?
   Is it handled at the reader level, or do the engines need to apply the
   snapshot-based row_id and sequence number themselves?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to