rdblue commented on code in PR #6072:
URL: https://github.com/apache/iceberg/pull/6072#discussion_r1035426037
##########
core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java:
##########
@@ -239,12 +255,75 @@ public void scanningWithEqualityAndPositionalDeleteFiles() throws IOException {
ScanReport scanReport = reporter.lastReport();
assertThat(scanReport).isNotNull();
+ assertThat(scanReport.schemaId()).isEqualTo(0);
+ assertThat(scanReport.projectedFieldIds()).containsExactly(1, 2);
+ assertThat(scanReport.projectedFieldNames()).containsExactly("id", "data");
+
ScanMetricsResult result = scanReport.scanMetrics();
assertThat(result.indexedDeleteFiles().value()).isEqualTo(2);
assertThat(result.equalityDeleteFiles().value()).isEqualTo(1);
assertThat(result.positionalDeleteFiles().value()).isEqualTo(1);
}
+ @Test
+ public void incrementalAppendScan() throws IOException {
+ testIncrementalScan("incremental-append-scan", Table::newIncrementalAppendScan);
+ }
+
+ @Test
+ public void incrementalChangelogScan() throws IOException {
+ testIncrementalScan("incremental-changelog-scan", Table::newIncrementalChangelogScan);
+ }
+
+ private <
+ S extends IncrementalScan<S, T, G>,
+ T extends ScanTask,
+ G extends ScanTaskGroup<T>,
+ X extends BaseTable>
+ void testIncrementalScan(
+ String tableName, Function<X, IncrementalScan<S, T, G>> tableScanFunction)
+ throws IOException {
+ Table table =
+ TestTables.create(
+ tableDir, tableName, SCHEMA, SPEC, SortOrder.unsorted(), formatVersion, reporter);
+
+ table.newAppend().appendFile(FILE_A).appendFile(FILE_D).commit();
+ table.newAppend().appendFile(FILE_B).appendFile(FILE_C).commit();
+
+ IncrementalScan<S, T, G> tableScan = tableScanFunction.apply((X) table);
+ long fromSnapshotId = 1;
+ long toSnapshotId = table.currentSnapshot().snapshotId();
+
+ try (CloseableIterable<?> scanTask =
+ tableScan.filter(Expressions.equal("data", "1")).planFiles()) {
+ scanTask.forEach(task -> {});
+ }
+
+ IncrementalScanReport scanReport = reporter.lastIncrementalReport();
+ assertThat(scanReport).isNotNull();
+
+ assertThat(scanReport.tableName()).isEqualTo(tableName);
+ assertThat(scanReport.fromSnapshotId()).isEqualTo(fromSnapshotId);
+ assertThat(scanReport.toSnapshotId()).isEqualTo(toSnapshotId);
+ assertThat(scanReport.projectedFieldIds()).containsExactly(1, 2);
+ assertThat(scanReport.projectedFieldNames()).containsExactly("id", "data");
+
+ ScanMetricsResult result = scanReport.scanMetrics();
+ assertThat(result.skippedDataFiles().value()).isEqualTo(1);
+ assertThat(result.skippedDeleteFiles().value()).isEqualTo(0);
+ assertThat(result.totalPlanningDuration().totalDuration()).isGreaterThan(Duration.ZERO);
+ assertThat(result.resultDataFiles().value()).isEqualTo(1);
+ assertThat(result.resultDeleteFiles().value()).isEqualTo(0);
Review Comment:
I don't mean just a dimension counter. I mean that "result data files" isn't
meaningful for incremental scans. We want to know how many data files were read
as appended files and how many were read as deleted files. Then we will need to
come up with appropriate metrics for the other scan tasks as well.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]