rdblue commented on a change in pull request #2926:
URL: https://github.com/apache/iceberg/pull/2926#discussion_r682769112
##########
File path: core/src/test/java/org/apache/iceberg/TestMetadataTableScans.java
##########
@@ -350,6 +354,224 @@ public void testPartitionsTableScanNotNullFilter() {
validateIncludesPartitionScan(tasksUnary, 3);
}
+ @Test
+ public void testFilesTableScanNoFilter() {
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_0)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_1)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_2)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_3)
+ .commit();
+
+ Table dataFilesTable = new DataFilesTable(table.ops(), table);
+ Types.StructType expected = new Schema(
+ required(102, "partition", Types.StructType.of(
+ optional(1000, "data_bucket", Types.IntegerType.get())),
+ "Partition data tuple, schema based on the partition spec")).asStruct();
+
+ TableScan scanNoFilter =
dataFilesTable.newScan().select("partition.data_bucket");
+ Assert.assertEquals(expected, scanNoFilter.schema().asStruct());
+ CloseableIterable<CombinedScanTask> tasksNoFilter =
scanNoFilter.planTasks();
+ List<ManifestFile> manifests =
StreamSupport.stream(tasksNoFilter.spliterator(), false)
+ .flatMap(c -> c.files().stream().map(t ->
((DataFilesTable.ManifestReadTask) t).getManifest()))
+ .collect(Collectors.toList());
+
+ Assert.assertEquals(4, manifests.size());
+ validateIncludesManifestFile(manifests, 0);
+ validateIncludesManifestFile(manifests, 1);
+ validateIncludesManifestFile(manifests, 2);
+ validateIncludesManifestFile(manifests, 3);
+ }
+
+ @Test
+ public void testFilesTableScanAndFilter() {
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_0)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_1)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_2)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_3)
+ .commit();
+
+ Table dataFilesTable = new DataFilesTable(table.ops(), table);
+
+ Expression andEquals = Expressions.and(
+ Expressions.equal("partition.data_bucket", 0),
+ Expressions.greaterThan("record_count", 0));
+ TableScan scanAndEq = dataFilesTable.newScan().filter(andEquals);
+ CloseableIterable<CombinedScanTask> tasksAndEq = scanAndEq.planTasks();
+ List<ManifestFile> manifests =
StreamSupport.stream(tasksAndEq.spliterator(), false)
+ .flatMap(c -> c.files().stream().map(t ->
((DataFilesTable.ManifestReadTask) t).getManifest()))
+ .collect(Collectors.toList());
+ Assert.assertEquals(1, manifests.size());
+ validateIncludesManifestFile(manifests, 0);
+ }
+
+ @Test
+ public void testFilesTableScanLtFilter() {
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_0)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_1)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_2)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_3)
+ .commit();
+
+ Table dataFilesTable = new DataFilesTable(table.ops(), table);
+
+ Expression ltAnd = Expressions.and(
+ Expressions.lessThan("partition.data_bucket", 2),
+ Expressions.greaterThan("record_count", 0));
+ TableScan scan = dataFilesTable.newScan()
+ .filter(ltAnd);
+ CloseableIterable<CombinedScanTask> tasksLt = scan.planTasks();
+ List<ManifestFile> manifests = StreamSupport.stream(tasksLt.spliterator(),
false)
+ .flatMap(c -> c.files().stream().map(t ->
((DataFilesTable.ManifestReadTask) t).getManifest()))
+ .collect(Collectors.toList());
+ Assert.assertEquals(2, manifests.size());
+ validateIncludesManifestFile(manifests, 0);
+ validateIncludesManifestFile(manifests, 1);
+ }
+
+ @Test
+ public void testFilesTableScanOrFilter() {
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_0)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_1)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_2)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_3)
+ .commit();
+
+ Table dataFilesTable = new DataFilesTable(table.ops(), table);
+
+ Expression or = Expressions.or(
+ Expressions.equal("partition.data_bucket", 2),
+ Expressions.greaterThan("record_count", 0));
+ TableScan scan = dataFilesTable.newScan()
+ .filter(or);
+ CloseableIterable<CombinedScanTask> tasksOr = scan.planTasks();
+ List<ManifestFile> manifests = StreamSupport.stream(tasksOr.spliterator(),
false)
+ .flatMap(c -> c.files().stream().map(t ->
((DataFilesTable.ManifestReadTask) t).getManifest()))
+ .collect(Collectors.toList());
+ Assert.assertEquals(4, manifests.size());
+ validateIncludesManifestFile(manifests, 0);
+ validateIncludesManifestFile(manifests, 1);
+ validateIncludesManifestFile(manifests, 2);
+ validateIncludesManifestFile(manifests, 3);
+ }
+
+ @Test
+ public void testFilesScanNotFilter() {
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_0)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_1)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_2)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_3)
+ .commit();
+ Table dataFilesTable = new DataFilesTable(table.ops(), table);
+
+ Expression not =
Expressions.not(Expressions.lessThan("partition.data_bucket", 2));
+ TableScan scan = dataFilesTable.newScan()
+ .filter(not);
+ CloseableIterable<CombinedScanTask> tasksNot = scan.planTasks();
+ List<ManifestFile> manifests =
StreamSupport.stream(tasksNot.spliterator(), false)
+ .flatMap(c -> c.files().stream().map(t ->
((DataFilesTable.ManifestReadTask) t).getManifest()))
+ .collect(Collectors.toList());
+ Assert.assertEquals(2, manifests.size());
+ validateIncludesManifestFile(manifests, 2);
+ validateIncludesManifestFile(manifests, 3);
+ }
+
+
+ @Test
+ public void testFilesTableScanInFilter() {
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_0)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_1)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_2)
+ .commit();
+ table.newFastAppend()
+ .appendFile(FILE_PARTITION_3)
+ .commit();
Review comment:
You might consider a helper method to prepare the table for these tests.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]