phet commented on code in PR #4058:
URL: https://github.com/apache/gobblin/pull/4058#discussion_r1812012723
##########
gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/iceberg/IcebergTableTest.java:
##########
@@ -226,6 +226,90 @@ public void testNewTablePropertiesAreRegistered() throws
Exception {
catalog.dropTable(destTableId);
}
+ /** Verify that getPartitionSpecificDataFiles return datafiles belonging to
the partition defined by predicate */
+ @Test
+ public void testGetPartitionSpecificDataFiles() throws IOException {
+ List<String> paths = Arrays.asList(
+ "/path/tableName/data/id=1/file1.orc",
+ "/path/tableName/data/file3.orc",
+ "/path/tableName/data/id=2/file5.orc",
+ "/path/tableName/data/file4.orc",
+ "/path/tableName/data/id=3/file2.orc"
+ );
+ // Using the schema defined in start of this class
+ PartitionData partitionData = new
PartitionData(icebergPartitionSpec.partitionType());
+ partitionData.set(0, "1");
+ Map<String, PartitionData> pathsWithPartitionData = Maps.newHashMap();
+ paths.forEach(path -> pathsWithPartitionData.put(path, partitionData));
+
+ addPartitionDataFiles(table, createDataFiles(pathsWithPartitionData));
+
+ IcebergTable icebergTable = new IcebergTable(tableId,
+ catalog.newTableOps(tableId),
+ catalogUri,
+ catalog.loadTable(tableId));
+ // Using AlwaysTrue & AlwaysFalse Predicate to avoid mocking of predicate
class
+ Predicate<StructLike> alwaysTruePredicate = partition -> true;
+ Predicate<StructLike> alwaysFalsePredicate = partition -> false;
+
Assert.assertEquals(icebergTable.getPartitionSpecificDataFiles(alwaysTruePredicate).size(),
5);
+
Assert.assertEquals(icebergTable.getPartitionSpecificDataFiles(alwaysFalsePredicate).size(),
0);
+ }
+
+ /** Verify that overwritePartition replace data files belonging to given
partition col and value */
+ @Test
+ public void testOverwritePartition() throws IOException {
+ List<String> paths = Arrays.asList(
+ "/path/tableName/data/id=1/file1.orc",
+ "/path/tableName/data/file2.orc"
+ );
+ // Using the schema defined in start of this class
+ PartitionData partitionData = new
PartitionData(icebergPartitionSpec.partitionType());
+ partitionData.set(0, "1");
+ Map<String, PartitionData> pathsWithPartitionData = Maps.newHashMap();
+ paths.forEach(path -> pathsWithPartitionData.put(path, partitionData));
+
+ addPartitionDataFiles(table, createDataFiles(pathsWithPartitionData));
+
+ IcebergTable icebergTable = new IcebergTable(tableId,
+ catalog.newTableOps(tableId),
+ catalogUri,
+ catalog.loadTable(tableId));
+
+ verifyAnyOrder(paths,
icebergTable.getCurrentSnapshotInfo().getAllDataFilePaths(), "data filepaths
should match");
+
+ List<String> paths2 = Arrays.asList(
+ "/path/tableName/data/file3.orc",
+ "/path/tableName/data/id=2/file4.orc"
+ );
+ // Using the schema defined in start of this class
+ PartitionData partitionData2 = new
PartitionData(icebergPartitionSpec.partitionType());
+ partitionData2.set(0, "2");
+ Map<String, PartitionData> paths2WithPartitionData2 = Maps.newHashMap();
+ paths2.forEach(path -> paths2WithPartitionData2.put(path, partitionData2));
+
+ List<DataFile> partition2DataFiles =
createDataFiles(paths2WithPartitionData2);
+ // here, since partition data with value 2 doesn't exist yet,
+ // we expect it to get added to the table, w/o changing or deleting any
other partitions
+ icebergTable.overwritePartition(partition2DataFiles, "id", "2");
+ List<String> expectedPaths2 = new ArrayList<>(paths);
+ expectedPaths2.addAll(paths2);
+ verifyAnyOrder(expectedPaths2,
icebergTable.getCurrentSnapshotInfo().getAllDataFilePaths(), "data filepaths
should match");
+
+ List<String> paths3 = Arrays.asList(
+ "/path/tableName/data/id=2/file5.orc",
+ "/path/tableName/data/file6.orc"
+ );
+ // Reusing same partition data to create data file with different paths
+ Map<String, PartitionData> paths3WithPartitionData = Maps.newHashMap();
+ paths3.forEach(path -> paths3WithPartitionData.put(path, partitionData));
+ List<DataFile> partition1NewDataFiles =
createDataFiles(paths3WithPartitionData);
Review Comment:
Not a big deal (NBD), but this could be written as a one-liner:
```
List<DataFile> partition1NewDataFiles = createDataFiles(
paths3.stream().collect(Collectors.toMap(x -> x, x -> partition1Data))
);
```
(An alternative to `x -> x` is `Function.identity()` - your choice.)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]