phet commented on code in PR #4058:
URL: https://github.com/apache/gobblin/pull/4058#discussion_r1774167967


##########
gobblin-data-management/src/test/java/org/apache/gobblin/data/management/copy/iceberg/IcebergTableTest.java:
##########
@@ -333,4 +338,120 @@ protected static <T> void verifyAnyOrder(Collection<T> 
actual, Collection<T> exp
   protected static <T, C extends Collection<T>> List<T> flatten(Collection<C> 
cc) {
     return cc.stream().flatMap(x -> x.stream()).collect(Collectors.toList());
   }
+
+  @Test
+  public void testGetPartitionSpecificDataFiles() throws IOException {
+    TableIdentifier testTableId = TableIdentifier.of(dbName, "testTable");
+    Table testTable = catalog.createTable(testTableId, icebergSchema, 
icebergPartitionSpec);
+
+    List<String> paths = Arrays.asList(
+        "/path/tableName/data/id=1/file1.orc",
+        "/path/tableName/data/id=1/file3.orc",
+        "/path/tableName/data/id=1/file5.orc",
+        "/path/tableName/data/id=1/file4.orc",
+        "/path/tableName/data/id=1/file2.orc"
+    );
+    // Using the schema defined in start of this class
+    PartitionData partitionData = new 
PartitionData(icebergPartitionSpec.partitionType());
+    partitionData.set(0, 1L);
+    List<PartitionData> partitionDataList = Collections.nCopies(5, 
partitionData);
+
+    addPartitionDataFiles(testTable, paths, partitionDataList);
+
+    IcebergTable icebergTable = new IcebergTable(testTableId,
+        catalog.newTableOps(testTableId),
+        catalogUri,
+        catalog.loadTable(testTableId));
+    // Using AlwaysTrue & AlwaysFalse Predicate to avoid mocking of predicate 
class
+    Predicate<StructLike> alwaysTruePredicate = partition -> true;
+    Predicate<StructLike> alwaysFalsePredicate = partition -> false;
+    
Assert.assertEquals(icebergTable.getPartitionSpecificDataFiles(alwaysTruePredicate).size(),
 5);
+    
Assert.assertEquals(icebergTable.getPartitionSpecificDataFiles(alwaysFalsePredicate).size(),
 0);
+
+    catalog.dropTable(testTableId);
+  }
+
+  @Test
+  public void testReplacePartitions() throws IOException {
+    TableIdentifier testTableId = TableIdentifier.of(dbName, "testTable");
+    Table testTable = catalog.createTable(testTableId, icebergSchema, 
icebergPartitionSpec);
+
+    List<String> paths = Arrays.asList(
+        "/path/tableName/data/id=1/file1.orc",
+        "/path/tableName/data/id=1/file2.orc"
+    );
+    // Using the schema defined in start of this class
+    PartitionData partitionData = new 
PartitionData(icebergPartitionSpec.partitionType());
+    partitionData.set(0, 1L);
+    PartitionData partitionData2 = new 
PartitionData(icebergPartitionSpec.partitionType());
+    partitionData2.set(0, 1L);
+    List<PartitionData> partitionDataList = Arrays.asList(partitionData, 
partitionData2);
+
+    addPartitionDataFiles(testTable, paths, partitionDataList);
+
+    IcebergTable icebergTable = new IcebergTable(testTableId,
+        catalog.newTableOps(testTableId),
+        catalogUri,
+        catalog.loadTable(testTableId));
+
+    List<String> paths2 = Arrays.asList(
+        "/path/tableName/data/id=2/file3.orc",
+        "/path/tableName/data/id=2/file4.orc"
+    );
+    // Using the schema defined in start of this class
+    PartitionData partitionData3 = new 
PartitionData(icebergPartitionSpec.partitionType());
+    partitionData3.set(0, 2L);
+    PartitionData partitionData4 = new 
PartitionData(icebergPartitionSpec.partitionType());
+    partitionData4.set(0, 2L);
+    List<PartitionData> partitionDataList2 = Arrays.asList(partitionData3, 
partitionData4);
+
+    List<DataFile> dataFiles = getDataFiles(paths2, partitionDataList2);
+    // here, since partition data with value 2 doesn't exist yet, we expect it 
to get added to the table
+    icebergTable.replacePartitions(dataFiles);
+    List<String> expectedPaths = new ArrayList<>(paths);
+    expectedPaths.addAll(paths2);
+    verifyAnyOrder(expectedPaths, 
icebergTable.getCurrentSnapshotInfo().getAllDataFilePaths(), "data filepaths 
should match");

Review Comment:
   Technically, we should probably also call 
`icebergTable.getCurrentSnapshotInfo().getAllDataFilePaths()` prior to 
`.replacePartitions`, to assert its starting value (i.e., that the table 
holds only the original `paths` before the replacement is applied).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to