97harsh commented on code in PR #14964:
URL: https://github.com/apache/iceberg/pull/14964#discussion_r2667468071
##########
spark/v4.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java:
##########
@@ -1105,4 +1105,185 @@ private List<Object[]> currentData() {
private List<Object[]> currentData(String table) {
return rowsToJava(spark.sql("SELECT * FROM " + table + " order by c1, c2, c3").collectAsList());
}
+
+ @TestTemplate
+ public void testRewriteDataFilesOnBranch() {
+ createTable();
+ insertData(10);
+
+ String branchName = "testBranch";
+ sql("ALTER TABLE %s CREATE BRANCH %s", tableName, branchName);
+
+ // Insert more data to the branch (multiple inserts to create multiple small files)
+ for (int i = 0; i < 5; i++) {
+ sql("INSERT INTO %s.branch_%s VALUES (1, 'a', 'b'), (2, 'c', 'd')", tableName, branchName);
+ }
+
+ // Get snapshot IDs before rewrite
+ Table table = validationCatalog.loadTable(tableIdent);
+ long mainSnapshotId = table.currentSnapshot().snapshotId();
+ long branchSnapshotId = table.refs().get(branchName).snapshotId();
+
+ // Call rewrite_data_files on the branch with options to force rewrite
+ List<Object[]> output =
+ sql(
+ "CALL %s.system.rewrite_data_files(table => '%s', branch => '%s', options => map('min-input-files','2'))",
+ catalogName, tableName, branchName);
+
+ // Verify output
+ assertThat(output).hasSize(1);
+ assertThat(output.get(0)).hasSize(5);
+
+ // Check if files were actually rewritten
+ int filesRewritten = (Integer) output.get(0)[0];
+ int filesAdded = (Integer) output.get(0)[1];
+
+ // Verify files were rewritten (we created multiple small files, so they should be compacted)
+ assertThat(filesRewritten)
+ .as("Files should be rewritten when multiple small files exist")
+ .isGreaterThan(0);
+
+ // Verify branch snapshot changed
+ table.refresh();
+ assertThat(table.refs().get(branchName).snapshotId())
+ .as("Branch snapshot should be updated when files are rewritten")
+ .isNotEqualTo(branchSnapshotId);
+
+ // Verify main snapshot unchanged
+ assertThat(table.currentSnapshot().snapshotId())
+ .as("Main snapshot should remain unchanged")
+ .isEqualTo(mainSnapshotId);
+ }
Review Comment:
Thank you for your review, Done!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]