This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6d5ebe0dd4c [fix](iceberg)fix iceberg rewrite_data_file fail when 
table had been updated. (#61112)
6d5ebe0dd4c is described below

commit 6d5ebe0dd4c27a03fddc122682887041926ab558
Author: daidai <[email protected]>
AuthorDate: Mon Mar 9 12:36:57 2026 +0800

    [fix](iceberg)fix iceberg rewrite_data_file fail when table had been 
updated. (#61112)
    
    ### What problem does this PR solve?
    Related PR: #56413
    Problem Summary:
    Fix Iceberg `rewrite_data_files` failing when the table has been updated.
---
 .../create_preinstalled_scripts/iceberg/run27.sql  | 40 ++++++++++++++
 .../datasource/iceberg/IcebergTransaction.java     |  6 +++
 .../action/test_iceberg_rewrite_data_files.out     | 18 +++++++
 .../action/test_iceberg_rewrite_data_files.groovy  | 63 ++++++++++++++++++++++
 4 files changed, 127 insertions(+)

diff --git 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run27.sql
 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run27.sql
new file mode 100644
index 00000000000..7c364fa7971
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run27.sql
@@ -0,0 +1,40 @@
+use demo.test_db;
+
+
+create table if not exists test_rewrite_data_with_update (
+  id INT,
+  name STRING
+)
+USING iceberg
+TBLPROPERTIES (
+  'format-version' = '2',
+  'write.delete.mode' = 'merge-on-read',
+  'write.update.mode' = 'merge-on-read',
+  'write.merge.mode' = 'merge-on-read'
+);
+
+
+INSERT INTO test_rewrite_data_with_update VALUES
+(1, 'a'),(2, 'b'),(3, 'c');
+
+update test_rewrite_data_with_update set name = "bb"  where id = 1;
+
+
+
+create table if not exists test_rewrite_data_with_delete (
+  id INT,
+  name STRING
+)
+USING iceberg
+TBLPROPERTIES (
+  'format-version' = '2',
+  'write.delete.mode' = 'merge-on-read',
+  'write.update.mode' = 'merge-on-read',
+  'write.merge.mode' = 'merge-on-read'
+);
+
+
+INSERT INTO test_rewrite_data_with_delete VALUES
+(1, 'a'),(2, 'b'),(3, 'c');
+
+delete from test_rewrite_data_with_delete where id = 1;
\ No newline at end of file
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergTransaction.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergTransaction.java
index c6473ca64dd..e6a59fd78bc 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergTransaction.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergTransaction.java
@@ -73,6 +73,7 @@ public class IcebergTransaction implements Transaction {
     private String branchName;
 
     // Rewrite operation support
+    long startingSnapshotId = -1L; // Track the starting snapshot ID for 
rewrite operations
     private final List<DataFile> filesToDelete = Lists.newArrayList();
     private final List<DataFile> filesToAdd = Lists.newArrayList();
     private boolean isRewriteMode = false;
@@ -133,6 +134,9 @@ public class IcebergTransaction implements Transaction {
                 // create and start the iceberg transaction
                 this.table = IcebergUtils.getIcebergTable(dorisTable);
 
+                // Capture the starting snapshot ID for validation during 
rewrite commit
+                this.startingSnapshotId = table.currentSnapshot().snapshotId();
+
                 // For rewrite operations, we work directly on the main table
                 // No branch information needed
                 this.transaction = table.newTransaction();
@@ -198,6 +202,8 @@ public class IcebergTransaction implements Transaction {
 
         RewriteFiles rewriteFiles = transaction.newRewrite();
 
+        rewriteFiles = rewriteFiles.validateFromSnapshot(startingSnapshotId);
+
         // For rewrite operations, we work directly on the main table
         rewriteFiles = 
rewriteFiles.scanManifestsWith(ops.getThreadPoolWithPreAuth());
 
diff --git 
a/regression-test/data/external_table_p0/iceberg/action/test_iceberg_rewrite_data_files.out
 
b/regression-test/data/external_table_p0/iceberg/action/test_iceberg_rewrite_data_files.out
index 0c449ed9b44..6110e2e5aae 100644
--- 
a/regression-test/data/external_table_p0/iceberg/action/test_iceberg_rewrite_data_files.out
+++ 
b/regression-test/data/external_table_p0/iceberg/action/test_iceberg_rewrite_data_files.out
@@ -68,3 +68,21 @@
 8      EAST    280.00  2024-01-02
 9      EAST    380.00  2024-01-02
 
+-- !before_rewrite_update --
+1      bb
+2      b
+3      c
+
+-- !after_rewrite_update --
+1      bb
+2      b
+3      c
+
+-- !before_rewrite_delete --
+2      b
+3      c
+
+-- !after_rewrite_delete --
+2      b
+3      c
+
diff --git 
a/regression-test/suites/external_table_p0/iceberg/action/test_iceberg_rewrite_data_files.groovy
 
b/regression-test/suites/external_table_p0/iceberg/action/test_iceberg_rewrite_data_files.groovy
index 67433722c77..623fdd16bc3 100644
--- 
a/regression-test/suites/external_table_p0/iceberg/action/test_iceberg_rewrite_data_files.groovy
+++ 
b/regression-test/suites/external_table_p0/iceberg/action/test_iceberg_rewrite_data_files.groovy
@@ -493,4 +493,67 @@ suite("test_iceberg_rewrite_data_files", "p0,external") {
     
     logger.info("Specific partition rewrite test completed successfully")
 
+    // 
=====================================================================================
+    // Test Case 4: Rewrite data files for merge-on-read update table
+    //
+    // Tables `test_rewrite_data_with_update` and 
`test_rewrite_data_with_delete`
+    // are pre-created in Docker initialization (run27.sql) using Spark SQL 
with
+    // format-version = 2 and merge-on-read enabled for delete/update/merge.
+    //
+    // This case verifies that executing rewrite_data_files on a table that has
+    // already performed UPDATE operations (implemented via delete + insert) 
does
+    // not change the logical query results.
+    // 
=====================================================================================
+    logger.info("Starting rewrite_data_files test for merge-on-read UPDATE 
table")
+
+    def table_name_update = "test_rewrite_data_with_update"
+
+    // Verify data before rewrite: id = 1 should have been updated to 'bb'
+    qt_before_rewrite_update """SELECT id, name FROM ${table_name_update} 
ORDER BY id"""
+
+    def rewriteResultUpdate = sql """
+        ALTER TABLE ${catalog_name}.${db_name}.${table_name_update}
+        EXECUTE rewrite_data_files(
+            "target-file-size-bytes" = "10485760",
+            "min-input-files" = "1"
+        )
+    """
+    logger.info("Rewrite data files result for update table: 
${rewriteResultUpdate}")
+
+    // Verify data after rewrite (logical rows should remain the same)
+    qt_after_rewrite_update """SELECT id, name FROM ${table_name_update} ORDER 
BY id"""
+
+    def totalUpdateRecords = sql """SELECT COUNT(*) FROM 
${table_name_update}"""
+    assertTrue(totalUpdateRecords[0][0] == 3, "Update table should still have 
3 logical records after rewrite")
+
+    // 
=====================================================================================
+    // Test Case 5: Rewrite data files for merge-on-read delete table
+    //
+    // This case verifies that executing rewrite_data_files on a table that has
+    // already performed DELETE operations does not resurrect deleted rows and
+    // keeps the logical result set unchanged.
+    // 
=====================================================================================
+    logger.info("Starting rewrite_data_files test for merge-on-read DELETE 
table")
+
+    def table_name_delete = "test_rewrite_data_with_delete"
+
+    // Verify data before rewrite: row with id = 1 should have been deleted
+    qt_before_rewrite_delete """SELECT id, name FROM ${table_name_delete} 
ORDER BY id"""
+
+    def rewriteResultDelete = sql """
+        ALTER TABLE ${catalog_name}.${db_name}.${table_name_delete}
+        EXECUTE rewrite_data_files(
+            "target-file-size-bytes" = "10485760",
+            "min-input-files" = "1"
+        )
+    """
+    logger.info("Rewrite data files result for delete table: 
${rewriteResultDelete}")
+
+    // Verify data after rewrite (deleted rows should not reappear)
+    qt_after_rewrite_delete """SELECT id, name FROM ${table_name_delete} ORDER 
BY id"""
+
+    def totalDeleteRecords = sql """SELECT COUNT(*) FROM 
${table_name_delete}"""
+    assertTrue(totalDeleteRecords[0][0] == 2, "Delete table should still have 
2 logical records after rewrite")
+
+    logger.info("Merge-on-read update/delete rewrite_data_files tests 
completed successfully")
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to