This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new 83b74827aa2 branch-2.1: [fix](iceberg)Fix count(*) error with dangling 
delete problem #44039 (#44101)
83b74827aa2 is described below

commit 83b74827aa24f7f55469945c4a003c2409000384
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Nov 19 17:19:25 2024 +0800

    branch-2.1: [fix](iceberg)Fix count(*) error with dangling delete problem 
#44039 (#44101)
    
    Cherry-picked from #44039
    
    Co-authored-by: wuwenchi <[email protected]>
---
 .../docker-compose/iceberg/entrypoint.sh.tpl        | 12 ++++++++++--
 .../{ => iceberg}/run01.sql                         |  0
 .../{ => iceberg}/run02.sql                         |  0
 .../{ => iceberg}/run03.sql                         |  0
 .../{ => iceberg}/run04.sql                         |  0
 .../{ => iceberg}/run05.sql                         |  0
 .../create_preinstalled_scripts/iceberg/run06.sql   | 21 +++++++++++++++++++++
 .../{run06.sql => paimon/run01.sql}                 |  0
 .../datasource/iceberg/source/IcebergScanNode.java  | 10 ++++++----
 .../iceberg/test_iceberg_optimize_count.out         |  3 +++
 .../iceberg/test_iceberg_optimize_count.groovy      | 13 ++++++++++++-
 11 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl 
b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
index 1af170ff91b..a4b27bdd6c0 100644
--- a/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
+++ b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl
@@ -25,9 +25,17 @@ start-thriftserver.sh --driver-java-options 
"-Dderby.system.home=/tmp/derby"
 
 
 
-ls /mnt/scripts/create_preinstalled_scripts/*.sql | xargs -n 1 -I {} bash -c '
+ls /mnt/scripts/create_preinstalled_scripts/iceberg/*.sql | xargs -n 1 -I {} 
bash -c '
     START_TIME=$(date +%s)
-    spark-sql --master  spark://doris--spark-iceberg:7077   -f {} 
+    spark-sql --master spark://doris--spark-iceberg:7077 --conf 
spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions
 -f {} 
+    END_TIME=$(date +%s)
+    EXECUTION_TIME=$((END_TIME - START_TIME))
+    echo "Script: {} executed in $EXECUTION_TIME seconds"
+'
+
+ls /mnt/scripts/create_preinstalled_scripts/paimon/*.sql | xargs -n 1 -I {} 
bash -c '
+    START_TIME=$(date +%s)
+    spark-sql --master  spark://doris--spark-iceberg:7077 --conf 
spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions
 -f {} 
     END_TIME=$(date +%s)
     EXECUTION_TIME=$((END_TIME - START_TIME))
     echo "Script: {} executed in $EXECUTION_TIME seconds"
diff --git 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run01.sql
 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run01.sql
similarity index 100%
rename from 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run01.sql
rename to 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run01.sql
diff --git 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run02.sql
 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run02.sql
similarity index 100%
rename from 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run02.sql
rename to 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run02.sql
diff --git 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run03.sql
 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run03.sql
similarity index 100%
rename from 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run03.sql
rename to 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run03.sql
diff --git 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run04.sql
 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run04.sql
similarity index 100%
rename from 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run04.sql
rename to 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run04.sql
diff --git 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run05.sql
 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run05.sql
similarity index 100%
rename from 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run05.sql
rename to 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run05.sql
diff --git 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run06.sql
 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run06.sql
new file mode 100644
index 00000000000..3ac97c50099
--- /dev/null
+++ 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/iceberg/run06.sql
@@ -0,0 +1,21 @@
+use demo.test_db;
+
+drop table if exists dangling_delete_after_write;
+create table dangling_delete_after_write (
+  id BIGINT NOT NULL,
+  val STRING)
+USING iceberg
+TBLPROPERTIES (
+  'format' = 'iceberg/parquet',
+  'format-version' = '2',
+  'identifier-fields' = '[id]',
+  'upsert-enabled' = 'true',
+  'write.delete.mode' = 'merge-on-read',
+  'write.parquet.compression-codec' = 'zstd',
+  'write.update.mode' = 'merge-on-read',
+  'write.upsert.enabled' = 'true');
+
+insert into dangling_delete_after_write values(1, 'abd');
+update dangling_delete_after_write set val = 'def' where id = 1;
+call demo.system.rewrite_data_files(table => 
'demo.test_db.dangling_delete_after_write', options => map('min-input-files', 
'1'));
+insert into dangling_delete_after_write values(2, 'xyz');
\ No newline at end of file
diff --git 
a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run06.sql
 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run01.sql
similarity index 100%
rename from 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run06.sql
rename to 
docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/paimon/run01.sql
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
index 56dda7b4fe2..f7b58158d1a 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java
@@ -384,13 +384,15 @@ public class IcebergScanNode extends FileQueryScanNode {
             return 0;
         }
 
+        // `TOTAL_POSITION_DELETES` needs to be 0 as well,
+        // to prevent the 'dangling delete' problem after `rewrite_data_files`
+        // ref: 
https://iceberg.apache.org/docs/nightly/spark-procedures/#rewrite_position_delete_files
         Map<String, String> summary = snapshot.summary();
-        if (summary.get(IcebergUtils.TOTAL_EQUALITY_DELETES).equals("0")) {
-            return Long.parseLong(summary.get(IcebergUtils.TOTAL_RECORDS))
-                - 
Long.parseLong(summary.get(IcebergUtils.TOTAL_POSITION_DELETES));
-        } else {
+        if (!summary.get(IcebergUtils.TOTAL_EQUALITY_DELETES).equals("0")
+                || 
!summary.get(IcebergUtils.TOTAL_POSITION_DELETES).equals("0")) {
             return -1;
         }
+        return Long.parseLong(summary.get(IcebergUtils.TOTAL_RECORDS));
     }
 
     @Override
diff --git 
a/regression-test/data/external_table_p0/iceberg/test_iceberg_optimize_count.out
 
b/regression-test/data/external_table_p0/iceberg/test_iceberg_optimize_count.out
index f2e945f9cec..ec9129a00d2 100644
--- 
a/regression-test/data/external_table_p0/iceberg/test_iceberg_optimize_count.out
+++ 
b/regression-test/data/external_table_p0/iceberg/test_iceberg_optimize_count.out
@@ -23,3 +23,6 @@
 -- !q08 --
 1000
 
+-- !q09 --
+2
+
diff --git 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy
 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy
index 927d442b8dd..4d74e1406e7 100644
--- 
a/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy
+++ 
b/regression-test/suites/external_table_p0/iceberg/test_iceberg_optimize_count.groovy
@@ -70,7 +70,7 @@ suite("test_iceberg_optimize_count", 
"p0,external,doris,external_docker,external
         }
         explain {
             sql("""${sqlstr4}""")
-            contains """pushdown agg=COUNT (1000)"""
+            contains """pushdown agg=COUNT (-1)"""
         }
 
         // don't use push down count
@@ -98,6 +98,17 @@ suite("test_iceberg_optimize_count", 
"p0,external,doris,external_docker,external
             contains """pushdown agg=NONE"""
         }
 
+        // The table has a `dangling delete` after rewrite
+        sql """ set enable_count_push_down_for_external_table=true; """
+        sqlstr5 = """ select count(*) from 
${catalog_name}.test_db.dangling_delete_after_write; """
+
+        qt_q09 """${sqlstr5}""" 
+
+        explain {
+            sql("""${sqlstr5}""")
+            contains """pushdown agg=COUNT (-1)"""
+        }
+
     } finally {
         sql """ set enable_count_push_down_for_external_table=true; """
         sql """drop catalog if exists ${catalog_name}"""


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to