This is an automated email from the ASF dual-hosted git repository.

mahongbin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 98015999e [GLUTEN-5061][CH] Fix assert error when writing mergetree 
data with select * from table limit n (#5068)
98015999e is described below

commit 98015999ee093a0db6bbf151c354e13ba6637bda
Author: Zhichao Zhang <zhan...@apache.org>
AuthorDate: Thu Mar 21 16:31:10 2024 +0800

    [GLUTEN-5061][CH] Fix assert error when writing mergetree data with select 
* from table limit n (#5068)
    
    The following SQL, which writes data into a mergetree table, throws an assertion error:
    ```
    insert into table lineitem_mergetree
    select * from lineitem **limit 10**
    ```
    
    Root cause:
    with limit n, the logic that wraps the query plan with FakeRowAdaptor is wrong.
    
    Close #5061.
---
 .../delta/ClickhouseOptimisticTransaction.scala    |  2 +-
 .../GlutenClickHouseMergeTreeWriteSuite.scala      | 63 ++++++++++++++++++++++
 2 files changed, 64 insertions(+), 1 deletion(-)

diff --git 
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
 
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
index e4786168e..9111bea7f 100644
--- 
a/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
+++ 
b/backends-clickhouse/src/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
@@ -94,7 +94,7 @@ class ClickhouseOptimisticTransaction(
               aqe.isSubquery,
               supportsColumnar = true
             ))
-        case other => queryPlan.withNewChildren(Array(FakeRowAdaptor(other)))
+        case other => FakeRowAdaptor(other)
       }
 
       val statsTrackers: ListBuffer[WriteJobStatsTracker] = ListBuffer()
diff --git 
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala
 
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala
index f8aa2cfa4..ca192cb89 100644
--- 
a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala
@@ -1379,5 +1379,68 @@ class GlutenClickHouseMergeTreeWriteSuite
         assert(plans(0).metrics("totalMarksPk").value === 74)
     }
   }
+
+  test(
+    "GLUTEN-5061: Fix assert error when writing mergetree data with select * 
from table limit n") {
+    spark.sql(s"""
+                 |DROP TABLE IF EXISTS lineitem_mergetree_5061;
+                 |""".stripMargin)
+
+    spark.sql(s"""
+                 |CREATE TABLE IF NOT EXISTS lineitem_mergetree_5061
+                 |(
+                 | l_orderkey      bigint,
+                 | l_partkey       bigint,
+                 | l_suppkey       bigint,
+                 | l_linenumber    bigint,
+                 | l_quantity      double,
+                 | l_extendedprice double,
+                 | l_discount      double,
+                 | l_tax           double,
+                 | l_returnflag    string,
+                 | l_linestatus    string,
+                 | l_shipdate      date,
+                 | l_commitdate    date,
+                 | l_receiptdate   date,
+                 | l_shipinstruct  string,
+                 | l_shipmode      string,
+                 | l_comment       string
+                 |)
+                 |USING clickhouse
+                 |LOCATION '$basePath/lineitem_mergetree_5061'
+                 |""".stripMargin)
+
+    spark.sql(s"""
+                 | insert into table lineitem_mergetree_5061
+                 | select * from lineitem limit 10
+                 |""".stripMargin)
+
+    val sqlStr =
+      s"""
+         |SELECT
+         |    count(1)
+         |FROM
+         |    lineitem_mergetree_5061
+         |""".stripMargin
+    runSql(sqlStr)(
+      df => {
+        val result = df.collect()
+        assert(result.size == 1)
+        assert(result(0).getLong(0) == 10)
+
+        val scanExec = collect(df.queryExecution.executedPlan) {
+          case f: FileSourceScanExecTransformer => f
+        }
+        assert(scanExec.size == 1)
+
+        val mergetreeScan = scanExec(0)
+        assert(mergetreeScan.nodeName.startsWith("Scan mergetree"))
+
+        val fileIndex = 
mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex]
+        val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => 
f.asInstanceOf[AddMergeTreeParts])
+        assert(addFiles.size == 1)
+        assert(addFiles(0).rows == 10)
+      })
+  }
 }
 // scalastyle:off line.size.limit


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org

Reply via email to