This is an automated email from the ASF dual-hosted git repository. zhangzc pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push: new f0e2885af [GLUTEN-5062][CH] Add a UT to ensure that IN filtering can apply on CH primary key (#5072) f0e2885af is described below commit f0e2885af60b3eaf2ad7554a95e4bd730a4f03c8 Author: Zhichao Zhang <zhan...@apache.org> AuthorDate: Thu Mar 21 19:27:31 2024 +0800 [GLUTEN-5062][CH] Add a UT to ensure that IN filtering can apply on CH primary key (#5072) Add a UT to ensure that IN filtering can apply on CH primary key Close #5062. --- .../GlutenClickHouseMergeTreeWriteSuite.scala | 91 +++++++++++++++++++++- 1 file changed, 90 insertions(+), 1 deletion(-) diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala index 457c88e34..6750e251c 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenClickHouseMergeTreeWriteSuite.scala @@ -17,7 +17,7 @@ package io.glutenproject.execution import org.apache.spark.{SPARK_VERSION_SHORT, SparkConf} -import org.apache.spark.sql.SaveMode +import org.apache.spark.sql.{DataFrame, SaveMode} import org.apache.spark.sql.delta.catalog.ClickHouseTableV2 import org.apache.spark.sql.delta.files.TahoeFileIndex import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper @@ -1452,5 +1452,94 @@ class GlutenClickHouseMergeTreeWriteSuite assert(addFiles(0).rows == 10) }) } + + test("GLUTEN-5062: Add a UT to ensure that IN filtering can apply on CH primary key") { + spark.sql(s""" + |DROP TABLE IF EXISTS lineitem_mergetree_5062; + |""".stripMargin) + + spark.sql(s""" + |CREATE TABLE IF NOT EXISTS lineitem_mergetree_5062 + |( + | l_orderkey bigint, + | l_partkey bigint, + | l_suppkey bigint, + | l_linenumber bigint, + | l_quantity double, + | l_extendedprice double, + | l_discount double, + | l_tax double, + | l_returnflag string, + | l_linestatus string, + | l_shipdate date, + | l_commitdate date, + | l_receiptdate date, + | l_shipinstruct string, + | l_shipmode string, + | l_comment string + |) + |USING clickhouse + |TBLPROPERTIES (orderByKey='l_returnflag,l_shipdate', + | primaryKey='l_returnflag,l_shipdate') + |LOCATION '$basePath/lineitem_mergetree_5062' + |""".stripMargin) + + spark.sql(s""" + | insert into table lineitem_mergetree_5062 + | select * from lineitem + |""".stripMargin) + + def checkSelectedMarksCnt(df: DataFrame, exceptedCnt: Long): Unit = { + val scanExec = collect(df.queryExecution.executedPlan) { + case f: FileSourceScanExecTransformer => f + } + assert(scanExec.size == 1) + + val mergetreeScan = scanExec(0) + assert(mergetreeScan.nodeName.startsWith("Scan mergetree")) + + val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] + val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) + assert( + (addFiles.map(_.marks).sum - addFiles.size) == mergetreeScan.metrics("totalMarksPk").value) + assert(mergetreeScan.metrics("selectedMarksPk").value == exceptedCnt) + } + + val sqlStr1 = + s""" + |SELECT + | sum(l_extendedprice) + |FROM + | lineitem_mergetree_5062 + |WHERE + | l_shipdate in (date'1998-08-15', date'1993-12-05', date'1993-03-01') + |""".stripMargin + runSql(sqlStr1)( + df => { + val result = df.collect() + assert(result.size == 1) + assert(result(0).getDouble(0).toString.substring(0, 6).equals("2.6480")) + + checkSelectedMarksCnt(df, 34) + }) + + val sqlStr2 = + s""" + |SELECT + | sum(l_extendedprice) + |FROM + | lineitem_mergetree_5062 + |WHERE + | l_returnflag not in ('N', 'A') + |""".stripMargin + runSql(sqlStr2)( + df => { + val result = df.collect() + assert(result.size == 1) + assert(result(0).getDouble(0).toString.substring(0, 6).equals("5.3379")) + + checkSelectedMarksCnt(df, 29) + }) + } } // scalastyle:off line.size.limit --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org