This is an automated email from the ASF dual-hosted git repository. rui pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push: new 628763fc3 [VL] Support regr_sxx and regr_syy aggregate functions for Spark 3.4 (#5444) 628763fc3 is described below commit 628763fc3a9f471e4b2c25c1d07efc968857be16 Author: Joey <joey....@alibaba-inc.com> AuthorDate: Fri Apr 19 13:02:15 2024 +0800 [VL] Support regr_sxx and regr_syy aggregate functions for Spark 3.4 (#5444) --- .../execution/VeloxAggregateFunctionsSuite.scala | 38 ++++++++++++++++++++-- .../substrait/SubstraitToVeloxPlanValidator.cc | 3 +- docs/velox-backend-support-progress.md | 3 ++ .../apache/gluten/expression/ExpressionNames.scala | 1 + .../gluten/sql/shims/spark34/Spark34Shims.scala | 3 +- 5 files changed, 43 insertions(+), 5 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala index 2573725a7..df0817410 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala @@ -432,14 +432,46 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu } // Disable for Spark 3.5. 
- testWithSpecifiedSparkVersion("regr_sxy", Some("3.4"), Some("3.4")) { + testWithSpecifiedSparkVersion("regr_sxy regr_sxx regr_syy", Some("3.4"), Some("3.4")) { runQueryAndCompare(""" - |select regr_sxy(l_partkey, l_suppkey) from lineitem; + |select regr_sxy(l_quantity, l_tax) from lineitem; |""".stripMargin) { checkGlutenOperatorMatch[HashAggregateExecTransformer] } runQueryAndCompare( - "select regr_sxy(l_partkey, l_suppkey), count(distinct l_orderkey) from lineitem") { + "select regr_sxy(l_quantity, l_tax), count(distinct l_orderkey) from lineitem") { + df => + { + assert( + getExecutedPlan(df).count( + plan => { + plan.isInstanceOf[HashAggregateExecTransformer] + }) == 4) + } + } + runQueryAndCompare(""" + |select regr_sxx(l_quantity, l_tax) from lineitem; + |""".stripMargin) { + checkGlutenOperatorMatch[HashAggregateExecTransformer] + } + runQueryAndCompare( + "select regr_sxx(l_quantity, l_tax), count(distinct l_orderkey) from lineitem") { + df => + { + assert( + getExecutedPlan(df).count( + plan => { + plan.isInstanceOf[HashAggregateExecTransformer] + }) == 4) + } + } + runQueryAndCompare(""" + |select regr_syy(l_quantity, l_tax) from lineitem; + |""".stripMargin) { + checkGlutenOperatorMatch[HashAggregateExecTransformer] + } + runQueryAndCompare( + "select regr_syy(l_quantity, l_tax), count(distinct l_orderkey) from lineitem") { df => { assert( diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc index f992b94c3..2a5857ae9 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc @@ -1106,7 +1106,8 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::AggregateRel& ag "kurtosis", "regr_slope", "regr_intercept", - "regr_sxy"}; + "regr_sxy", + "regr_replacement"}; auto udfFuncs = UdfLoader::getInstance()->getRegisteredUdafNames(); diff --git a/docs/velox-backend-support-progress.md 
b/docs/velox-backend-support-progress.md index 5e81081b7..4b480529e 100644 --- a/docs/velox-backend-support-progress.md +++ b/docs/velox-backend-support-progress.md @@ -384,6 +384,9 @@ Gluten supports 199 functions. (Drag to right to see all data types) | regr_r2 | regr_r2 | regr_r2 | S | | | | S | S | S | S | S | | | | | | | | | | | | | regr_intercept | regr_intercept | regr_intercept | S | | | | S | S | S | S | S | | | | | | | | | | | | | regr_slope | regr_slope | regr_slope | S | | | | S | S | S | S | S | | | | | | | | | | | | +| regr_sxy | regr_sxy | regr_sxy | S | | | | S | S | S | S | S | | | | | | | | | | | | +| regr_sxx | regr_sxx | regr_sxx | S | | | | S | S | S | S | S | | | | | | | | | | | | +| regr_syy | regr_syy | regr_syy | S | | | | S | S | S | S | S | | | | | | | | | | | | | skewness | skewness | skewness | S | | | | S | S | S | S | S | | | | | | | | | | | | | some | | | | | | | | | | | | | | | | | | | | | | | | std,stddev | stddev | | S | | | | S | S | S | S | S | | | | | | | | | | | | diff --git a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala index 851c81a8d..26f63bc75 100644 --- a/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala +++ b/shims/common/src/main/scala/org/apache/gluten/expression/ExpressionNames.scala @@ -52,6 +52,7 @@ object ExpressionNames { final val REGR_SLOPE = "regr_slope" final val REGR_INTERCEPT = "regr_intercept" final val REGR_SXY = "regr_sxy" + final val REGR_REPLACEMENT = "regr_replacement" // Function names used by Substrait plan. 
final val ADD = "add" diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala index fe06d7857..aa19e2a2c 100644 --- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala +++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/Spark34Shims.scala @@ -86,7 +86,8 @@ class Spark34Shims extends SparkShims { Sig[RegrR2](ExpressionNames.REGR_R2), Sig[RegrSlope](ExpressionNames.REGR_SLOPE), Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT), - Sig[RegrSXY](ExpressionNames.REGR_SXY) + Sig[RegrSXY](ExpressionNames.REGR_SXY), + Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT) ) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org