(incubator-gluten) branch main updated: [CORE] Rename CoalesceExecTransformer to ColumnarCoalesceExec (#6000)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 95dcdbdbd [CORE] Rename CoalesceExecTransformer to ColumnarCoalesceExec (#6000) 95dcdbdbd is described below commit 95dcdbdbd9f4f17de40884b74a65564bba2a9bf9 Author: Xiduo You AuthorDate: Thu Jun 6 13:01:25 2024 +0800 [CORE] Rename CoalesceExecTransformer to ColumnarCoalesceExec (#6000) --- ...esceExecTransformer.scala => ColumnarCoalesceExec.scala} | 13 + .../gluten/extension/columnar/OffloadSingleNode.scala | 2 +- .../gluten/extension/columnar/TransformHintRule.scala | 5 +++-- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/CoalesceExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/ColumnarCoalesceExec.scala similarity index 87% rename from gluten-core/src/main/scala/org/apache/gluten/execution/CoalesceExecTransformer.scala rename to gluten-core/src/main/scala/org/apache/gluten/execution/ColumnarCoalesceExec.scala index 8f30805be..f40a7f8f0 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/CoalesceExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/ColumnarCoalesceExec.scala @@ -16,7 +16,7 @@ */ package org.apache.gluten.execution -import org.apache.gluten.extension.{GlutenPlan, ValidationResult} +import org.apache.gluten.extension.GlutenPlan import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.rdd.RDD @@ -26,7 +26,7 @@ import org.apache.spark.sql.catalyst.plans.physical.{Partitioning, SinglePartiti import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode} import org.apache.spark.sql.vectorized.ColumnarBatch -case class CoalesceExecTransformer(numPartitions: Int, child: SparkPlan) +case class ColumnarCoalesceExec(numPartitions: 
Int, child: SparkPlan) extends UnaryExecNode with GlutenPlan { @@ -38,9 +38,6 @@ case class CoalesceExecTransformer(numPartitions: Int, child: SparkPlan) if (numPartitions == 1) SinglePartition else UnknownPartitioning(numPartitions) } - override protected def doValidateInternal(): ValidationResult = -ValidationResult.ok - override protected def doExecute(): RDD[InternalRow] = { throw new UnsupportedOperationException() } @@ -49,18 +46,18 @@ case class CoalesceExecTransformer(numPartitions: Int, child: SparkPlan) if (numPartitions == 1 && child.executeColumnar().getNumPartitions < 1) { // Make sure we don't output an RDD with 0 partitions, when claiming that we have a // `SinglePartition`. - new CoalesceExecTransformer.EmptyRDDWithPartitions(sparkContext, numPartitions) + new ColumnarCoalesceExec.EmptyRDDWithPartitions(sparkContext, numPartitions) } else { child.executeColumnar().coalesce(numPartitions, shuffle = false) } } - override protected def withNewChildInternal(newChild: SparkPlan): CoalesceExecTransformer = + override protected def withNewChildInternal(newChild: SparkPlan): ColumnarCoalesceExec = copy(child = newChild) } -object CoalesceExecTransformer { +object ColumnarCoalesceExec { class EmptyRDDWithPartitions(@transient private val sc: SparkContext, numPartitions: Int) extends RDD[ColumnarBatch](sc, Nil) { diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala index b82254072..39cc8ad2e 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/OffloadSingleNode.scala @@ -281,7 +281,7 @@ object OffloadOthers { applyScanTransformer(plan) case plan: CoalesceExec => logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") - CoalesceExecTransformer(plan.numPartitions, plan.child) + 
ColumnarCoalesceExec(plan.numPartitions, plan.child) case plan: ProjectExec => val columnarChild = plan.child logDebug(s"Columnar Processing for ${plan.getClass} is currently supported.") diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala index 1da0c0db8..ca35c74f6 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/TransformHintRule.scala @@ -487,8 +487,9 @@ case class AddTransformHintRule() extends Rule[SparkPlan] {
(incubator-gluten) branch main updated: [CORE] ExpandFallbackPolicy should propagate fallback reason to vanilla SparkPlan (#5971)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new c9350fb20 [CORE] ExpandFallbackPolicy should propagate fallback reason to vanilla SparkPlan (#5971) c9350fb20 is described below commit c9350fb204f9912c7cb5bfa4534b0aeabfac683c Author: Xiduo You AuthorDate: Wed Jun 5 13:44:49 2024 +0800 [CORE] ExpandFallbackPolicy should propagate fallback reason to vanilla SparkPlan (#5971) --- .../extension/columnar/ExpandFallbackPolicy.scala | 15 - .../spark/sql/gluten/GlutenFallbackSuite.scala | 65 ++ .../spark/sql/gluten/GlutenFallbackSuite.scala | 43 -- .../spark/sql/gluten/GlutenFallbackSuite.scala | 43 -- 4 files changed, 156 insertions(+), 10 deletions(-) diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/ExpandFallbackPolicy.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/ExpandFallbackPolicy.scala index 6f8d7cde7..4ee153173 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/ExpandFallbackPolicy.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/ExpandFallbackPolicy.scala @@ -235,7 +235,18 @@ case class ExpandFallbackPolicy(isAdaptiveContext: Boolean, originalPlan: SparkP } } - private def fallbackToRowBasedPlan(outputsColumnar: Boolean): SparkPlan = { + private def fallbackToRowBasedPlan(glutenPlan: SparkPlan, outputsColumnar: Boolean): SparkPlan = { +// Propagate fallback reason to vanilla SparkPlan +glutenPlan.foreach { + case _: GlutenPlan => + case p: SparkPlan if TransformHints.isNotTransformable(p) && p.logicalLink.isDefined => +originalPlan + .find(_.logicalLink.exists(_.fastEquals(p.logicalLink.get))) + .filterNot(TransformHints.isNotTransformable) + .foreach(origin => TransformHints.tag(origin, TransformHints.getHint(p))) + case _ => +} + val planWithTransitions = 
Transitions.insertTransitions(originalPlan, outputsColumnar) planWithTransitions } @@ -259,7 +270,7 @@ case class ExpandFallbackPolicy(isAdaptiveContext: Boolean, originalPlan: SparkP // Scan Parquet // | // ColumnarToRow - val vanillaSparkPlan = fallbackToRowBasedPlan(outputsColumnar) + val vanillaSparkPlan = fallbackToRowBasedPlan(plan, outputsColumnar) val vanillaSparkTransitionCost = countTransitionCostForVanillaSparkPlan(vanillaSparkPlan) if ( GlutenConfig.getConf.fallbackPreferColumnar && diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala index b85dd6a35..6860d6a12 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/gluten/GlutenFallbackSuite.scala @@ -23,7 +23,9 @@ import org.apache.gluten.utils.BackendTestUtils import org.apache.spark.scheduler.{SparkListener, SparkListenerEvent} import org.apache.spark.sql.{GlutenSQLTestsTrait, Row} +import org.apache.spark.sql.execution.ProjectExec import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper +import org.apache.spark.sql.execution.aggregate.HashAggregateExec import org.apache.spark.sql.execution.ui.{GlutenSQLAppStatusStore, SparkListenerSQLExecutionStart} import org.apache.spark.status.ElementTrackingStore @@ -161,4 +163,67 @@ class GlutenFallbackSuite extends GlutenSQLTestsTrait with AdaptiveSparkPlanHelp } } } + + test("Add logical link to rewritten spark plan") { +val events = new ArrayBuffer[GlutenPlanFallbackEvent] +val listener = new SparkListener { + override def onOtherEvent(event: SparkListenerEvent): Unit = { +event match { + case e: GlutenPlanFallbackEvent => events.append(e) + case _ => +} + } +} +spark.sparkContext.addSparkListener(listener) +withSQLConf(GlutenConfig.EXPRESSION_BLACK_LIST.key -> "add") { + try { +val df = spark.sql("select 
sum(id + 1) from range(10)") +df.collect() +spark.sparkContext.listenerBus.waitUntilEmpty() +val project = find(df.queryExecution.executedPlan) { + _.isInstanceOf[ProjectExec] +} +assert(project.isDefined) +assert( + events.exists(_.fallbackNodeToReason.values.toSet +.exists(_.contains("Not supported to map spark function name" + } finally { +spark.sparkContext.removeSparkListener(listener) + } +} + } + + test("ExpandFallbackPolicy should pro
(incubator-gluten) branch main updated: [CORE] Move driver/executor endpoint to CH backend (#5914)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 1a0b96925 [CORE] Move driver/executor endpoint to CH backend (#5914) 1a0b96925 is described below commit 1a0b969250de57cf7cb6266484f755064058ce0e Author: Xiduo You AuthorDate: Fri May 31 09:28:14 2024 +0800 [CORE] Move driver/executor endpoint to CH backend (#5914) --- .../gluten/backendsapi/clickhouse/CHBackend.scala | 1 - .../backendsapi/clickhouse/CHBroadcastApi.scala| 45 -- .../backendsapi/clickhouse/CHListenerApi.scala | 16 ++-- .../backendsapi/clickhouse/CHTransformerApi.scala | 5 +++ .../execution/CHHashJoinExecTransformer.scala | 20 -- .../listener/CHGlutenSQLAppStatusListener.scala| 11 +- .../apache/spark/rpc/GlutenDriverEndpoint.scala| 0 .../apache/spark/rpc/GlutenExecutorEndpoint.scala | 11 -- .../org/apache/spark/rpc/GlutenRpcConstants.scala | 0 .../org/apache/spark/rpc/GlutenRpcMessages.scala | 0 .../gluten/backendsapi/velox/VeloxBackend.scala| 1 - .../backendsapi/velox/VeloxBroadcastApi.scala | 32 --- .../backendsapi/velox/VeloxListenerApi.scala | 12 +++--- .../backendsapi/velox/VeloxSparkPlanExecApi.scala | 2 +- .../ShuffledHashJoinExecTransformer.scala | 9 - ...oxBroadcastNestedLoopJoinExecTransformer.scala} | 13 +-- .../apache/gluten/utils/VeloxBloomFilterTest.java | 43 - gluten-core/pom.xml| 4 -- .../scala/org/apache/gluten/GlutenPlugin.scala | 8 +--- .../org/apache/gluten/backendsapi/Backend.scala| 2 - .../gluten/backendsapi/BackendsApiManager.scala| 4 -- .../apache/gluten/backendsapi/BroadcastApi.scala | 42 .../apache/gluten/backendsapi/ListenerApi.scala| 7 ++-- .../apache/gluten/backendsapi/TransformerApi.scala | 3 ++ .../BroadcastNestedLoopJoinExecTransformer.scala | 18 + .../gluten/execution/JoinExecTransformer.scala | 16 +--- .../spark/listener/GlutenListenerFactory.scala | 3 -- 
.../org/apache/spark/sql/GlutenQueryTest.scala | 4 +- 28 files changed, 131 insertions(+), 201 deletions(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala index e5f68a869..c79d0aaee 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBackend.scala @@ -49,7 +49,6 @@ class CHBackend extends Backend { override def validatorApi(): ValidatorApi = new CHValidatorApi override def metricsApi(): MetricsApi = new CHMetricsApi override def listenerApi(): ListenerApi = new CHListenerApi - override def broadcastApi(): BroadcastApi = new CHBroadcastApi override def settings(): BackendSettingsApi = CHBackendSettings } diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBroadcastApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBroadcastApi.scala deleted file mode 100644 index d70ba6b8d..0 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHBroadcastApi.scala +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - *http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.gluten.backendsapi.clickhouse - -import org.apache.gluten.backendsapi.BroadcastApi -import org.apache.gluten.execution.CHBroadcastBuildSideCache - -import org.apache.spark.internal.Logging -import org.apache.spark.rpc.GlutenDriverEndpoint - -class CHBroadcastApi extends BroadcastApi with Logging { - override def cleanExecutionBroadcastTable( - executionId: String, - broadcastTableIds: java.util.Set[String]): Unit = { -if (broadcastTableIds != null) { - broadcastTableIds.forEach(
(incubator-gluten) branch main updated: [GLUTEN-5852][CH] fix mismatch result columns size exception (#5853)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new f10d3b7bd [GLUTEN-5852][CH] fix mismatch result columns size exception (#5853) f10d3b7bd is described below commit f10d3b7bdd02dc28f85bf25207789251a265fa67 Author: shuai.xu AuthorDate: Wed May 29 19:35:52 2024 +0800 [GLUTEN-5852][CH] fix mismatch result columns size exception (#5853) --- .../gluten/execution/GlutenClickhouseCountDistinctSuite.scala | 7 +++ .../main/scala/org/apache/gluten/utils/PullOutProjectHelper.scala | 7 ++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala index 1b954df22..5887050d0 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseCountDistinctSuite.scala @@ -213,4 +213,11 @@ class GlutenClickhouseCountDistinctSuite extends GlutenClickHouseWholeStageTrans } ) } + + test("GLUTEN-5852: Fix mismatch result columns size exception related to 5618") { +val sql = + "select distinct * from (select 2 as r3, count(distinct a, b, c), 2 as r1, 2 as r2 from " + +"values (0, null, 1), (1, 1, 1), (2, 2, 1), (1, 2, 1) ,(2, 2, 2) as data(a,b,c) group by c)" +compareResultsAgainstVanillaSpark(sql, true, { _ => }) + } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/utils/PullOutProjectHelper.scala b/gluten-core/src/main/scala/org/apache/gluten/utils/PullOutProjectHelper.scala index 824694837..505f13f26 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/utils/PullOutProjectHelper.scala +++ 
b/gluten-core/src/main/scala/org/apache/gluten/utils/PullOutProjectHelper.scala @@ -61,7 +61,12 @@ trait PullOutProjectHelper { replaceBoundReference: Boolean = false): Expression = expr match { case alias: Alias => -projectExprsMap.getOrElseUpdate(alias.child.canonicalized, alias).toAttribute +alias.child match { + case _: Literal => +projectExprsMap.getOrElseUpdate(alias, alias).toAttribute + case _ => +projectExprsMap.getOrElseUpdate(alias.child.canonicalized, alias).toAttribute +} case attr: Attribute => attr case e: BoundReference if !replaceBoundReference => e case other => - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Fix build error (#5891)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 63db78931 [VL] Fix build error (#5891) 63db78931 is described below commit 63db789311a2ee65500b3011592e62cc026f04e8 Author: Zhen Li <10524738+zhli1142...@users.noreply.github.com> AuthorDate: Tue May 28 16:08:16 2024 +0800 [VL] Fix build error (#5891) --- .../scala/org/apache/gluten/execution/SampleExecTransformer.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/SampleExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/SampleExecTransformer.scala index 86189392a..6f9ef3428 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/SampleExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/SampleExecTransformer.scala @@ -112,8 +112,8 @@ case class SampleExecTransformer( doNativeValidation(substraitContext, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { -val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { +val childCtx = child.asInstanceOf[TransformSupport].transform(context) val operatorId = context.nextOperatorId(this.nodeName) val currRel = getRelNode(context, condition, child.output, operatorId, childCtx.root, validation = false) - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [CORE] Only materialize subquery before doing transform (#5862)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 761680349 [CORE] Only materialize subquery before doing transform (#5862) 761680349 is described below commit 76168034983bb4bba055e7e5940d3558880ea3a8 Author: Xiduo You AuthorDate: Tue May 28 15:46:35 2024 +0800 [CORE] Only materialize subquery before doing transform (#5862) We transform subquery(e.g., dpp) during columanr rules which is not actually been executed, so we should not materialize subquery when replacing expression as it is not in concurrent. This pr wraps doTransform with transform to always do materialize subquery before doTransform, so that the subquries can be submitted in concurrent. --- .../execution/CHHashAggregateExecTransformer.scala | 4 +- .../extension/GlutenCustomAggExpressionSuite.scala | 2 +- .../benchmarks/CHParquetReadBenchmark.scala| 2 +- .../execution/HashAggregateExecTransformer.scala | 4 +- .../apache/gluten/execution/TopNTransformer.scala | 4 +- .../BasicPhysicalOperatorTransformer.scala | 6 +- .../execution/BasicScanExecTransformer.scala | 6 +- .../BroadcastNestedLoopJoinExecTransformer.scala | 6 +- .../CartesianProductExecTransformer.scala | 6 +- .../gluten/execution/ExpandExecTransformer.scala | 4 +- .../execution/GenerateExecTransformerBase.scala| 4 +- .../gluten/execution/JoinExecTransformer.scala | 6 +- .../apache/gluten/execution/LimitTransformer.scala | 4 +- .../gluten/execution/SortExecTransformer.scala | 4 +- .../execution/SortMergeJoinExecTransformer.scala | 6 +- .../gluten/execution/WholeStageTransformer.scala | 60 +- .../gluten/execution/WindowExecTransformer.scala | 4 +- .../WindowGroupLimitExecTransformer.scala | 4 +- .../execution/WriteFilesExecTransformer.scala | 4 +- .../gluten/expression/ExpressionConverter.scala| 21 +- 
.../expression/ScalarSubqueryTransformer.scala | 21 +- .../columnar/enumerated/RemoveFilter.scala | 4 +- .../ColumnarCollapseTransformStages.scala | 2 +- .../python/EvalPythonExecTransformer.scala | 4 +- .../scalar-subquery/scalar-subquery-select.sql | 363 -- .../scalar-subquery/scalar-subquery-select.sql.out | 791 - .../utils/velox/VeloxSQLQueryTestSettings.scala| 6 +- .../gluten/utils/velox/VeloxTestSettings.scala | 2 - .../errors/GlutenQueryExecutionErrorsSuite.scala | 11 - .../scalar-subquery/scalar-subquery-select.sql | 257 --- .../scalar-subquery/scalar-subquery-select.sql.out | 614 .../utils/velox/VeloxSQLQueryTestSettings.scala| 6 +- .../gluten/utils/velox/VeloxTestSettings.scala | 2 - .../errors/GlutenQueryExecutionErrorsSuite.scala | 11 - 34 files changed, 88 insertions(+), 2167 deletions(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala index 4a4d345db..7e6888143 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala @@ -81,8 +81,8 @@ case class CHHashAggregateExecTransformer( } } - override def doTransform(context: SubstraitContext): TransformContext = { -val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { +val childCtx = child.asInstanceOf[TransformSupport].transform(context) val operatorId = context.nextOperatorId(this.nodeName) val aggParams = new AggregationParams diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/extension/GlutenCustomAggExpressionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/extension/GlutenCustomAggExpressionSuite.scala index 
3a2808e70..ba7d2c8f1 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/extension/GlutenCustomAggExpressionSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/extension/GlutenCustomAggExpressionSuite.scala @@ -91,7 +91,7 @@ class GlutenCustomAggExpressionSuite extends GlutenClickHouseTPCHAbstractSuite { assert(planExecs(3).isInstanceOf[HashAggregateExec]) val substraitContext = new SubstraitContext - planExecs(2).asInstanceOf[CHHashAggregateExecTransformer].doTransform(substraitContext) + planExecs(2).asInstanceOf[CHHashAggregateExecTransformer].transform(substraitContext) // Check the functions assert
(incubator-gluten) branch main updated: [CORE] Refactor ExpressionTransformer (#5796)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 683232b74 [CORE] Refactor ExpressionTransformer (#5796) 683232b74 is described below commit 683232b746263ac6d720a4bf3c61474fe3d39e1a Author: Xiduo You AuthorDate: Tue May 21 15:18:08 2024 +0800 [CORE] Refactor ExpressionTransformer (#5796) --- .../clickhouse/CHSparkPlanExecApi.scala| 20 +++ .../backendsapi/clickhouse/CHTransformerApi.scala | 12 -- .../expression/CHExpressionTransformer.scala | 44 +++--- .../backendsapi/velox/VeloxSparkPlanExecApi.scala | 26 +++- .../backendsapi/velox/VeloxTransformerApi.scala| 13 -- .../gluten/expression/ExpressionTransformer.scala | 48 +++--- .../apache/spark/sql/expression/UDFResolver.scala | 25 +--- .../gluten/backendsapi/SparkPlanExecApi.scala | 79 +++--- .../apache/gluten/backendsapi/TransformerApi.scala | 7 - .../expression/ArrayExpressionTransformer.scala| 21 +-- .../expression/BoundReferenceTransformer.scala | 29 .../gluten/expression/ConditionalTransformer.scala | 13 +- .../DateTimeExpressionsTransformer.scala | 164 +++- .../expression/DecimalRoundTransformer.scala | 27 +--- .../gluten/expression/ExpressionConverter.scala| 147 +++--- .../gluten/expression/ExpressionTransformer.scala | 71 - .../expression/GenericExpressionTransformer.scala | 46 -- .../expression/HashExpressionTransformer.scala | 44 -- .../JsonTupleExpressionTransformer.scala | 2 +- .../expression/LambdaFunctionTransformer.scala | 22 +-- .../gluten/expression/LiteralTransformer.scala | 28 .../expression/MapExpressionTransformer.scala | 45 +- .../expression/NamedExpressionsTransformer.scala | 58 ++- .../PredicateExpressionTransformer.scala | 82 ++ .../expression/ScalarSubqueryTransformer.scala | 6 +- .../expression/StringExpressionTransformer.scala | 49 -- .../expression/StructExpressionTransformer.scala | 54 
--- .../expression/TimestampAddTransformer.scala | 53 --- .../expression/UnaryExpressionTransformer.scala| 166 - .../extension/CustomerExpressionTransformer.scala | 26 +--- 30 files changed, 331 insertions(+), 1096 deletions(-) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala index d6e323679..45f90719f 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHSparkPlanExecApi.scala @@ -627,6 +627,15 @@ class CHSparkPlanExecApi extends SparkPlanExecApi { CHSizeExpressionTransformer(substraitExprName, child, original) } + override def genLikeTransformer( + substraitExprName: String, + left: ExpressionTransformer, + right: ExpressionTransformer, + original: Like): ExpressionTransformer = { +// CH backend does not support escapeChar, so skip it here. +GenericExpressionTransformer(substraitExprName, Seq(left, right), original) + } + /** Generate an ExpressionTransformer to transform TruncTimestamp expression for CH. 
*/ override def genTruncTimestampTransformer( substraitExprName: String, @@ -637,6 +646,17 @@ class CHSparkPlanExecApi extends SparkPlanExecApi { CHTruncTimestampTransformer(substraitExprName, format, timestamp, timeZoneId, original) } + override def genDateDiffTransformer( + substraitExprName: String, + endDate: ExpressionTransformer, + startDate: ExpressionTransformer, + original: DateDiff): ExpressionTransformer = { +GenericExpressionTransformer( + substraitExprName, + Seq(LiteralTransformer("day"), startDate, endDate), + original) + } + override def genPosExplodeTransformer( substraitExprName: String, child: ExpressionTransformer, diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala index ee46d685c..c75cf4788 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala @@ -203,18 +203,6 @@ class CHTransformerApi extends TransformerApi with Logging { } - override def createDateDiffParamList( - start: ExpressionNode, - end: ExpressionNode): Iterable[ExpressionNode] = {
(incubator-gluten) branch main updated: [VL][DOC] Update udf doc (#5814)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new acf3e6de2 [VL][DOC] Update udf doc (#5814) acf3e6de2 is described below commit acf3e6de24cf7b858d2a3b4f9f7e93824d4f4c86 Author: Rong Ma AuthorDate: Tue May 21 11:01:36 2024 +0800 [VL][DOC] Update udf doc (#5814) Update the compile command for build native udf library. --- docs/developers/VeloxNativeUDF.md | 23 --- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/docs/developers/VeloxNativeUDF.md b/docs/developers/VeloxNativeUDF.md index 13e0a97d1..b95190593 100644 --- a/docs/developers/VeloxNativeUDF.md +++ b/docs/developers/VeloxNativeUDF.md @@ -137,7 +137,16 @@ You can also specify the local or HDFS URIs to the UDF libraries or archives. Lo ## Try the example We provided Velox UDF examples in file [MyUDF.cc](../../cpp/velox/udf/examples/MyUDF.cc) and UDAF examples in file [MyUDAF.cc](../../cpp/velox/udf/examples/MyUDAF.cc). -After building gluten cpp, you can find the example libraries at /path/to/gluten/cpp/build/velox/udf/examples/ +You need to build the gluten cpp project with `--build_example=ON` to get the example libraries. + +```shell +## compile Gluten cpp module +cd /path/to/gluten/cpp +## if you use custom velox_home, make sure specified here by --velox_home +./compile.sh --build_velox_backend=ON --build_examples=ON +``` + +Then, you can find the example libraries at /path/to/gluten/cpp/build/velox/udf/examples/ Start spark-shell or spark-sql with below configuration @@ -157,16 +166,16 @@ or Run query. 
The functions `myudf1` and `myudf2` increment the input value by a constant of 5 ``` -select myudf1(1), myudf2(100L) +select myudf1(100L), myudf2(1) ``` The output from spark-shell will be like ``` -++--+ -|udfexpression(1)|udfexpression(100)| -++--+ -| 6| 105| -++--+ ++--++ +|udfexpression(100)|udfexpression(1)| ++--++ +| 105| 6| ++--++ ``` - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [GLUTEN-5438] feat: Dynamically sizing off-heap memory (#5439)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new db8496bdb [GLUTEN-5438] feat: Dynamically sizing off-heap memory (#5439) db8496bdb is described below commit db8496bdb16750b424d351c2fd23d831f4af769c Author: Marcus Markiewicz <43656407+supermem...@users.noreply.github.com> AuthorDate: Thu May 16 23:09:02 2024 -0400 [GLUTEN-5438] feat: Dynamically sizing off-heap memory (#5439) ## What changes were proposed in this pull request? Today, in Spark we specify the on-heap and off-heap memory sizes as a configuration value read at the beginning of executing a job. With this change, we are exposing a new feature that is enabled with a new spark.gluten.memory.dynamic.offHeap.sizing.enabled setting. When this setting is configured to true, the offheap setting will be ignored in Gluten and we will size the offheap as the same size as the spark.executor.memory setting. We will then proceed to enforcing a total memory quota, calculated by the sum of what memory is committed and in use in the Java heap (calculated with Runtime.getRuntime().totalMemory() - Runtime.GetRuntime().freeMemory()) plus the tracked off-heap memory in TreeMemoryConsumer. When there is an allocation that would tide us over this total amount of committed memory, we will fail the allocation and trigger an OOM. Note that with this change, we perform the "quota check" when an allocation in the native engine is informed to Gluten. In practice, this means that it is possible that the Java codebase can oversubscribe memory as it allocates, which is under the on-heap quota, although there is enough off-heap usage where we should fail the allocation. A test exercising this setting is part of this change. Fixes: #5438 ## How was this patch tested? 
Manual testing with Spark and included test --- .../execution/DynamicOffHeapSizingTest.scala | 60 ++ .../DynamicOffHeapSizingMemoryTarget.java | 95 ++ .../memory/memtarget/MemoryTargetVisitor.java | 2 + .../gluten/memory/memtarget/MemoryTargets.java | 11 ++- .../memory/memtarget/ThrowOnOomMemoryTarget.java | 19 - .../scala/org/apache/gluten/GlutenPlugin.scala | 78 +++--- .../org/apache/spark/memory/SparkMemoryUtil.scala | 7 +- .../scala/org/apache/gluten/GlutenConfig.scala | 36 8 files changed, 291 insertions(+), 17 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/DynamicOffHeapSizingTest.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/DynamicOffHeapSizingTest.scala new file mode 100644 index 0..56fc6eac3 --- /dev/null +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/DynamicOffHeapSizingTest.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.gluten.execution + +import org.apache.gluten.benchmarks.RandomParquetDataGenerator +import org.apache.gluten.tags.SkipTestTags + +import org.apache.spark.SparkConf + +@SkipTestTags +class DynamicOffHeapSizingTest extends VeloxWholeStageTransformerSuite { + override protected val resourcePath: String = "/tpch-data-parquet-velox" + override protected val fileFormat: String = "parquet" + + private val dataGenerator = RandomParquetDataGenerator(System.currentTimeMillis()) + private val outputPath = getClass.getResource("/").getPath + "dynamicoffheapsizing_output.parquet" + private val AGG_SQL = +"""select f_1, count(DISTINCT f_1) + |from tbl group + |group by 1""".stripMargin + + override def beforeAll(): Unit = { +super.beforeAll() + } + override protected def sparkConf: SparkConf = { +super.sparkConf + .set("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager") + .se
(incubator-gluten) branch main updated: [GLUTEN-4652][VL] Fix min_by/max_by result mismatch when RDD partition num > 1 (#5711)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 33f993554 [GLUTEN-4652][VL] Fix min_by/max_by result mismatch when RDD partition num > 1 (#5711) 33f993554 is described below commit 33f993554bebc388c7011dd91b86eaadc729f0d5 Author: zhouyifan279 <88070094+zhouyifan...@users.noreply.github.com> AuthorDate: Mon May 13 19:03:41 2024 +0800 [GLUTEN-4652][VL] Fix min_by/max_by result mismatch when RDD partition num > 1 (#5711) --- .../execution/VeloxAggregateFunctionsSuite.scala | 18 --- .../functions/RegistrationAllFunctions.cc | 23 -- .../functions/RowConstructorWithAllNull.h | 37 -- .../operators/functions/RowConstructorWithNull.cc | 10 +- .../operators/functions/RowConstructorWithNull.h | 8 + 5 files changed, 28 insertions(+), 68 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala index 398f5e05e..faa361edf 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala @@ -194,18 +194,12 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu } test("min_by/max_by") { -withTempPath { - path => -Seq((5: Integer, 6: Integer), (null: Integer, 11: Integer), (null: Integer, 5: Integer)) - .toDF("a", "b") - .write - .parquet(path.getCanonicalPath) -spark.read - .parquet(path.getCanonicalPath) - .createOrReplaceTempView("test") -runQueryAndCompare("select min_by(a, b), max_by(a, b) from test") { - checkGlutenOperatorMatch[HashAggregateExecTransformer] -} +withSQLConf(("spark.sql.leafNodeDefaultParallelism", "2")) { + runQueryAndCompare( +"select 
min_by(a, b), max_by(a, b) from " + + "values (5, 6), (null, 11), (null, 5) test(a, b)") { +checkGlutenOperatorMatch[HashAggregateExecTransformer] + } } } diff --git a/cpp/velox/operators/functions/RegistrationAllFunctions.cc b/cpp/velox/operators/functions/RegistrationAllFunctions.cc index c77fa47e5..5a6b0f6aa 100644 --- a/cpp/velox/operators/functions/RegistrationAllFunctions.cc +++ b/cpp/velox/operators/functions/RegistrationAllFunctions.cc @@ -15,11 +15,10 @@ * limitations under the License. */ #include "operators/functions/RegistrationAllFunctions.h" + #include "operators/functions/Arithmetic.h" -#include "operators/functions/RowConstructorWithAllNull.h" #include "operators/functions/RowConstructorWithNull.h" #include "operators/functions/RowFunctionWithNull.h" - #include "velox/expression/SpecialFormRegistry.h" #include "velox/expression/VectorFunction.h" #include "velox/functions/lib/RegistrationHelpers.h" @@ -45,29 +44,32 @@ void registerFunctionOverwrite() { velox::registerFunction({"round"}); velox::registerFunction({"round"}); + auto kRowConstructorWithNull = RowConstructorWithNullCallToSpecialForm::kRowConstructorWithNull; velox::exec::registerVectorFunction( - "row_constructor_with_null", + kRowConstructorWithNull, std::vector>{}, std::make_unique>(), RowFunctionWithNull::metadata()); velox::exec::registerFunctionCallToSpecialForm( - RowConstructorWithNullCallToSpecialForm::kRowConstructorWithNull, - std::make_unique()); + kRowConstructorWithNull, std::make_unique(kRowConstructorWithNull)); + + auto kRowConstructorWithAllNull = RowConstructorWithNullCallToSpecialForm::kRowConstructorWithAllNull; velox::exec::registerVectorFunction( - "row_constructor_with_all_null", + kRowConstructorWithAllNull, std::vector>{}, std::make_unique>(), RowFunctionWithNull::metadata()); velox::exec::registerFunctionCallToSpecialForm( - RowConstructorWithAllNullCallToSpecialForm::kRowConstructorWithAllNull, - std::make_unique()); + kRowConstructorWithAllNull, + 
std::make_unique(kRowConstructorWithAllNull)); velox::functions::sparksql::registerBitwiseFunctions("spark_"); } } // namespace void registerAllFunctions() { // The registration order matters. Spark sql functions are registered after - // presto sql functions to overwrite the registration for same named functions. + // presto sql functions to overwrite the
(incubator-gluten) branch main updated: [BUILD] Remove duplicated arrow-dataset dependency from gluten-data/pom.xml (#5703)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 7324ffe22 [BUILD] Remove duplicated arrow-dataset dependency from gluten-data/pom.xml (#5703) 7324ffe22 is described below commit 7324ffe221ee8aa2ba0985011e1a4146d0c9995a Author: zhouyifan279 <88070094+zhouyifan...@users.noreply.github.com> AuthorDate: Mon May 13 12:54:32 2024 +0800 [BUILD] Remove duplicated arrow-dataset dependency from gluten-data/pom.xml (#5703) --- gluten-data/pom.xml | 28 1 file changed, 28 deletions(-) diff --git a/gluten-data/pom.xml b/gluten-data/pom.xml index 1e4438b84..bb84a06b4 100644 --- a/gluten-data/pom.xml +++ b/gluten-data/pom.xml @@ -165,34 +165,6 @@ compile - - org.apache.arrow - arrow-dataset - ${arrow.version} - - - io.netty - netty-common - - - io.netty - netty-buffer - - - com.fasterxml.jackson.core - jackson-core - - - com.fasterxml.jackson.core - jackson-annotations - - - protobuf-java - com.google.protobuf - - - compile - org.apache.hadoop hadoop-common - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Add InsertIntoHadoopFsRelationCommand test case for csv format (#5681)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 34ea806b6 [VL] Add InsertIntoHadoopFsRelationCommand test case for csv format (#5681) 34ea806b6 is described below commit 34ea806b67c9f0d8692cb105b6f23c25ef202a7f Author: Joey AuthorDate: Sat May 11 09:21:34 2024 +0800 [VL] Add InsertIntoHadoopFsRelationCommand test case for csv format (#5681) --- .../org/apache/gluten/execution/TestOperator.scala | 17 + .../gluten/execution/WholeStageTransformerSuite.scala | 4 +++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala index 920b8e2bd..b69223be1 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala @@ -515,6 +515,23 @@ class TestOperator extends VeloxWholeStageTransformerSuite { } } + test("insert into select from csv") { +withTable("insert_csv_t") { + val filePath = rootPath + "/datasource/csv/student.csv" + val df = spark.read +.format("csv") +.option("header", "true") +.load(filePath) + df.createOrReplaceTempView("student") + spark.sql("create table insert_csv_t(Name string, Language string) using parquet;") + runQueryAndCompare(""" + |insert into insert_csv_t select * from student; + |""".stripMargin) { +checkGlutenOperatorMatch[ArrowFileSourceScanExec] + } +} + } + test("test OneRowRelation") { val df = sql("SELECT 1") checkAnswer(df, Row(1)) diff --git a/gluten-core/src/test/scala/org/apache/gluten/execution/WholeStageTransformerSuite.scala b/gluten-core/src/test/scala/org/apache/gluten/execution/WholeStageTransformerSuite.scala index 67c12a6f7..c52002b68 100644 --- 
a/gluten-core/src/test/scala/org/apache/gluten/execution/WholeStageTransformerSuite.scala +++ b/gluten-core/src/test/scala/org/apache/gluten/execution/WholeStageTransformerSuite.scala @@ -22,7 +22,7 @@ import org.apache.gluten.utils.{Arm, FallbackUtil} import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.sql.{DataFrame, GlutenQueryTest, Row} -import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.execution.{CommandResultExec, SparkPlan} import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanExec, AdaptiveSparkPlanHelper, ShuffleQueryStageExec} import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.DoubleType @@ -222,6 +222,8 @@ abstract class WholeStageTransformerSuite df.queryExecution.executedPlan match { case exec: AdaptiveSparkPlanExec => getChildrenPlan(Seq(exec.executedPlan)) + case cmd: CommandResultExec => +getChildrenPlan(Seq(cmd.commandPhysicalPlan)) case plan => getChildrenPlan(Seq(plan)) } - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Add more metrics for generate (#5608)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 939f929af [VL] Add more metrics for generate (#5608) 939f929af is described below commit 939f929afe9db02e9a342fd8fddf0e43c359ebfc Author: Xiduo You AuthorDate: Mon May 6 16:05:59 2024 +0800 [VL] Add more metrics for generate (#5608) --- .../apache/gluten/execution/GenerateExecTransformer.scala | 12 +++- .../org/apache/gluten/execution/VeloxMetricsSuite.scala | 15 +++ .../apache/gluten/metrics/GenerateMetricsUpdater.scala| 6 ++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/GenerateExecTransformer.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/GenerateExecTransformer.scala index 08d2937b5..830fe396b 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/execution/GenerateExecTransformer.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/execution/GenerateExecTransformer.scala @@ -51,7 +51,17 @@ case class GenerateExecTransformer( @transient override lazy val metrics = -Map("numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) +Map( + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), + "numOutputVectors" -> SQLMetrics.createMetric(sparkContext, "number of output vectors"), + "numOutputBytes" -> SQLMetrics.createSizeMetric(sparkContext, "number of output bytes"), + "wallNanos" -> SQLMetrics.createNanoTimingMetric(sparkContext, "totaltime of generate"), + "cpuCount" -> SQLMetrics.createMetric(sparkContext, "cpu wall time count"), + "peakMemoryBytes" -> SQLMetrics.createSizeMetric(sparkContext, "peak memory bytes"), + "numMemoryAllocations" -> SQLMetrics.createMetric( +sparkContext, +"number of memory allocations") +) override def metricsUpdater(): 
MetricsUpdater = new GenerateMetricsUpdater(metrics) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMetricsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMetricsSuite.scala index ac9911bba..ce8450fea 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMetricsSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxMetricsSuite.scala @@ -128,6 +128,21 @@ class VeloxMetricsSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa } } + test("Generate metrics") { +runQueryAndCompare("SELECT explode(array(c1, c2, 1)) FROM metrics_t1") { + df => +val generate = find(df.queryExecution.executedPlan) { + case _: GenerateExecTransformer => true + case _ => false +} +assert(generate.isDefined) +val metrics = generate.get.metrics +assert(metrics("numOutputRows").value == 300) +assert(metrics("numOutputVectors").value > 0) +assert(metrics("numOutputBytes").value > 0) +} + } + test("Write metrics") { if (SparkShimLoader.getSparkVersion.startsWith("3.4")) { withSQLConf(("spark.gluten.sql.native.writer.enabled", "true")) { diff --git a/gluten-data/src/main/scala/org/apache/gluten/metrics/GenerateMetricsUpdater.scala b/gluten-data/src/main/scala/org/apache/gluten/metrics/GenerateMetricsUpdater.scala index 670fd1c4d..0a3dccd64 100644 --- a/gluten-data/src/main/scala/org/apache/gluten/metrics/GenerateMetricsUpdater.scala +++ b/gluten-data/src/main/scala/org/apache/gluten/metrics/GenerateMetricsUpdater.scala @@ -23,6 +23,12 @@ class GenerateMetricsUpdater(val metrics: Map[String, SQLMetric]) extends Metric if (operatorMetrics != null) { val nativeMetrics = operatorMetrics.asInstanceOf[OperatorMetrics] metrics("numOutputRows") += nativeMetrics.outputRows + metrics("numOutputVectors") += nativeMetrics.outputVectors + metrics("numOutputBytes") += nativeMetrics.outputBytes + metrics("cpuCount") += nativeMetrics.cpuCount + metrics("wallNanos") += nativeMetrics.wallNanos + 
metrics("peakMemoryBytes") += nativeMetrics.peakMemoryBytes + metrics("numMemoryAllocations") += nativeMetrics.numMemoryAllocations } } } - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [CORE] Fix gluten createOptional config contains Some (#5573)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 09974d3a4 [CORE] Fix gluten createOptional config contains Some (#5573) 09974d3a4 is described below commit 09974d3a46278759488a737ad8d929d2a31559dc Author: Xiduo You AuthorDate: Tue Apr 30 14:15:00 2024 +0800 [CORE] Fix gluten createOptional config contains Some (#5573) --- .../sql/execution/VeloxParquetReadSuite.scala | 8 +++ .../spark/sql/internal/GlutenConfigUtil.scala | 26 +- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala index d5828c738..cb3eeaec6 100644 --- a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala +++ b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetReadSuite.scala @@ -16,14 +16,22 @@ */ package org.apache.spark.sql.execution +import org.apache.gluten.GlutenConfig import org.apache.gluten.execution.{BasicScanExecTransformer, VeloxWholeStageTransformerSuite} +import org.apache.spark.SparkConf + import java.io.File class VeloxParquetReadSuite extends VeloxWholeStageTransformerSuite { override protected val resourcePath: String = "/parquet-for-read" override protected val fileFormat: String = "parquet" + override protected def sparkConf: SparkConf = { +super.sparkConf + .set(GlutenConfig.LOAD_QUANTUM.key, "128m") + } + testWithSpecifiedSparkVersion("read example parquet files", Some("3.5"), Some("3.5")) { withTable("test_table") { val dir = new File(getClass.getResource(resourcePath).getFile) diff --git a/shims/common/src/main/scala/org/apache/spark/sql/internal/GlutenConfigUtil.scala 
b/shims/common/src/main/scala/org/apache/spark/sql/internal/GlutenConfigUtil.scala index babb446d0..1a45572ac 100644 --- a/shims/common/src/main/scala/org/apache/spark/sql/internal/GlutenConfigUtil.scala +++ b/shims/common/src/main/scala/org/apache/spark/sql/internal/GlutenConfigUtil.scala @@ -21,11 +21,27 @@ import org.apache.spark.internal.config.ConfigReader import scala.collection.JavaConverters._ object GlutenConfigUtil { + private def getConfString(reader: ConfigReader, key: String, value: String): String = { +Option(SQLConf.getConfigEntry(key)) + .map { +_.readFrom(reader) match { + case o: Option[_] => o.map(_.toString).getOrElse(value) + case null => value + case v => v.toString +} + } + .getOrElse(value) + } + def parseConfig(conf: Map[String, String]): Map[String, String] = { -val reader = new ConfigReader(conf.filter(_._1.contains("spark.gluten.")).asJava) -val glutenConfigEntries = - SQLConf.getConfigEntries().asScala.filter(e => e.key.contains("spark.gluten.")) -val glutenConfig = glutenConfigEntries.map(e => (e.key, e.readFrom(reader).toString)).toMap -conf.map(e => (e._1, glutenConfig.getOrElse(e._1, e._2))) +val reader = new ConfigReader(conf.filter(_._1.startsWith("spark.gluten.")).asJava) +conf.map { + case (k, v) => +if (k.startsWith("spark.gluten.")) { + (k, getConfString(reader, k, v)) +} else { + (k, v) +} +}.toMap } } - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [CORE] Reuse broadcast exchange for different build keys with same table (#5563)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 7dad958e4 [CORE] Reuse broadcast exchange for different build keys with same table (#5563) 7dad958e4 is described below commit 7dad958e4e87cde85683ae03dddafd84c6f7408a Author: Xiduo You AuthorDate: Mon Apr 29 12:56:48 2024 +0800 [CORE] Reuse broadcast exchange for different build keys with same table (#5563) Vanilla Spark build HashRelation at driver side, so it is build keys sensitive. But we broadcast byte array and build HashRelation at executor side, the build keys are actually meaningless for the broadcast value. This pr erases the HashedRelationBroadcastMode build keys when do canonicalize. This change allows us reuse broadcast exchange for different build keys with same table. --- .../backendsapi/velox/VeloxSparkPlanExecApi.scala | 15 - .../gluten/execution/VeloxHashJoinSuite.scala | 39 +- .../gluten/backendsapi/SparkPlanExecApi.scala | 4 +++ .../execution/ColumnarBroadcastExchangeExec.scala | 4 ++- 4 files changed, 59 insertions(+), 3 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala index 132b5d7dd..8318ac2d5 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala @@ -50,7 +50,7 @@ import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution._ import org.apache.spark.sql.execution.datasources.FileFormat import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ShuffleExchangeExec} -import org.apache.spark.sql.execution.joins.BuildSideRelation +import 
org.apache.spark.sql.execution.joins.{BuildSideRelation, HashedRelationBroadcastMode} import org.apache.spark.sql.execution.metric.SQLMetric import org.apache.spark.sql.execution.utils.ExecUtil import org.apache.spark.sql.expression.{UDFExpression, UDFResolver, UserDefinedAggregateFunction} @@ -577,6 +577,19 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi { ColumnarBuildSideRelation(child.output, serialized.map(_.getSerialized)) } + override def doCanonicalizeForBroadcastMode(mode: BroadcastMode): BroadcastMode = { +mode match { + case hash: HashedRelationBroadcastMode => +// Note: It's different with vanilla Spark. +// Vanilla Spark build HashRelation at driver side, so it is build keys sensitive. +// But we broadcast byte array and build HashRelation at executor side, +// the build keys are actually meaningless for the broadcast value. +// This change allows us reuse broadcast exchange for different build keys with same table. +hash.copy(key = Seq.empty) + case _ => mode.canonicalized +} + } + /** * * Expressions. 
*/ diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala index 9239f90e1..4c1baf856 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxHashJoinSuite.scala @@ -19,7 +19,9 @@ package org.apache.gluten.execution import org.apache.gluten.sql.shims.SparkShimLoader import org.apache.spark.SparkConf -import org.apache.spark.sql.execution.InputIteratorTransformer +import org.apache.spark.sql.Row +import org.apache.spark.sql.execution.{ColumnarBroadcastExchangeExec, InputIteratorTransformer} +import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExchangeExec} class VeloxHashJoinSuite extends VeloxWholeStageTransformerSuite { override protected val resourcePath: String = "/tpch-data-parquet-velox" @@ -107,4 +109,39 @@ class VeloxHashJoinSuite extends VeloxWholeStageTransformerSuite { } } } + + test("Reuse broadcast exchange for different build keys with same table") { +withTable("t1", "t2") { + spark.sql(""" + |CREATE TABLE t1 USING PARQUET + |AS SELECT id as c1, id as c2 FROM range(10) + |""".stripMargin) + + spark.sql(""" + |CREATE TABLE t2 USING PARQUET + |AS SELECT id as c1, id as c2 FROM range(3) + |""".stripMargin) + + val df = spark.sql(""" + |SELECT * FROM t1 + |
(incubator-gluten) branch main updated: [CORE] Fix delta.package.name error (#5564)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new aeae0a693 [CORE] Fix delta.package.name error (#5564) aeae0a693 is described below commit aeae0a693850ac9dd3c351952b607abc1ae0c1ae Author: Xiduo You AuthorDate: Mon Apr 29 09:14:50 2024 +0800 [CORE] Fix delta.package.name error (#5564) --- backends-clickhouse/pom.xml| 2 +- gluten-celeborn/clickhouse/pom.xml | 2 +- pom.xml| 20 ++-- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/backends-clickhouse/pom.xml b/backends-clickhouse/pom.xml index 76e184f38..94df2f36f 100644 --- a/backends-clickhouse/pom.xml +++ b/backends-clickhouse/pom.xml @@ -94,7 +94,7 @@ io.delta - delta-core_${scala.binary.version} + ${delta.package.name}_${scala.binary.version} provided diff --git a/gluten-celeborn/clickhouse/pom.xml b/gluten-celeborn/clickhouse/pom.xml index d22a37c81..74b81031f 100755 --- a/gluten-celeborn/clickhouse/pom.xml +++ b/gluten-celeborn/clickhouse/pom.xml @@ -112,7 +112,7 @@ io.delta - delta-core_${scala.binary.version} + ${delta.package.name}_${scala.binary.version} test diff --git a/pom.xml b/pom.xml index f666de874..dbf46ac17 100644 --- a/pom.xml +++ b/pom.xml @@ -139,8 +139,8 @@ spark32 spark-sql-columnar-shims-spark32 3.2.2 - 1.3.1 - delta-core +1.3.1 +delta-core 2.0.1 20 @@ -153,7 +153,7 @@ spark-sql-columnar-shims-spark33 3.3.1 - 1.3.1 +1.3.1 delta-core 2.2.0 22 @@ -166,8 +166,8 @@ spark34 spark-sql-columnar-shims-spark34 3.4.2 - 1.5.0 - delta-core +1.5.0 +delta-core 2.4.0 24 @@ -179,11 +179,11 @@ spark35 spark-sql-columnar-shims-spark35 3.5.1 - 1.5.0 - delta-spark +1.5.0 +delta-spark 3.1.0 - 31 - 2.15.1 +31 +2.15.1 3.3.4 @@ -494,7 +494,7 @@ io.delta - ${delta.package.name}_${scala.binary.version} +${delta.package.name}_${scala.binary.version} ${delta.version} provided - To unsubscribe, e-mail: 
commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Allow user to specify os to load corresponding third-party libraries (#5549)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new ec6db12dd [VL] Allow user to specify os to load corresponding third-party libraries (#5549) ec6db12dd is described below commit ec6db12dd9227d2154be9be71e6607237ded45df Author: Xiduo You AuthorDate: Fri Apr 26 18:24:21 2024 +0800 [VL] Allow user to specify os to load corresponding third-party libraries (#5549) ## What changes were proposed in this pull request? Some system OS are derived from Centos or Ubuntu and it's hard to add it to codebase one by one. This pr adds new configs to specify system os and version manually. - spark.gluten.loadLibOS - spark.gluten.loadLibOSVersion ## How was this patch tested? N/A --- .../backendsapi/velox/VeloxListenerApi.scala | 48 +++--- docs/Configuration.md | 2 + .../scala/org/apache/gluten/GlutenConfig.scala | 2 + 3 files changed, 37 insertions(+), 15 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala index 9f1cde2be..1eaf92b5a 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxListenerApi.scala @@ -60,19 +60,11 @@ class VeloxListenerApi extends ListenerApi { override def onExecutorShutdown(): Unit = shutdown() - private def loadLibFromJar(load: JniLibLoader): Unit = { -val system = "cat /etc/os-release".!! 
-val systemNamePattern = "^NAME=\"?(.*)\"?".r -val systemVersionPattern = "^VERSION=\"?(.*)\"?".r -val systemInfoLines = system.stripMargin.split("\n") -val systemNamePattern(systemName) = - systemInfoLines.find(_.startsWith("NAME=")).getOrElse("") -val systemVersionPattern(systemVersion) = - systemInfoLines.find(_.startsWith("VERSION=")).getOrElse("") -if (systemName.isEmpty || systemVersion.isEmpty) { - throw new GlutenException("Failed to get OS name and version info.") -} -val loader = if (systemName.contains("Ubuntu") && systemVersion.startsWith("20.04")) { + private def getLibraryLoaderForOS( + systemName: String, + systemVersion: String, + system: String): SharedLibraryLoader = { +if (systemName.contains("Ubuntu") && systemVersion.startsWith("20.04")) { new SharedLibraryLoaderUbuntu2004 } else if (systemName.contains("Ubuntu") && systemVersion.startsWith("22.04")) { new SharedLibraryLoaderUbuntu2204 @@ -100,11 +92,37 @@ class VeloxListenerApi extends ListenerApi { new SharedLibraryLoaderDebian12 } else { throw new GlutenException( -"Found unsupported OS! Currently, Gluten's Velox backend" + +s"Found unsupported OS($systemName, $systemVersion)! Currently, Gluten's Velox backend" + " only supports Ubuntu 20.04/22.04, CentOS 7/8, " + "Alibaba Cloud Linux 2/3 & Anolis 7/8, tencentos 3.2, RedHat 7/8, " + "Debian 11/12.") } + } + + private def loadLibFromJar(load: JniLibLoader, conf: SparkConf): Unit = { +val systemName = conf.getOption(GlutenConfig.GLUTEN_LOAD_LIB_OS) +val loader = if (systemName.isDefined) { + val systemVersion = conf.getOption(GlutenConfig.GLUTEN_LOAD_LIB_OS_VERSION) + if (systemVersion.isEmpty) { +throw new GlutenException( + s"${GlutenConfig.GLUTEN_LOAD_LIB_OS_VERSION} must be specified when specifies the " + +s"${GlutenConfig.GLUTEN_LOAD_LIB_OS}") + } + getLibraryLoaderForOS(systemName.get, systemVersion.get, "") +} else { + val system = "cat /etc/os-release".!! 
+ val systemNamePattern = "^NAME=\"?(.*)\"?".r + val systemVersionPattern = "^VERSION=\"?(.*)\"?".r + val systemInfoLines = system.stripMargin.split("\n") + val systemNamePattern(systemName) = +systemInfoLines.find(_.startsWith("NAME=")).getOrElse("") + val systemVersionPattern(systemVersion) = +systemInfoLines.find(_.startsWith("VERSION=")).getOrElse("") + if (systemName.isEmpty || systemVersion.isEmpty) { +throw new GlutenException("Failed to get OS name and version info.") + } + getLibraryLoaderForOS(systemName, systemVersion, system) +
(incubator-gluten) branch main updated: [VL] Fix load and link libglog.so.1 in SharedLibraryLoaderCentos8 (#5271)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 7bdc68135 [VL] Fix load and link libglog.so.1 in SharedLibraryLoaderCentos8 (#5271) 7bdc68135 is described below commit 7bdc6813552c7deb9ecc1f2b4d68ecdc172c882d Author: Joey AuthorDate: Wed Apr 3 09:48:08 2024 +0800 [VL] Fix load and link libglog.so.1 in SharedLibraryLoaderCentos8 (#5271) --- .../main/scala/org/apache/gluten/utils/SharedLibraryLoaderCentos8.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/utils/SharedLibraryLoaderCentos8.scala b/backends-velox/src/main/scala/org/apache/gluten/utils/SharedLibraryLoaderCentos8.scala index 433e4616f..c1d3bf2e2 100755 --- a/backends-velox/src/main/scala/org/apache/gluten/utils/SharedLibraryLoaderCentos8.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/utils/SharedLibraryLoaderCentos8.scala @@ -35,7 +35,7 @@ class SharedLibraryLoaderCentos8 extends SharedLibraryLoader { .loadAndCreateLink("libdouble-conversion.so.3", "libdouble-conversion.so", false) .loadAndCreateLink("libevent-2.1.so.6", "libevent-2.1.so", false) .loadAndCreateLink("libgflags.so.2.2", "libgflags.so", false) - .loadAndCreateLink("libglog.so.0", "libglog.so", false) + .loadAndCreateLink("libglog.so.1", "libglog.so", false) .loadAndCreateLink("libdwarf.so.1", "libdwarf.so", false) .loadAndCreateLink("libidn.so.11", "libidn.so", false) .loadAndCreateLink("libntlm.so.0", "libntlm.so", false) - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [GLUTEN-5189][VL] Correct boost lib path (#5190)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 1cda804b9 [GLUTEN-5189][VL] Correct boost lib path (#5190) 1cda804b9 is described below commit 1cda804b994ba650b4a9b16a2d812eefde969983 Author: Zhen Wang <643348...@qq.com> AuthorDate: Fri Mar 29 11:06:22 2024 +0800 [GLUTEN-5189][VL] Correct boost lib path (#5190) --- dev/package.sh | 16 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/dev/package.sh b/dev/package.sh index 09c9f76b2..c6051e0d3 100755 --- a/dev/package.sh +++ b/dev/package.sh @@ -18,13 +18,13 @@ mvn clean package -Pbackends-velox -Prss -Pspark-3.5 -DskipTests mkdir -p $THIRDPARTY_LIB function process_setup_ubuntu_2004 { - cp /usr/lib/${ARCH}-linux-gnu/{libroken.so.18,libasn1.so.8,libboost_context.so.1.84.0,libboost_regex.so.1.84.0,libcrypto.so.1.1,libnghttp2.so.14,libnettle.so.7,libhogweed.so.5,librtmp.so.1,libssh.so.4,libssl.so.1.1,liblber-2.4.so.2,libsasl2.so.2,libwind.so.0,libheimbase.so.1,libhcrypto.so.4,libhx509.so.5,libkrb5.so.26,libheimntlm.so.0,libgssapi.so.3,libldap_r-2.4.so.2,libcurl.so.4,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libunwind.so.8,libglog.so.0,libidn.so.11,libn [...] - cp /usr/local/lib/{libprotobuf.so.32,libhdfs3.so.1} $THIRDPARTY_LIB/ + cp /usr/lib/${ARCH}-linux-gnu/{libroken.so.18,libasn1.so.8,libcrypto.so.1.1,libnghttp2.so.14,libnettle.so.7,libhogweed.so.5,librtmp.so.1,libssh.so.4,libssl.so.1.1,liblber-2.4.so.2,libsasl2.so.2,libwind.so.0,libheimbase.so.1,libhcrypto.so.4,libhx509.so.5,libkrb5.so.26,libheimntlm.so.0,libgssapi.so.3,libldap_r-2.4.so.2,libcurl.so.4,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libunwind.so.8,libglog.so.0,libidn.so.11,libntlm.so.0,libgsasl.so.7,libicudata.so.66,libicuuc.so. [...] 
+ cp /usr/local/lib/{libprotobuf.so.32,libhdfs3.so.1,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/ } function process_setup_ubuntu_2204 { - cp /usr/lib/${ARCH}-linux-gnu/{libre2.so.9,libboost_context.so.1.84.0,libboost_regex.so.1.84.0,libdouble-conversion.so.3,libidn.so.12,libglog.so.0,libgflags.so.2.2,libevent-2.1.so.7,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libxml2.so.2,libgsasl.so.7,libicui18n.so.70,libicuuc.so.70,libnghttp2.so.14,libldap-2.5.so.0,liblber-2.5.so.0,libntlm.so.0,librtmp.so.1,libsasl2.so.2,libssh.so.4,libicudata.so.70,libthrift-0.16.0.so} $THIRDPARTY_LIB/ - cp /usr/local/lib/{libhdfs3.so.1,libprotobuf.so.32} $THIRDPARTY_LIB/ + cp /usr/lib/${ARCH}-linux-gnu/{libre2.so.9,libdouble-conversion.so.3,libidn.so.12,libglog.so.0,libgflags.so.2.2,libevent-2.1.so.7,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libxml2.so.2,libgsasl.so.7,libicui18n.so.70,libicuuc.so.70,libnghttp2.so.14,libldap-2.5.so.0,liblber-2.5.so.0,libntlm.so.0,librtmp.so.1,libsasl2.so.2,libssh.so.4,libicudata.so.70,libthrift-0.16.0.so} $THIRDPARTY_LIB/ + cp /usr/local/lib/{libhdfs3.so.1,libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/ } function process_setup_centos_8 { @@ -39,13 +39,13 @@ function process_setup_centos_7 { } function process_setup_debian_11 { - cp /usr/lib/x86_64-linux-gnu/{libre2.so.9,libthrift-0.13.0.so,libboost_context.so.1.84.0,libboost_regex.so.1.84.0,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libglog.so.0,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libicui18n.so.67,libicuuc.so.67,libnghttp2.so.14,librtmp.so.1,libssh2.so.1,libpsl.so.5,libldap_r-2.4.so.2,liblber-2.4.so.2,libbrotlidec.so.1,libicudata.so.67,libsasl2.so.2,libbrotlicommon.so.1} $THIRDPARTY_LIB/ - cp /usr/local/lib/{libhdfs3.so.1,libprotobuf.so.32} $THIRDPARTY_LIB/ + cp 
/usr/lib/x86_64-linux-gnu/{libre2.so.9,libthrift-0.13.0.so,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libglog.so.0,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libicui18n.so.67,libicuuc.so.67,libnghttp2.so.14,librtmp.so.1,libssh2.so.1,libpsl.so.5,libldap_r-2.4.so.2,liblber-2.4.so.2,libbrotlidec.so.1,libicudata.so.67,libsasl2.so.2,libbrotlicommon.so.1} $THIRDPARTY_LIB/ + cp /usr/local/lib/{libhdfs3.so.1,libprotobuf.so.32,libboost_context.so.1.84.0,libboost_regex.so.1.84.0} $THIRDPARTY_LIB/ } function process_setup_debian_12 { - cp /usr/lib/x86_64-linux-gnu/{libthrift-0.17.0.so,libboost_context.so.1.84.0,libboost_regex.so.1.84.0,libdouble-conversion.so.3,libevent-2.1.so.7,libgflags.so.2.2,libglog.so.1,libsnappy.so.1,libunwind.so.8,libcurl.so.4,libicui18n.so.72,libicuuc.so.72,libnghttp2.so.14,librtmp.so.1,libssh2.so.1,libpsl.so.5,libldap-2.5.so.0,liblber-2.5.so.0,libbrotlidec.so.1,libicudata.so.72,libsasl2.so.2,libbrotlicommon.so.1,libcrypto.so.3,libssl.so.3,libgssapi_krb5.so.2,libkrb5.so.3,libk5crypto.so.3,lib [...] - cp /usr/local/lib/{libprotobuf.so.32,libhdfs3.so.1} $THIRDPARTY_LIB/ + cp /usr/lib/
(incubator-gluten) branch main updated: [VL][CI] Add back upload golden files (#5173)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 12991ca4f [VL][CI] Add back upload golden files (#5173) 12991ca4f is described below commit 12991ca4f5d487289836812fa393a5c4052968e1 Author: Xiduo You AuthorDate: Thu Mar 28 18:21:23 2024 +0800 [VL][CI] Add back upload golden files (#5173) --- .github/workflows/velox_docker.yml | 21 - 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_docker.yml index f9c8ee5d3..128b5ff0f 100644 --- a/.github/workflows/velox_docker.yml +++ b/.github/workflows/velox_docker.yml @@ -386,6 +386,13 @@ jobs: export PATH=${PATH}:${MAVEN_HOME}/bin mvn -ntp clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Prss -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark32/spark_home/" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ mvn -ntp test -Pspark-3.2 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest + - name: Upload golden files +if: failure() +uses: actions/upload-artifact@v4 +with: + name: golden-files-spark32 + path: /tmp/tpch-approved-plan/** + run-spark-test-spark32-slow: runs-on: ubuntu-20.04 @@ -466,6 +473,13 @@ jobs: export PATH=${PATH}:${MAVEN_HOME}/bin mvn -ntp clean install -Pspark-3.3 -Pbackends-velox -Prss -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ mvn -ntp test -Pspark-3.3 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest + - name: Upload golden files +if: failure() +uses: actions/upload-artifact@v4 +with: + name: golden-files-spark33 + path: 
/tmp/tpch-approved-plan/** + run-spark-test-spark33-slow: runs-on: ubuntu-20.04 @@ -504,7 +518,6 @@ jobs: mvn -ntp clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark33/spark_home/" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest - run-spark-test-spark34: runs-on: ubuntu-20.04 container: ghcr.io/facebookincubator/velox-dev:circleci-avx @@ -548,6 +561,12 @@ jobs: export PATH=${PATH}:${MAVEN_HOME}/bin mvn -ntp clean install -Pspark-3.4 -Pbackends-velox -Prss -Pspark-ut -DargLine="-Dspark.test.home=$GITHUB_WORKSPACE//shims/spark34/spark_home/" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ mvn -ntp test -Pspark-3.4 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest + - name: Upload golden files +if: failure() +uses: actions/upload-artifact@v4 +with: + name: golden-files-spark34 + path: /tmp/tpch-approved-plan/** run-spark-test-spark34-slow: - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Fix spark34 group-by.sql(.out) in GlutenSQLQueryTestSuite (#5162)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 657966e07 [VL] Fix spark34 group-by.sql(.out) in GlutenSQLQueryTestSuite (#5162) 657966e07 is described below commit 657966e0720ede25a5dcd5b3f8cb6cd719db6eab Author: Joey AuthorDate: Thu Mar 28 14:05:30 2024 +0800 [VL] Fix spark34 group-by.sql(.out) in GlutenSQLQueryTestSuite (#5162) --- .../utils/velox/VeloxSQLQueryTestSettings.scala| 2 +- .../test/resources/sql-tests/inputs/group-by.sql | 65 ++- .../resources/sql-tests/results/group-by.sql.out | 192 + .../utils/velox/VeloxSQLQueryTestSettings.scala| 2 +- 4 files changed, 61 insertions(+), 200 deletions(-) diff --git a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala index 11f77b9e8..26428b3a4 100644 --- a/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/io/glutenproject/utils/velox/VeloxSQLQueryTestSettings.scala @@ -234,7 +234,7 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { private val OVERWRITE_SQL_QUERY_LIST: Set[String] = Set( // Velox corr has better computation logic but it fails Spark's precision check. 
// Remove -- SPARK-24369 multiple distinct aggregations having the same argument set, -// -- SPARK-37613: Support ANSI Aggregate Function: regr_r2 +//-- SPARK-37613: Support ANSI Aggregate Function: regr_r2 "group-by.sql", // Remove -- SPARK-24369 multiple distinct aggregations having the same argument set "udf/udf-group-by.sql" diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/group-by.sql b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/group-by.sql index 331cd9440..b618ad1d5 100644 --- a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/group-by.sql +++ b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/group-by.sql @@ -7,12 +7,6 @@ CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES (1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2), (null, 1), (3, null), (null, null) AS testData(a, b); -CREATE OR REPLACE TEMPORARY VIEW testRegression AS SELECT * FROM VALUES -(1, 10, null), (2, 10, 11), (2, 20, 22), (2, 25, null), (2, 30, 35) -AS testRegression(k, y, x); -CREATE OR REPLACE TEMPORARY VIEW aggr AS SELECT * FROM VALUES -(0, 0), (0, 10), (0, 20), (0, 30), (0, 40), (1, 10), (1, 20), (2, 10), (2, 20), (2, 25), (2, 30), (3, 60), (4, null) -AS aggr(k, v); -- Aggregate with empty GroupBy expressions. SELECT a, COUNT(b) FROM testData; @@ -40,6 +34,9 @@ SELECT a + b, COUNT(b) FROM testData GROUP BY a + b; SELECT a + 2, COUNT(b) FROM testData GROUP BY a + 1; SELECT a + 1 + 1, COUNT(b) FROM testData GROUP BY a + 1; +-- struct() in group by +SELECT count(1) FROM testData GROUP BY struct(a + 0.1 AS aa); + -- Aggregate with nulls. 
SELECT SKEWNESS(a), KURTOSIS(a), MIN(a), MAX(a), AVG(a), VARIANCE(a), STDDEV(a), SUM(a), COUNT(a) FROM testData; @@ -233,17 +230,6 @@ FROM VALUES (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE)), (CAST(NULL AS DOUBLE SELECT histogram_numeric(col, 3) FROM VALUES (CAST(NULL AS INT)), (CAST(NULL AS INT)), (CAST(NULL AS INT)) AS tab(col); - --- SPARK-37613: Support ANSI Aggregate Function: regr_count -SELECT regr_count(y, x) FROM testRegression; -SELECT regr_count(y, x) FROM testRegression WHERE x IS NOT NULL; -SELECT k, count(*), regr_count(y, x) FROM testRegression GROUP BY k; -SELECT k, count(*) FILTER (WHERE x IS NOT NULL), regr_count(y, x) FROM testRegression GROUP BY k; - --- SPARK-37613: Support ANSI Aggregate Function: regr_r2 -SELECT regr_r2(y, x) FROM testRegression; -SELECT regr_r2(y, x) FROM testRegression WHERE x IS NOT NULL; - -- SPARK-27974: Support ANSI Aggregate Function: array_agg SELECT collect_list(col), @@ -258,34 +244,19 @@ FROM VALUES (1,4),(2,3),(1,4),(2,4) AS v(a,b) GROUP BY a; --- SPARK-37614: Support ANSI Aggregate Function: regr_avgx & regr_avgy -SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression; -SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL; -SELECT k, avg(x), avg(y), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k; -SELECT k, avg(x) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), avg(y) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k; --- SPARK-37676: Support ANSI Aggregation Function: percentile_cont -SELECT - percentile_cont(0.25) WITHIN GROUP (ORDER BY v), - percentile_cont(0.25) WITHIN GROUP (ORDER BY v DESC) -FROM aggr; -SELECT - k, - percentile
(incubator-gluten) branch main updated: [GLUTEN-4964][CORE]Fallback complex data type in parquet write for Spark32 & Spark33 (#5107)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 445616109 [GLUTEN-4964][CORE]Fallback complex data type in parquet write for Spark32 & Spark33 (#5107) 445616109 is described below commit 44561610991f5b4c258a4c65e97e01cb13c2aa14 Author: JiaKe AuthorDate: Thu Mar 28 08:54:47 2024 +0800 [GLUTEN-4964][CORE]Fallback complex data type in parquet write for Spark32 & Spark33 (#5107) --- .../io/glutenproject/backendsapi/velox/VeloxBackend.scala | 11 +++ .../spark/sql/execution/VeloxParquetWriteSuite.scala | 14 ++ .../io/glutenproject/backendsapi/BackendSettingsApi.scala | 1 + .../execution/datasources/GlutenWriterColumnarRules.scala | 3 ++- 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala index 3293abe3e..9d252149d 100644 --- a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala +++ b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala @@ -255,6 +255,17 @@ object BackendSettings extends BackendSettingsApi { } } + override def supportNativeWrite(fields: Array[StructField]): Boolean = { +fields.map { + field => +field.dataType match { + case _: TimestampType | _: StructType | _: ArrayType | _: MapType => return false + case _ => +} +} +true + } + override def supportNativeMetadataColumns(): Boolean = true override def supportExpandExec(): Boolean = true diff --git a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteSuite.scala b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteSuite.scala index dc30f0559..6f938b7b9 100644 --- 
a/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteSuite.scala +++ b/backends-velox/src/test/scala/org/apache/spark/sql/execution/VeloxParquetWriteSuite.scala @@ -37,6 +37,20 @@ class VeloxParquetWriteSuite extends VeloxWholeStageTransformerSuite { super.sparkConf.set("spark.gluten.sql.native.writer.enabled", "true") } + test("test Array(Struct) fallback") { +withTempPath { + f => +val path = f.getCanonicalPath +val testAppender = new LogAppender("native write tracker") +withLogAppender(testAppender) { + spark.sql("select array(struct(1), null) as var1").write.mode("overwrite").save(path) +} +assert( + testAppender.loggingEvents.exists( +_.getMessage.toString.contains("Use Gluten parquet write for hive")) == false) +} + } + test("test write parquet with compression codec") { // compression codec details see `VeloxParquetDatasource.cc` Seq("snappy", "gzip", "zstd", "lz4", "none", "uncompressed") diff --git a/gluten-core/src/main/scala/io/glutenproject/backendsapi/BackendSettingsApi.scala b/gluten-core/src/main/scala/io/glutenproject/backendsapi/BackendSettingsApi.scala index 25d71f0fc..950eed2eb 100644 --- a/gluten-core/src/main/scala/io/glutenproject/backendsapi/BackendSettingsApi.scala +++ b/gluten-core/src/main/scala/io/glutenproject/backendsapi/BackendSettingsApi.scala @@ -41,6 +41,7 @@ trait BackendSettingsApi { fields: Array[StructField], bucketSpec: Option[BucketSpec], options: Map[String, String]): ValidationResult = ValidationResult.ok + def supportNativeWrite(fields: Array[StructField]): Boolean = true def supportNativeMetadataColumns(): Boolean = false def supportExpandExec(): Boolean = false def supportSortExec(): Boolean = false diff --git a/gluten-core/src/main/scala/org/apache/spark/sql/execution/datasources/GlutenWriterColumnarRules.scala b/gluten-core/src/main/scala/org/apache/spark/sql/execution/datasources/GlutenWriterColumnarRules.scala index d2c010cc4..80ef67ad6 100644 --- 
a/gluten-core/src/main/scala/org/apache/spark/sql/execution/datasources/GlutenWriterColumnarRules.scala +++ b/gluten-core/src/main/scala/org/apache/spark/sql/execution/datasources/GlutenWriterColumnarRules.scala @@ -156,7 +156,8 @@ object GlutenWriterColumnarRules { if write.getClass.getName == NOOP_WRITE && BackendsApiManager.getSettings.enableNativeWriteFiles() => injectFakeRowAdaptor(rc, rc.child) - case rc @ DataWritingCommandExec(cmd, child) => + case rc @ DataWritingCommandExec(cmd, child) + if BackendsApiManager.getSettings.supportNat
(incubator-gluten) branch main updated: [CORE] Port "SPARK-39983 Should not cache unserialized broadcast relations on the driver" (#5149)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new f52885898 [CORE] Port "SPARK-39983 Should not cache unserialized broadcast relations on the driver" (#5149) f52885898 is described below commit f52885898c20e99684503e37fe1db97a13244e11 Author: Xiduo You AuthorDate: Thu Mar 28 08:55:10 2024 +0800 [CORE] Port "SPARK-39983 Should not cache unserialized broadcast relations on the driver" (#5149) --- .../execution/ColumnarBroadcastExchangeExec.scala | 4 +- .../apache/spark/sql/GlutenSQLTestsBaseTrait.scala | 70 -- .../utils/velox/VeloxTestSettings.scala| 3 +- .../execution/GlutenBroadcastExchangeSuite.scala | 39 +++- .../io/glutenproject/sql/shims/SparkShims.scala| 10 .../sql/shims/spark34/Spark34Shims.scala | 9 ++- .../scala/org/apache/spark/SparkContextUtils.scala | 12 +++- .../sql/shims/spark35/Spark35Shims.scala | 9 ++- .../scala/org/apache/spark/SparkContextUtils.scala | 12 +++- 9 files changed, 124 insertions(+), 44 deletions(-) diff --git a/gluten-core/src/main/scala/org/apache/spark/sql/execution/ColumnarBroadcastExchangeExec.scala b/gluten-core/src/main/scala/org/apache/spark/sql/execution/ColumnarBroadcastExchangeExec.scala index 645bce76d..b90ff4967 100644 --- a/gluten-core/src/main/scala/org/apache/spark/sql/execution/ColumnarBroadcastExchangeExec.scala +++ b/gluten-core/src/main/scala/org/apache/spark/sql/execution/ColumnarBroadcastExchangeExec.scala @@ -80,7 +80,9 @@ case class ColumnarBroadcastExchangeExec(mode: BroadcastMode, child: SparkPlan) val broadcasted = GlutenTimeMetric.millis(longMetric("broadcastTime")) { _ => // Broadcast the relation -sparkContext.broadcast(relation.asInstanceOf[Any]) +SparkShimLoader.getSparkShims.broadcastInternal( + sparkContext, + relation.asInstanceOf[Any]) } // Update driver metrics diff --git 
a/gluten-ut/common/src/test/scala/org/apache/spark/sql/GlutenSQLTestsBaseTrait.scala b/gluten-ut/common/src/test/scala/org/apache/spark/sql/GlutenSQLTestsBaseTrait.scala index 68fa08879..5cd9f3e9c 100644 --- a/gluten-ut/common/src/test/scala/org/apache/spark/sql/GlutenSQLTestsBaseTrait.scala +++ b/gluten-ut/common/src/test/scala/org/apache/spark/sql/GlutenSQLTestsBaseTrait.scala @@ -45,39 +45,7 @@ trait GlutenSQLTestsBaseTrait extends SharedSparkSession with GlutenTestsBaseTra } override def sparkConf: SparkConf = { -// Native SQL configs -val conf = super.sparkConf - .setAppName("Gluten-UT") - .set("spark.driver.memory", "1G") - .set("spark.sql.adaptive.enabled", "true") - .set("spark.sql.shuffle.partitions", "1") - .set("spark.sql.files.maxPartitionBytes", "134217728") - .set("spark.memory.offHeap.enabled", "true") - .set("spark.memory.offHeap.size", "1024MB") - .set("spark.plugins", "io.glutenproject.GlutenPlugin") - .set("spark.shuffle.manager", "org.apache.spark.shuffle.sort.ColumnarShuffleManager") - .set("spark.sql.warehouse.dir", warehouse) - .set("spark.ui.enabled", "false") - .set("spark.gluten.ui.enabled", "false") -// Avoid static evaluation by spark catalyst. But there are some UT issues -// coming from spark, e.g., expecting SparkException is thrown, but the wrapped -// exception is thrown. 
-// .set("spark.sql.optimizer.excludedRules", ConstantFolding.ruleName + "," + -// NullPropagation.ruleName) - -if (BackendTestUtils.isCHBackendLoaded()) { - conf -.set("spark.io.compression.codec", "LZ4") -.set("spark.gluten.sql.columnar.backend.ch.worker.id", "1") -.set("spark.gluten.sql.enable.native.validation", "false") -.set(GlutenConfig.GLUTEN_LIB_PATH, SystemParameters.getClickHouseLibPath) -.set("spark.sql.files.openCostInBytes", "134217728") -.set("spark.unsafe.exceptionOnMemoryLeak", "true") -} else { - conf.set("spark.unsafe.exceptionOnMemoryLeak", "true") -} - -conf +GlutenSQLTestsBaseTrait.nativeSparkConf(super.sparkConf, warehouse) } /** @@ -126,3 +94,39 @@ trait GlutenSQLTestsBaseTrait extends SharedSparkSession with GlutenTestsBaseTra } } } + +object GlutenSQLTestsBaseTrait { + def nativeSparkConf(origin: SparkConf, warehouse: String): SparkConf = { +
(incubator-gluten) branch main updated: [GLUTEN-5142][CELEBORN] Remove Incubating of Celeborn from reference (#5143)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 3bc5387c0 [GLUTEN-5142][CELEBORN] Remove Incubating of Celeborn from reference (#5143) 3bc5387c0 is described below commit 3bc5387c0e50f3e012f6ffad55dabbb7c52229c9 Author: Nicholas Jiang AuthorDate: Wed Mar 27 13:44:40 2024 +0800 [GLUTEN-5142][CELEBORN] Remove Incubating of Celeborn from reference (#5143) --- docs/get-started/ClickHouse.md | 8 docs/get-started/Velox.md | 8 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/get-started/ClickHouse.md b/docs/get-started/ClickHouse.md index ad7183b90..4167af2ee 100644 --- a/docs/get-started/ClickHouse.md +++ b/docs/get-started/ClickHouse.md @@ -629,14 +629,14 @@ public read-only account:gluten/hN2xX3uQ4m ### Celeborn support -Gluten with clickhouse backend has not yet supportted [Celeborn](https://github.com/apache/incubator-celeborn) natively as remote shuffle service using columar shuffle. However, you can still use Celeborn with row shuffle, which means a ColumarBatch will be converted to a row during shuffle. +Gluten with clickhouse backend has not yet supportted [Celeborn](https://github.com/apache/celeborn) natively as remote shuffle service using columar shuffle. However, you can still use Celeborn with row shuffle, which means a ColumarBatch will be converted to a row during shuffle. Below introduction is used to enable this feature: -First refer to this URL(https://github.com/apache/incubator-celeborn) to setup a celeborn cluster. +First refer to this URL(https://github.com/apache/celeborn) to setup a celeborn cluster. Then add the Spark Celeborn Client packages to your Spark application's classpath(usually add them into `$SPARK_HOME/jars`). 
-- Celeborn: celeborn-client-spark-3-shaded_2.12-0.3.0-incubating.jar +- Celeborn: celeborn-client-spark-3-shaded_2.12-[celebornVersion].jar Currently to use Celeborn following configurations are required in `spark-defaults.conf` @@ -666,7 +666,7 @@ spark.sql.adaptive.localShuffleReader.enabled false spark.celeborn.storage.hdfs.dir hdfs:///celeborn # If you want to use dynamic resource allocation, -# please refer to this URL (https://github.com/apache/incubator-celeborn/tree/main/assets/spark-patch) to apply the patch into your own Spark. +# please refer to this URL (https://github.com/apache/celeborn/tree/main/assets/spark-patch) to apply the patch into your own Spark. spark.dynamicAllocation.enabled false ``` diff --git a/docs/get-started/Velox.md b/docs/get-started/Velox.md index 7c3d77abc..1fabfc0fe 100644 --- a/docs/get-started/Velox.md +++ b/docs/get-started/Velox.md @@ -203,11 +203,11 @@ Currently there are several ways to asscess S3 in Spark. Please refer [Velox S3] ## Celeborn support -Gluten with velox backend supports [Celeborn](https://github.com/apache/incubator-celeborn) as remote shuffle service. Currently, the supported Celeborn versions are `0.3.x` and `0.4.0`. +Gluten with velox backend supports [Celeborn](https://github.com/apache/celeborn) as remote shuffle service. Currently, the supported Celeborn versions are `0.3.x` and `0.4.0`. Below introduction is used to enable this feature -First refer to this URL(https://github.com/apache/incubator-celeborn) to setup a celeborn cluster. +First refer to this URL(https://github.com/apache/celeborn) to setup a celeborn cluster. When compiling the Gluten Java module, it's required to enable `rss` profile, as follows: @@ -217,7 +217,7 @@ mvn clean package -Pbackends-velox -Pspark-3.3 -Prss -DskipTests Then add the Gluten and Spark Celeborn Client packages to your Spark application's classpath(usually add them into `$SPARK_HOME/jars`). 
-- Celeborn: celeborn-client-spark-3-shaded_2.12-0.3.0-incubating.jar +- Celeborn: celeborn-client-spark-3-shaded_2.12-[celebornVersion].jar - Gluten: gluten-velox-bundle-spark3.x_2.12-xx_xx_xx-SNAPSHOT.jar, gluten-thirdparty-lib-xx-xx.jar Currently to use Gluten following configurations are required in `spark-defaults.conf` @@ -248,7 +248,7 @@ spark.sql.adaptive.localShuffleReader.enabled false spark.celeborn.storage.hdfs.dir hdfs:///celeborn # If you want to use dynamic resource allocation, -# please refer to this URL (https://github.com/apache/incubator-celeborn/tree/main/assets/spark-patch) to apply the patch into your own Spark. +# please refer to this URL (https://github.com/apache/celeborn/tree/main/assets/spark-patch) to apply the patch into your own Spark. spark.dynamicAllocation.enabled false ``` - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [CORE] Move BackendBuildInfo case class from GlutenPlugin to Backend class file (#5129)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 6dc7885f6 [CORE] Move BackendBuildInfo case class from GlutenPlugin to Backend class file (#5129) 6dc7885f6 is described below commit 6dc7885f6c54f4ea0f773e920fb455e09298b3b7 Author: Zhen Wang <643348...@qq.com> AuthorDate: Wed Mar 27 09:15:13 2024 +0800 [CORE] Move BackendBuildInfo case class from GlutenPlugin to Backend class file (#5129) --- .../io/glutenproject/backendsapi/clickhouse/CHBackend.scala| 6 +++--- .../io/glutenproject/backendsapi/velox/VeloxBackend.scala | 6 +++--- gluten-core/src/main/scala/io/glutenproject/GlutenPlugin.scala | 6 -- .../src/main/scala/io/glutenproject/backendsapi/Backend.scala | 10 +++--- .../io/glutenproject/backendsapi/BackendsApiManager.scala | 4 +--- 5 files changed, 14 insertions(+), 18 deletions(-) diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala index fbcb804a3..a7c5c9980 100644 --- a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala +++ b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHBackend.scala @@ -16,7 +16,7 @@ */ package io.glutenproject.backendsapi.clickhouse -import io.glutenproject.{CH_BRANCH, CH_COMMIT, GlutenConfig, GlutenPlugin} +import io.glutenproject.{CH_BRANCH, CH_COMMIT, GlutenConfig} import io.glutenproject.backendsapi._ import io.glutenproject.expression.WindowFunctionsBuilder import io.glutenproject.extension.ValidationResult @@ -41,8 +41,8 @@ import scala.util.control.Breaks.{break, breakable} class CHBackend extends Backend { override def name(): String = CHBackend.BACKEND_NAME - override def buildInfo(): GlutenPlugin.BackendBuildInfo = 
-GlutenPlugin.BackendBuildInfo("ClickHouse", CH_BRANCH, CH_COMMIT, "UNKNOWN") + override def buildInfo(): BackendBuildInfo = +BackendBuildInfo("ClickHouse", CH_BRANCH, CH_COMMIT, "UNKNOWN") override def iteratorApi(): IteratorApi = new CHIteratorApi override def sparkPlanExecApi(): SparkPlanExecApi = new CHSparkPlanExecApi override def transformerApi(): TransformerApi = new CHTransformerApi diff --git a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala index 0ff2bd0d7..3293abe3e 100644 --- a/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala +++ b/backends-velox/src/main/scala/io/glutenproject/backendsapi/velox/VeloxBackend.scala @@ -16,7 +16,7 @@ */ package io.glutenproject.backendsapi.velox -import io.glutenproject.{GlutenConfig, GlutenPlugin, VELOX_BRANCH, VELOX_REVISION, VELOX_REVISION_TIME} +import io.glutenproject.{GlutenConfig, VELOX_BRANCH, VELOX_REVISION, VELOX_REVISION_TIME} import io.glutenproject.backendsapi._ import io.glutenproject.exception.GlutenNotSupportException import io.glutenproject.execution.WriteFilesExecTransformer @@ -44,8 +44,8 @@ import scala.util.control.Breaks.breakable class VeloxBackend extends Backend { override def name(): String = VeloxBackend.BACKEND_NAME - override def buildInfo(): GlutenPlugin.BackendBuildInfo = -GlutenPlugin.BackendBuildInfo("Velox", VELOX_BRANCH, VELOX_REVISION, VELOX_REVISION_TIME) + override def buildInfo(): BackendBuildInfo = +BackendBuildInfo("Velox", VELOX_BRANCH, VELOX_REVISION, VELOX_REVISION_TIME) override def iteratorApi(): IteratorApi = new IteratorApiImpl override def sparkPlanExecApi(): SparkPlanExecApi = new SparkPlanExecApiImpl override def transformerApi(): TransformerApi = new TransformerApiImpl diff --git a/gluten-core/src/main/scala/io/glutenproject/GlutenPlugin.scala b/gluten-core/src/main/scala/io/glutenproject/GlutenPlugin.scala index 
c54b78da9..5fa3083c2 100644 --- a/gluten-core/src/main/scala/io/glutenproject/GlutenPlugin.scala +++ b/gluten-core/src/main/scala/io/glutenproject/GlutenPlugin.scala @@ -278,10 +278,4 @@ private[glutenproject] object GlutenPlugin { implicit def sparkConfImplicit(conf: SparkConf): SparkConfImplicits = { new SparkConfImplicits(conf) } - - case class BackendBuildInfo( - backend: String, - backendBranch: String, - backendRevision: String, - backendRevisionTime: String) } diff --git a/gluten-core/src/main/scala/io/glutenproject/backendsapi/Backend.scala b/gluten-core/src/main/scala/io/glutenproject/backendsapi/Backend.scala index 438194a36..09799cdb1 100644 --- a/gluten-core/src/main/scala/io/glutenproject/backendsapi/Backend.scala +++ b/gluten-core/src/main
(incubator-gluten) branch main updated: [GLUTEN-5133]Modify the prompt information for TakeOrderedAndProjectExecTransformer (#5134)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 5b8b96e25 [GLUTEN-5133]Modify the prompt information for TakeOrderedAndProjectExecTransformer (#5134) 5b8b96e25 is described below commit 5b8b96e2541525544ba1e80c957a2bd8c5c1e95b Author: guixiaowen <58287738+guixiao...@users.noreply.github.com> AuthorDate: Wed Mar 27 09:14:48 2024 +0800 [GLUTEN-5133]Modify the prompt information for TakeOrderedAndProjectExecTransformer (#5134) --- .../glutenproject/execution/TakeOrderedAndProjectExecTransformer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gluten-core/src/main/scala/io/glutenproject/execution/TakeOrderedAndProjectExecTransformer.scala b/gluten-core/src/main/scala/io/glutenproject/execution/TakeOrderedAndProjectExecTransformer.scala index 0f0137b5d..f7b1fe2f4 100644 --- a/gluten-core/src/main/scala/io/glutenproject/execution/TakeOrderedAndProjectExecTransformer.scala +++ b/gluten-core/src/main/scala/io/glutenproject/execution/TakeOrderedAndProjectExecTransformer.scala @@ -49,7 +49,7 @@ case class TakeOrderedAndProjectExecTransformer( val orderByString = truncatedString(sortOrder, "[", ",", "]", maxFields) val outputString = truncatedString(output, "[", ",", "]", maxFields) -s"TakeOrderedAndProjectExecTransform(limit=$limit, " + +s"TakeOrderedAndProjectExecTransformer (limit=$limit, " + s"orderBy=$orderByString, output=$outputString)" } - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [MINOR] Remove redundant string format (#5126)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new dba4bcd3c [MINOR] Remove redundant string format (#5126) dba4bcd3c is described below commit dba4bcd3c4587f91296cd2387dc089c8c7f4b970 Author: Zhen Wang <643348...@qq.com> AuthorDate: Tue Mar 26 19:02:41 2024 +0800 [MINOR] Remove redundant string format (#5126) --- gluten-core/src/main/scala/io/glutenproject/GlutenPlugin.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gluten-core/src/main/scala/io/glutenproject/GlutenPlugin.scala b/gluten-core/src/main/scala/io/glutenproject/GlutenPlugin.scala index 670c9411d..c54b78da9 100644 --- a/gluten-core/src/main/scala/io/glutenproject/GlutenPlugin.scala +++ b/gluten-core/src/main/scala/io/glutenproject/GlutenPlugin.scala @@ -141,7 +141,7 @@ private[glutenproject] class GlutenDriverPlugin extends DriverPlugin with Loggin } else { s"$GLUTEN_SESSION_EXTENSION_NAME" } -conf.set(SPARK_SESSION_EXTS_KEY, String.format("%s", extensions)) +conf.set(SPARK_SESSION_EXTS_KEY, extensions) // off-heap bytes if (!conf.contains(GlutenConfig.GLUTEN_OFFHEAP_SIZE_KEY)) { - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [CORE] Pullout pre-project for ExpandExec (#5066)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 3ee108f97 [CORE] Pullout pre-project for ExpandExec (#5066) 3ee108f97 is described below commit 3ee108f97d103364facfde371bdd2af2d5013d7e Author: Joey AuthorDate: Thu Mar 21 19:13:26 2024 +0800 [CORE] Pullout pre-project for ExpandExec (#5066) --- .../execution/ExpandExecTransformer.scala | 126 - .../extension/columnar/PullOutPreProject.scala | 12 +- .../columnar/RewriteSparkPlanRulesManager.scala| 1 + 3 files changed, 35 insertions(+), 104 deletions(-) diff --git a/gluten-core/src/main/scala/io/glutenproject/execution/ExpandExecTransformer.scala b/gluten-core/src/main/scala/io/glutenproject/execution/ExpandExecTransformer.scala index 4d547f771..daa195b68 100644 --- a/gluten-core/src/main/scala/io/glutenproject/execution/ExpandExecTransformer.scala +++ b/gluten-core/src/main/scala/io/glutenproject/execution/ExpandExecTransformer.scala @@ -17,12 +17,12 @@ package io.glutenproject.execution import io.glutenproject.backendsapi.BackendsApiManager -import io.glutenproject.expression.{ConverterUtils, ExpressionConverter, LiteralTransformer} +import io.glutenproject.expression.{ConverterUtils, ExpressionConverter} import io.glutenproject.extension.ValidationResult import io.glutenproject.metrics.MetricsUpdater import io.glutenproject.substrait.`type`.{TypeBuilder, TypeNode} import io.glutenproject.substrait.SubstraitContext -import io.glutenproject.substrait.expression.{ExpressionBuilder, ExpressionNode} +import io.glutenproject.substrait.expression.ExpressionNode import io.glutenproject.substrait.extensions.ExtensionBuilder import io.glutenproject.substrait.rel.{RelBuilder, RelNode} @@ -32,9 +32,6 @@ import org.apache.spark.sql.execution._ import java.util.{ArrayList => JArrayList, List => JList} -import 
scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer - case class ExpandExecTransformer( projections: Seq[Seq[Expression]], output: Seq[Attribute], @@ -66,110 +63,33 @@ case class ExpandExecTransformer( input: RelNode, validation: Boolean): RelNode = { val args = context.registeredFunction -def needsPreProjection(projections: Seq[Seq[Expression]]): Boolean = { - projections -.exists(set => set.exists(p => !p.isInstanceOf[Attribute] && !p.isInstanceOf[Literal])) -} -if (needsPreProjection(projections)) { - // if there is not literal and attribute expression in project sets, add a project op - // to calculate them before expand op. - val preExprs = ArrayBuffer.empty[Expression] - val selectionMaps = ArrayBuffer.empty[Seq[Int]] - var preExprIndex = 0 - for (i <- projections.indices) { -val selections = ArrayBuffer.empty[Int] -for (j <- projections(i).indices) { - val proj = projections(i)(j) - if (!proj.isInstanceOf[Literal]) { -val exprIdx = preExprs.indexWhere(expr => expr.semanticEquals(proj)) -if (exprIdx != -1) { - selections += exprIdx -} else { - preExprs += proj - selections += preExprIndex - preExprIndex = preExprIndex + 1 -} - } else { -selections += -1 - } -} -selectionMaps += selections - } - // make project - val preExprNodes = preExprs -.map( - ExpressionConverter -.replaceWithExpressionTransformer(_, originalInputAttributes) -.doTransform(args)) -.asJava - - val emitStartIndex = originalInputAttributes.size - val inputRel = if (!validation) { -RelBuilder.makeProjectRel(input, preExprNodes, context, operatorId, emitStartIndex) - } else { -// Use a extension node to send the input types through Substrait plan for a validation. 
-val inputTypeNodeList = new java.util.ArrayList[TypeNode]() -for (attr <- originalInputAttributes) { - inputTypeNodeList.add(ConverterUtils.getTypeNode(attr.dataType, attr.nullable)) -} -val extensionNode = ExtensionBuilder.makeAdvancedExtension( - BackendsApiManager.getTransformerApiInstance.packPBMessage( -TypeBuilder.makeStruct(false, inputTypeNodeList).toProtobuf)) -RelBuilder.makeProjectRel( - input, - preExprNodes, - extensionNode, - context, - operatorId, - emitStartIndex) - } - - // make expand - val projectSetExprNodes = new JArrayList[JList[ExpressionNode]]() - for (i <- projections.indices) { +val projectSetExprNodes = new JArrayList[JList[ExpressionNode]]() +proj
(incubator-gluten) branch main updated: [GLUTEN-4899][VL] Fix 3.5 build issue with -Pspark-ut (#4975)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 6514392ad [GLUTEN-4899][VL] Fix 3.5 build issue with -Pspark-ut (#4975) 6514392ad is described below commit 6514392ad2366d13d08a86dcb91036dc1faeb143 Author: ayushi-agarwal AuthorDate: Wed Mar 20 07:21:49 2024 +0530 [GLUTEN-4899][VL] Fix 3.5 build issue with -Pspark-ut (#4975) --- .../src/test/scala/org/apache/spark/sql/GlutenSQLTestsTrait.scala | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gluten-ut/common/src/test/scala/org/apache/spark/sql/GlutenSQLTestsTrait.scala b/gluten-ut/common/src/test/scala/org/apache/spark/sql/GlutenSQLTestsTrait.scala index 98d12f5a3..5c6cc2e3f 100644 --- a/gluten-ut/common/src/test/scala/org/apache/spark/sql/GlutenSQLTestsTrait.scala +++ b/gluten-ut/common/src/test/scala/org/apache/spark/sql/GlutenSQLTestsTrait.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.sql +import io.glutenproject.sql.shims.SparkShimLoader + import org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.catalyst.util.{sideBySide, stackTraceToString} import org.apache.spark.sql.execution.SQLExecution @@ -59,10 +61,11 @@ trait GlutenSQLTestsTrait extends QueryTest with GlutenSQLTestsBaseTrait { try df catch { case ae: AnalysisException => - if (ae.plan.isDefined) { + val plan = SparkShimLoader.getSparkShims.getAnalysisExceptionPlan(ae) + if (plan.isDefined) { fail(s""" |Failed to analyze query: $ae -|${ae.plan.get} +|${plan.get} | |${stackTraceToString(ae)} |""".stripMargin) - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [GLUTEN-4903][CELEBORN] Support multiple versions of Celeborn (#4913)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 62746c40e [GLUTEN-4903][CELEBORN] Support multiple versions of Celeborn (#4913) 62746c40e is described below commit 62746c40e6e725f1c20ad06251ef976350d2bfc3 Author: Kerwin Zhang AuthorDate: Fri Mar 15 17:22:38 2024 +0800 [GLUTEN-4903][CELEBORN] Support multiple versions of Celeborn (#4913) --- .github/workflows/velox_be.yml | 22 +- docs/get-started/ClickHouse.md | 1 + docs/get-started/Velox.md | 4 +- .../CHCelebornHashBasedColumnarShuffleWriter.scala | 4 +- ...bornHashBasedColumnarShuffleWriterFactory.scala | 2 + .../gluten/celeborn/CelebornShuffleManager.java| 134 --- .../celeborn/CelebornShuffleWriterFactory.java | 1 + .../shuffle/gluten/celeborn/CelebornUtils.java | 384 + .../CelebornHashBasedColumnarShuffleWriter.scala | 3 +- gluten-celeborn/pom.xml| 10 + ...loxCelebornHashBasedColumnarShuffleWriter.scala | 2 + ...bornHashBasedColumnarShuffleWriterFactory.scala | 2 + pom.xml| 2 +- tools/gluten-it/package/pom.xml| 10 + tools/gluten-it/pom.xml| 6 + 15 files changed, 511 insertions(+), 76 deletions(-) diff --git a/.github/workflows/velox_be.yml b/.github/workflows/velox_be.yml index 8aeec14db..b53c678e5 100644 --- a/.github/workflows/velox_be.yml +++ b/.github/workflows/velox_be.yml @@ -450,11 +450,27 @@ jobs: --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ && GLUTEN_IT_JVM_ARGS=-Xmx20G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=40g -s=10.0 --threads=32 --iterations=1' - - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 with Celeborn + - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 with Celeborn 0.4.0 run: | 
$PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh \ - 'wget https://archive.apache.org/dist/incubator/celeborn/celeborn-0.3.0-incubating/apache-celeborn-0.3.0-incubating-bin.tgz && \ - tar xzf apache-celeborn-0.3.0-incubating-bin.tgz -C /opt/ && mv /opt/apache-celeborn-0.3.0-incubating-bin /opt/celeborn && cd /opt/celeborn && \ + 'wget https://archive.apache.org/dist/incubator/celeborn/celeborn-0.4.0-incubating/apache-celeborn-0.4.0-incubating-bin.tgz && \ + tar xzf apache-celeborn-0.4.0-incubating-bin.tgz -C /opt/ && mv /opt/apache-celeborn-0.4.0-incubating-bin /opt/celeborn && cd /opt/celeborn && \ + mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \ + echo -e "CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g" > ./conf/celeborn-env.sh && \ + echo -e "celeborn.worker.commitFiles.threads 128\nceleborn.worker.sortPartition.threads 64" > ./conf/celeborn-defaults.conf \ + && bash ./sbin/start-master.sh && bash ./sbin/start-worker.sh && \ + cd /opt/gluten/tools/gluten-it && mvn clean install -Pspark-3.2,rss,celeborn-0.4 \ + && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ +--local --preset=velox-with-celeborn --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ + && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ +--local --preset=velox-with-celeborn --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 && \ + bash /opt/celeborn/sbin/stop-worker.sh \ + && bash /opt/celeborn/sbin/stop-master.sh && rm -rf /opt/celeborn' + - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 with Celeborn 0.3.2 +run: | + $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh \ + 'wget https://archive.apache.org/dist/incubator/celeborn/celeborn-0.3.2-incubating/apache-celeborn-0.3.2-incubating-bin.tgz && \ + tar xzf apache-celeborn-0.3.2-incubating-bin.tgz -C /opt/ && mv 
/opt/apache-celeborn-0.3.2-incubating-bin /opt/celeborn && cd /opt/celeborn && \ mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \ echo -e "CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g" > ./conf
(incubator-gluten) branch main updated: [CORE] Add Complete case match in PullOutPreProject (#4968)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new c0b9efc80 [CORE] Add Complete case match in PullOutPreProject (#4968) c0b9efc80 is described below commit c0b9efc803170e32ff7579596c0d2b0426a8c9cf Author: Joey AuthorDate: Fri Mar 15 15:21:14 2024 +0800 [CORE] Add Complete case match in PullOutPreProject (#4968) --- .../io/glutenproject/execution/GlutenFunctionValidateSuite.scala | 2 +- .../scala/io/glutenproject/extension/columnar/PullOutPreProject.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenFunctionValidateSuite.scala b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenFunctionValidateSuite.scala index 4f820343a..3a0d9f62c 100644 --- a/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenFunctionValidateSuite.scala +++ b/backends-clickhouse/src/test/scala/io/glutenproject/execution/GlutenFunctionValidateSuite.scala @@ -607,7 +607,7 @@ class GlutenFunctionValidateSuite extends GlutenClickHouseWholeStageTransformerS runQueryAndCompare( "select id, if(id % 2 = 0, sum(id), max(id)) as s1, " + "if(id %2 = 0, sum(id+1), sum(id+2)) as s2 from range(10) group by id") { -df => checkOperatorCount[ProjectExecTransformer](1)(df) +df => checkOperatorCount[ProjectExecTransformer](2)(df) } // CSE in sort diff --git a/gluten-core/src/main/scala/io/glutenproject/extension/columnar/PullOutPreProject.scala b/gluten-core/src/main/scala/io/glutenproject/extension/columnar/PullOutPreProject.scala index 63c7aef78..5bf70597c 100644 --- a/gluten-core/src/main/scala/io/glutenproject/extension/columnar/PullOutPreProject.scala +++ b/gluten-core/src/main/scala/io/glutenproject/extension/columnar/PullOutPreProject.scala @@ -19,7 +19,7 @@ package 
io.glutenproject.extension.columnar import io.glutenproject.utils.PullOutProjectHelper import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Partial} +import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Complete, Partial} import org.apache.spark.sql.catalyst.rules.Rule import org.apache.spark.sql.execution.{ProjectExec, SortExec, SparkPlan, TakeOrderedAndProjectExec} import org.apache.spark.sql.execution.aggregate.{BaseAggregateExec, TypedAggregateExpression} @@ -53,7 +53,7 @@ object PullOutPreProject extends Rule[SparkPlan] with PullOutProjectHelper { } else { expr.filter.exists(isNotAttribute) || (expr.mode match { -case Partial => +case Partial | Complete => expr.aggregateFunction.children.exists(isNotAttributeAndLiteral) case _ => false }) - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Verify unhex has been offloaded to native successfully (#4937)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 825caceab [VL] Verify unhex has been offloaded to native successfully (#4937) 825caceab is described below commit 825caceab6e6fce61c06ec069134cc2de1dcd57a Author: Yang Zhang AuthorDate: Thu Mar 14 09:11:56 2024 +0800 [VL] Verify unhex has been offloaded to native successfully (#4937) --- .../io/glutenproject/execution/VeloxFunctionsValidateSuite.scala| 6 ++ 1 file changed, 6 insertions(+) diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala index 587492ef3..33f5e48ad 100644 --- a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala @@ -449,4 +449,10 @@ class VeloxFunctionsValidateSuite extends VeloxWholeStageTransformerSuite { checkOperatorMatch[ProjectExecTransformer] } } + + test("Test unhex function") { +runQueryAndCompare("SELECT unhex(hex(l_shipmode)) FROM lineitem limit 1") { + checkOperatorMatch[ProjectExecTransformer] +} + } } - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [GLUTEN-4926][CELEBORN] CelebornShuffleManager should remove shuffleId from columnarShuffleIds after unregistering shuffle (#4927)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 494692d3f [GLUTEN-4926][CELEBORN] CelebornShuffleManager should remove shuffleId from columnarShuffleIds after unregistering shuffle (#4927) 494692d3f is described below commit 494692d3f3eba5aad8b2425c120b0a83fa282c02 Author: Nicholas Jiang AuthorDate: Tue Mar 12 15:29:15 2024 +0800 [GLUTEN-4926][CELEBORN] CelebornShuffleManager should remove shuffleId from columnarShuffleIds after unregistering shuffle (#4927) --- .../spark/shuffle/gluten/celeborn/CelebornShuffleManager.java | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java b/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java index b0f74fad2..c447e7ade 100644 --- a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java +++ b/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java @@ -242,7 +242,11 @@ public class CelebornShuffleManager implements ShuffleManager { @Override public boolean unregisterShuffle(int shuffleId) { if (columnarShuffleIds.contains(shuffleId)) { - return columnarShuffleManager().unregisterShuffle(shuffleId); + if (columnarShuffleManager().unregisterShuffle(shuffleId)) { +return columnarShuffleIds.remove(shuffleId); + } else { +return false; + } } if (appUniqueId == null) { return true; - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [GLUTEN-4835][CORE] Match metric names with Spark (#4834)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new cfa9afa58 [GLUTEN-4835][CORE] Match metric names with Spark (#4834) cfa9afa58 is described below commit cfa9afa58ce17ff001282a82f632100d941a0322 Author: Chungmin Lee AuthorDate: Mon Mar 11 02:21:58 2024 -0700 [GLUTEN-4835][CORE] Match metric names with Spark (#4834) --- .../backendsapi/clickhouse/CHMetricsApi.scala | 52 +++--- .../metrics/BatchScanMetricsUpdater.scala | 6 +-- .../metrics/ExpandMetricsUpdater.scala | 4 +- .../metrics/FileSourceScanMetricsUpdater.scala | 6 +-- .../metrics/FilterMetricsUpdater.scala | 4 +- .../metrics/GenerateMetricsUpdater.scala | 4 +- .../metrics/HashAggregateMetricsUpdater.scala | 4 +- .../metrics/HashJoinMetricsUpdater.scala | 4 +- .../metrics/HiveTableScanMetricsUpdater.scala | 4 +- .../metrics/InputIteratorMetricsUpdater.scala | 4 +- .../metrics/ProjectMetricsUpdater.scala| 4 +- .../glutenproject/metrics/SortMetricsUpdater.scala | 4 +- .../metrics/WindowMetricsUpdater.scala | 4 +- .../GlutenClickHouseTPCHBucketSuite.scala | 18 ...ckHouseTPCHColumnarShuffleParquetAQESuite.scala | 44 +- .../GlutenClickHouseTPCHParquetBucketSuite.scala | 18 .../GlutenClickHouseTPCDSMetricsSuite.scala| 12 ++--- .../metrics/GlutenClickHouseTPCHMetricsSuite.scala | 36 +++ .../benchmarks/CHParquetReadBenchmark.scala| 2 +- .../backendsapi/velox/MetricsApiImpl.scala | 20 - .../execution/BasicScanExecTransformer.scala | 2 +- .../metrics/BatchScanMetricsUpdater.scala | 4 +- .../metrics/ExpandMetricsUpdater.scala | 2 +- .../metrics/FileSourceScanMetricsUpdater.scala | 2 +- .../metrics/FilterMetricsUpdater.scala | 2 +- .../metrics/HiveTableScanMetricsUpdater.scala | 2 +- .../metrics/InputIteratorMetricsUpdater.scala | 4 +- .../metrics/LimitMetricsUpdater.scala | 2 +- .../metrics/ProjectMetricsUpdater.scala| 2 
+- .../glutenproject/metrics/SortMetricsUpdater.scala | 2 +- .../metrics/WindowMetricsUpdater.scala | 2 +- .../org/apache/spark/sql/GlutenSQLQuerySuite.scala | 26 +++ 32 files changed, 165 insertions(+), 141 deletions(-) diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHMetricsApi.scala b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHMetricsApi.scala index 838612036..488686e93 100644 --- a/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHMetricsApi.scala +++ b/backends-clickhouse/src/main/scala/io/glutenproject/backendsapi/clickhouse/CHMetricsApi.scala @@ -44,8 +44,8 @@ class CHMetricsApi extends MetricsApi with Logging with LogLevelUtil { "iterReadTime" -> SQLMetrics.createTimingMetric( sparkContext, "time of reading from iterator"), - "inputRows" -> SQLMetrics.createMetric(sparkContext, "number of input rows"), - "outputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), + "numInputRows" -> SQLMetrics.createMetric(sparkContext, "number of input rows"), + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"), "fillingRightJoinSideTime" -> SQLMetrics.createTimingMetric( sparkContext, "filling right join side time") @@ -59,12 +59,12 @@ class CHMetricsApi extends MetricsApi with Logging with LogLevelUtil { override def genBatchScanTransformerMetrics(sparkContext: SparkContext): Map[String, SQLMetric] = Map( - "inputRows" -> SQLMetrics.createMetric(sparkContext, "number of input rows"), + "numInputRows" -> SQLMetrics.createMetric(sparkContext, "number of input rows"), "inputVectors" -> SQLMetrics.createMetric(sparkContext, "number of input vectors"), "inputBytes" -> SQLMetrics.createSizeMetric(sparkContext, "number of input bytes"), "rawInputRows" -> SQLMetrics.createMetric(sparkContext, "number of raw input rows"), "rawInputBytes" -> SQLMetrics.createSizeMetric(sparkContext, "number of raw input bytes"), - "outputRows" -> 
SQLMetrics.createMetric(sparkContext, "number of output rows"), + "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows"),
(incubator-gluten) branch main updated: [GLUTEN-4424][CORE] Follow up upgrading spark version to 3.5.1 (#4845)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 42b78780c [GLUTEN-4424][CORE] Follow up upgrading spark version to 3.5.1 (#4845) 42b78780c is described below commit 42b78780c71fe81f47be9be01cdedddb84388f9a Author: JiaKe AuthorDate: Fri Mar 8 12:17:33 2024 +0800 [GLUTEN-4424][CORE] Follow up upgrading spark version to 3.5.1 (#4845) --- dev/buildbundle-veloxbe.sh| 1 + dev/package.sh| 1 + .../main/scala/io/glutenproject/execution/WholeStageTransformer.scala | 2 -- .../scala/io/glutenproject/execution/GenerateTreeStringShim.scala | 4 ++-- .../scala/io/glutenproject/execution/GenerateTreeStringShim.scala | 4 ++-- .../scala/io/glutenproject/execution/GenerateTreeStringShim.scala | 4 ++-- .../scala/io/glutenproject/execution/GenerateTreeStringShim.scala | 4 ++-- tools/gluten-te/ubuntu/dockerfile-buildenv| 4 8 files changed, 14 insertions(+), 10 deletions(-) diff --git a/dev/buildbundle-veloxbe.sh b/dev/buildbundle-veloxbe.sh index 3bfd6994a..1c77b52a6 100755 --- a/dev/buildbundle-veloxbe.sh +++ b/dev/buildbundle-veloxbe.sh @@ -7,3 +7,4 @@ cd $GLUTEN_DIR mvn clean package -Pbackends-velox -Prss -Pspark-3.2 -DskipTests mvn clean package -Pbackends-velox -Prss -Pspark-3.3 -DskipTests mvn clean package -Pbackends-velox -Prss -Pspark-3.4 -DskipTests +mvn clean package -Pbackends-velox -Prss -Pspark-3.5 -DskipTests diff --git a/dev/package.sh b/dev/package.sh index 0e1f362bc..2f6602d00 100755 --- a/dev/package.sh +++ b/dev/package.sh @@ -14,6 +14,7 @@ $GLUTEN_DIR/dev/builddeps-veloxbe.sh --build_tests=ON --build_benchmarks=ON --en mvn clean package -Pbackends-velox -Prss -Pspark-3.2 -DskipTests mvn clean package -Pbackends-velox -Prss -Pspark-3.3 -DskipTests mvn clean package -Pbackends-velox -Prss -Pspark-3.4 -DskipTests +mvn clean package -Pbackends-velox -Prss -Pspark-3.5 
-DskipTests mkdir -p $THIRDPARTY_LIB function process_setup_ubuntu_2004 { diff --git a/gluten-core/src/main/scala/io/glutenproject/execution/WholeStageTransformer.scala b/gluten-core/src/main/scala/io/glutenproject/execution/WholeStageTransformer.scala index 2bbc80afc..24957d1c2 100644 --- a/gluten-core/src/main/scala/io/glutenproject/execution/WholeStageTransformer.scala +++ b/gluten-core/src/main/scala/io/glutenproject/execution/WholeStageTransformer.scala @@ -102,8 +102,6 @@ case class WholeStageTransformer(child: SparkPlan, materializeInput: Boolean = f def stageId: Int = transformStageId - def substraitPlanJsonValue: String = substraitPlanJson - def wholeStageTransformerContextDefined: Boolean = wholeStageTransformerContext.isDefined // For WholeStageCodegen-like operator, only pipeline time will be handled in graph plotting. diff --git a/shims/spark32/src/main/scala/io/glutenproject/execution/GenerateTreeStringShim.scala b/shims/spark32/src/main/scala/io/glutenproject/execution/GenerateTreeStringShim.scala index d99fb18d7..d06728bc7 100644 --- a/shims/spark32/src/main/scala/io/glutenproject/execution/GenerateTreeStringShim.scala +++ b/shims/spark32/src/main/scala/io/glutenproject/execution/GenerateTreeStringShim.scala @@ -29,7 +29,7 @@ trait GenerateTreeStringShim extends UnaryExecNode { def stageId: Int - def substraitPlanJsonValue: String + def substraitPlanJson: String def wholeStageTransformerContextDefined: Boolean @@ -57,7 +57,7 @@ trait GenerateTreeStringShim extends UnaryExecNode { if (verbose && wholeStageTransformerContextDefined) { append(prefix + "Substrait plan:\n") - append(substraitPlanJsonValue) + append(substraitPlanJson) append("\n") } } diff --git a/shims/spark33/src/main/scala/io/glutenproject/execution/GenerateTreeStringShim.scala b/shims/spark33/src/main/scala/io/glutenproject/execution/GenerateTreeStringShim.scala index d99fb18d7..d06728bc7 100644 --- a/shims/spark33/src/main/scala/io/glutenproject/execution/GenerateTreeStringShim.scala 
+++ b/shims/spark33/src/main/scala/io/glutenproject/execution/GenerateTreeStringShim.scala @@ -29,7 +29,7 @@ trait GenerateTreeStringShim extends UnaryExecNode { def stageId: Int - def substraitPlanJsonValue: String + def substraitPlanJson: String def wholeStageTransformerContextDefined: Boolean @@ -57,7 +57,7 @@ trait GenerateTreeStringShim extends UnaryExecNode { if (verbose && wholeStageTransformerContextDefined) { append(prefix + "Substrait plan:\n") - append(substraitPlanJsonValue) + append(substraitPlanJson) append("\n") } } diff --git a/shims/spark34/src/main/scala/io/glutenproject/execution/Gener
(incubator-gluten) branch main updated (879ea76ce -> 00e3607c6)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a change to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git from 879ea76ce [VL] Merge Partial and PartialMerge logic in generateMergeCompanionNode (#4883) add 00e3607c6 [CORE] Fix Spark-3.5 CI (#4886) No new revisions were added by this update. Summary of changes: .github/workflows/velox_be.yml | 5 +++ .../sql/shims/spark35/Spark35Shims.scala | 4 +-- .../glutenproject/integration/tpc/ShimUtils.scala | 30 .../integration/tpc/ds/TpcdsDataGen.scala | 9 ++--- .../integration/tpc/h/TpchDataGen.scala| 5 ++- tools/gluten-it/pom.xml| 42 +++--- 6 files changed, 57 insertions(+), 38 deletions(-) copy gluten-core/src/main/scala/io/glutenproject/execution/BaseDataSource.scala => tools/gluten-it/common/src/main/scala/io/glutenproject/integration/tpc/ShimUtils.scala (56%) - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org
(incubator-gluten) branch main updated: [VL] Merge Partial and PartialMerge logic in generateMergeCompanionNode (#4883)
This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git The following commit(s) were added to refs/heads/main by this push: new 879ea76ce [VL] Merge Partial and PartialMerge logic in generateMergeCompanionNode (#4883) 879ea76ce is described below commit 879ea76ce2dae22f97e61c81254464dc5292a1ac Author: Joey AuthorDate: Fri Mar 8 09:52:08 2024 +0800 [VL] Merge Partial and PartialMerge logic in generateMergeCompanionNode (#4883) --- .../execution/HashAggregateExecTransformer.scala| 13 ++--- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/backends-velox/src/main/scala/io/glutenproject/execution/HashAggregateExecTransformer.scala b/backends-velox/src/main/scala/io/glutenproject/execution/HashAggregateExecTransformer.scala index cc428b592..cc7a1a852 100644 --- a/backends-velox/src/main/scala/io/glutenproject/execution/HashAggregateExecTransformer.scala +++ b/backends-velox/src/main/scala/io/glutenproject/execution/HashAggregateExecTransformer.scala @@ -217,18 +217,9 @@ abstract class HashAggregateExecTransformer( def generateMergeCompanionNode(): Unit = { aggregateMode match { -case Partial => - val partialNode = ExpressionBuilder.makeAggregateFunction( -VeloxAggregateFunctionsBuilder.create(args, aggregateFunction, aggregateMode), -childrenNodeList, -modeKeyWord, -VeloxIntermediateData.getIntermediateTypeNode(aggregateFunction) - ) - aggregateNodeList.add(partialNode) -case PartialMerge => +case Partial | PartialMerge => val aggFunctionNode = ExpressionBuilder.makeAggregateFunction( -VeloxAggregateFunctionsBuilder - .create(args, aggregateFunction, aggregateMode), +VeloxAggregateFunctionsBuilder.create(args, aggregateFunction, aggregateMode), childrenNodeList, modeKeyWord, VeloxIntermediateData.getIntermediateTypeNode(aggregateFunction) - To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: 
commits-h...@gluten.apache.org