This is an automated email from the ASF dual-hosted git repository.
liujiayi771 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 2fd5fe0304 [CORE] Make each RewriteSingleNode evaluate its own
isRewritable (#9935)
2fd5fe0304 is described below
commit 2fd5fe0304479321c1a999195973c6bcbdb5e494
Author: Zouxxyy <[email protected]>
AuthorDate: Tue Jul 1 09:22:07 2025 +0800
[CORE] Make each RewriteSingleNode evaluate its own isRewritable (#9935)
---
.../heuristic/RewriteSparkPlanRulesManager.scala | 2 +-
.../columnar/rewrite/ProjectColumnPruning.scala | 5 ++-
.../columnar/rewrite/PullOutPostProject.scala | 7 +++-
.../columnar/rewrite/PullOutPreProject.scala | 12 +++++-
.../columnar/rewrite/RewriteEligibility.scala | 48 ----------------------
.../extension/columnar/rewrite/RewriteIn.scala | 6 ++-
.../extension/columnar/rewrite/RewriteJoin.scala | 5 ++-
.../rewrite/RewriteMultiChildrenCount.scala | 5 ++-
8 files changed, 35 insertions(+), 55 deletions(-)
diff --git
a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/heuristic/RewriteSparkPlanRulesManager.scala
b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/heuristic/RewriteSparkPlanRulesManager.scala
index 59a3700590..24fa7a6fc9 100644
---
a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/heuristic/RewriteSparkPlanRulesManager.scala
+++
b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/heuristic/RewriteSparkPlanRulesManager.scala
@@ -47,7 +47,7 @@ class RewriteSparkPlanRulesManager private (
extends Rule[SparkPlan] {
private def mayNeedRewrite(plan: SparkPlan): Boolean = {
- FallbackTags.maybeOffloadable(plan) &&
rewriteRules.forall(_.isRewritable(plan))
+ FallbackTags.maybeOffloadable(plan) &&
rewriteRules.exists(_.isRewritable(plan))
}
private def getFallbackTagBack(rewrittenPlan: SparkPlan):
Option[FallbackTag] = {
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/ProjectColumnPruning.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/ProjectColumnPruning.scala
index cdef646a2d..258edae199 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/ProjectColumnPruning.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/ProjectColumnPruning.scala
@@ -24,7 +24,10 @@ import org.apache.spark.sql.execution.{ProjectExec,
SparkPlan, UnaryExecNode}
*/
object ProjectColumnPruning extends RewriteSingleNode {
override def isRewritable(plan: SparkPlan): Boolean = {
- RewriteEligibility.isRewritable(plan)
+ plan match {
+ case parent: UnaryExecNode if parent.child.isInstanceOf[ProjectExec] =>
true
+ case _ => false
+ }
}
override def rewrite(plan: SparkPlan): SparkPlan = plan match {
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPostProject.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPostProject.scala
index 1a3ecca16a..5c1cade61c 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPostProject.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPostProject.scala
@@ -34,7 +34,12 @@ import scala.collection.mutable.ArrayBuffer
*/
object PullOutPostProject extends RewriteSingleNode with PullOutProjectHelper {
override def isRewritable(plan: SparkPlan): Boolean = {
- RewriteEligibility.isRewritable(plan)
+ plan match {
+ case _: BaseAggregateExec => true
+ case _: WindowExec => true
+ case _: GenerateExec => true
+ case _ => false
+ }
}
private def needsPostProjection(plan: SparkPlan): Boolean = {
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPreProject.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPreProject.scala
index 28e3ffb7d1..5fca907a72 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPreProject.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/PullOutPreProject.scala
@@ -38,7 +38,17 @@ import scala.collection.mutable
*/
object PullOutPreProject extends RewriteSingleNode with PullOutProjectHelper {
override def isRewritable(plan: SparkPlan): Boolean = {
- RewriteEligibility.isRewritable(plan)
+ plan match {
+ case _: SortExec => true
+ case _: TakeOrderedAndProjectExec => true
+ case _: BaseAggregateExec => true
+ case _: WindowExec => true
+ case plan if SparkShimLoader.getSparkShims.isWindowGroupLimitExec(plan)
=> true
+ case _: ExpandExec => true
+ case _: GenerateExec => true
+ case _: ArrowEvalPythonExec => true
+ case _ => false
+ }
}
private def needsPreProject(plan: SparkPlan): Boolean = {
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteEligibility.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteEligibility.scala
deleted file mode 100644
index dd2847df6f..0000000000
---
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteEligibility.scala
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.gluten.extension.columnar.rewrite
-
-import org.apache.gluten.sql.shims.SparkShimLoader
-
-import org.apache.spark.sql.execution.{ExpandExec, FileSourceScanExec,
FilterExec, GenerateExec, SortExec, SparkPlan, TakeOrderedAndProjectExec}
-import org.apache.spark.sql.execution.aggregate.BaseAggregateExec
-import org.apache.spark.sql.execution.joins.BaseJoinExec
-import org.apache.spark.sql.execution.python.ArrowEvalPythonExec
-import org.apache.spark.sql.execution.window.WindowExec
-
-/**
- * TODO: Remove this then implement API #isRewritable in rewrite rules.
- *
- * Since https://github.com/apache/incubator-gluten/pull/4645
- */
-object RewriteEligibility {
- def isRewritable(plan: SparkPlan): Boolean = plan match {
- case _: SortExec => true
- case _: TakeOrderedAndProjectExec => true
- case _: BaseAggregateExec => true
- case _: BaseJoinExec => true
- case _: WindowExec => true
- case _: FilterExec => true
- case _: FileSourceScanExec => true
- case _: ExpandExec => true
- case _: GenerateExec => true
- case plan if SparkShimLoader.getSparkShims.isWindowGroupLimitExec(plan) =>
true
- case _: ArrowEvalPythonExec => true
- case _ => false
- }
-
-}
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteIn.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteIn.scala
index 5a28576750..deec91dfb9 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteIn.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteIn.scala
@@ -33,7 +33,11 @@ import org.apache.spark.sql.types.StructType
*/
object RewriteIn extends RewriteSingleNode {
override def isRewritable(plan: SparkPlan): Boolean = {
- RewriteEligibility.isRewritable(plan)
+ plan match {
+ case _: FileSourceScanExec => true
+ case _: FilterExec => true
+ case _ => false
+ }
}
private def shouldRewrite(e: Expression): Boolean = {
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteJoin.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteJoin.scala
index 42bbbba39d..6d456857d7 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteJoin.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteJoin.scala
@@ -27,7 +27,10 @@ import
org.apache.spark.sql.execution.joins.{ShuffledHashJoinExec, SortMergeJoin
/** If force ShuffledHashJoin, convert [[SortMergeJoinExec]] to
[[ShuffledHashJoinExec]]. */
object RewriteJoin extends RewriteSingleNode with JoinSelectionHelper {
override def isRewritable(plan: SparkPlan): Boolean = {
- RewriteEligibility.isRewritable(plan)
+ plan match {
+ case _: SortMergeJoinExec => true
+ case _ => false
+ }
}
private def getSmjBuildSide(join: SortMergeJoinExec): Option[BuildSide] = {
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteMultiChildrenCount.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteMultiChildrenCount.scala
index 1d11dcc917..1003407191 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteMultiChildrenCount.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/extension/columnar/rewrite/RewriteMultiChildrenCount.scala
@@ -49,7 +49,10 @@ object RewriteMultiChildrenCount extends RewriteSingleNode
with PullOutProjectHe
private lazy val shouldRewriteCount =
BackendsApiManager.getSettings.shouldRewriteCount()
override def isRewritable(plan: SparkPlan): Boolean = {
- RewriteEligibility.isRewritable(plan)
+ plan match {
+ case _: BaseAggregateExec => true
+ case _ => false
+ }
}
private def extractCountForRewrite(aggExpr: AggregateExpression):
Option[Count] = {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]