This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 7435f45 [MINOR][DOC] Refine comments of QueryPlan regarding subquery 7435f45 is described below commit 7435f4543ea6f2b927da6055c1cfb75f4a62f19d Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Fri Mar 27 09:35:35 2020 -0700 [MINOR][DOC] Refine comments of QueryPlan regarding subquery ### What changes were proposed in this pull request? The query plan of Spark SQL is a mutually recursive structure: QueryPlan -> Expression (PlanExpression) -> QueryPlan, but the transformations do not take this into account. This PR refines the comments of `QueryPlan` to highlight this fact. ### Why are the changes needed? Better documentation. ### Does this PR introduce any user-facing change? no ### How was this patch tested? N/A Closes #28050 from cloud-fan/comment. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> (cherry picked from commit 8a5d49610d875c473114781e92300c79e24a53cc) Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../spark/sql/catalyst/plans/QueryPlan.scala | 32 ++++++++++++++-------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index 1248266..9f86fb2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -23,6 +23,16 @@ import org.apache.spark.sql.catalyst.trees.{CurrentOrigin, TreeNode, TreeNodeTag import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, StructType} +/** + * An abstraction of the Spark SQL query plan tree, which can be logical or physical. 
This class + * defines some basic properties of a query plan node, as well as some new transform APIs to + * transform the expressions of the plan node. + * + * Note that, the query plan is a mutually recursive structure: + * QueryPlan -> Expression (subquery) -> QueryPlan + * The tree traverse APIs like `transform`, `foreach`, `collect`, etc. that are + * inherited from `TreeNode`, do not traverse into query plans inside subqueries. + */ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanType] { self: PlanType => @@ -133,7 +143,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT /** * Returns the result of running [[transformExpressions]] on this node - * and all its children. + * and all its children. Note that this method skips expressions inside subqueries. */ def transformAllExpressions(rule: PartialFunction[Expression, Expression]): this.type = { transform { @@ -204,7 +214,7 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT } /** - * All the subqueries of current plan. + * All the top-level subqueries of the current plan node. Nested subqueries are not included. */ def subqueries: Seq[PlanType] = { expressions.flatMap(_.collect { @@ -213,21 +223,21 @@ abstract class QueryPlan[PlanType <: QueryPlan[PlanType]] extends TreeNode[PlanT } /** - * Returns a sequence containing the result of applying a partial function to all elements in this - * plan, also considering all the plans in its (nested) subqueries - */ - def collectInPlanAndSubqueries[B](f: PartialFunction[PlanType, B]): Seq[B] = - (this +: subqueriesAll).flatMap(_.collect(f)) - - /** - * Returns a sequence containing the subqueries in this plan, also including the (nested) - * subquries in its children + * All the subqueries of the current plan node and all its children. Nested subqueries are also + * included. 
*/ def subqueriesAll: Seq[PlanType] = { val subqueries = this.flatMap(_.subqueries) subqueries ++ subqueries.flatMap(_.subqueriesAll) } + /** + * Returns a sequence containing the result of applying a partial function to all elements in this + * plan, also considering all the plans in its (nested) subqueries + */ + def collectInPlanAndSubqueries[B](f: PartialFunction[PlanType, B]): Seq[B] = + (this +: subqueriesAll).flatMap(_.collect(f)) + override def innerChildren: Seq[QueryPlan[_]] = subqueries /** --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org