This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 493493d6c5bb [SPARK-48173][SQL] CheckAnalysis should see the entire query plan 493493d6c5bb is described below commit 493493d6c5bbbaa0b04f5548ac1ccd9502e8b8fa Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Tue May 7 08:02:25 2024 -0700 [SPARK-48173][SQL] CheckAnalysis should see the entire query plan ### What changes were proposed in this pull request? This is a follow-up of https://github.com/apache/spark/pull/38029 . Some custom check rules need to see the entire query plan tree to get some context, but https://github.com/apache/spark/pull/38029 breaks it as it checks the query plan of dangling CTE relations recursively. This PR fixes it by putting back the dangling CTE relation in the main query plan and then check the main query plan. ### Why are the changes needed? Revert the breaking change to custom check rules ### Does this PR introduce _any_ user-facing change? No for most users. This restores the behavior of Spark 3.3 and earlier for custom check rules. ### How was this patch tested? existing tests. ### Was this patch authored or co-authored using generative AI tooling? No Closes #46439 from cloud-fan/check. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../sql/catalyst/analysis/CheckAnalysis.scala | 39 +++++++++++----------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index d1b336b08955..e55f23b6aa86 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -145,15 +145,16 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB private def checkUnreferencedCTERelations( cteMap: mutable.Map[Long, (CTERelationDef, Int, mutable.Map[Long, Int])], visited: mutable.Map[Long, Boolean], + danglingCTERelations: mutable.ArrayBuffer[CTERelationDef], cteId: Long): Unit = { if (visited(cteId)) { return } val (cteDef, _, refMap) = cteMap(cteId) refMap.foreach { case (id, _) => - checkUnreferencedCTERelations(cteMap, visited, id) + checkUnreferencedCTERelations(cteMap, visited, danglingCTERelations, id) } - checkAnalysis0(cteDef.child) + danglingCTERelations.append(cteDef) visited(cteId) = true } @@ -161,35 +162,35 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB val inlineCTE = InlineCTE(alwaysInline = true) val cteMap = mutable.HashMap.empty[Long, (CTERelationDef, Int, mutable.Map[Long, Int])] inlineCTE.buildCTEMap(plan, cteMap) + val danglingCTERelations = mutable.ArrayBuffer.empty[CTERelationDef] val visited: mutable.Map[Long, Boolean] = mutable.Map.empty.withDefaultValue(false) - cteMap.foreach { case (cteId, (relation, refCount, _)) => - // If a CTE relation is never used, it will disappear after inline. Here we explicitly check - // analysis for it, to make sure the entire query plan is valid. - try { - // If a CTE relation ref count is 0, the other CTE relations that reference it - // should also be checked by checkAnalysis0. This code will also guarantee the leaf - // relations that do not reference any others are checked first. - if (refCount == 0) { - checkUnreferencedCTERelations(cteMap, visited, cteId) - } - } catch { - case e: AnalysisException => - throw new ExtendedAnalysisException(e, relation.child) + // If a CTE relation is never used, it will disappear after inline. Here we explicitly collect + // these dangling CTE relations, and put them back in the main query, to make sure the entire + // query plan is valid. + cteMap.foreach { case (cteId, (_, refCount, _)) => + // If a CTE relation ref count is 0, the other CTE relations that reference it should also be + // collected. This code will also guarantee the leaf relations that do not reference + // any others are collected first. + if (refCount == 0) { + checkUnreferencedCTERelations(cteMap, visited, danglingCTERelations, cteId) } } // Inline all CTEs in the plan to help check query plan structures in subqueries. - var inlinedPlan: Option[LogicalPlan] = None + var inlinedPlan: LogicalPlan = plan try { - inlinedPlan = Some(inlineCTE(plan)) + inlinedPlan = inlineCTE(plan) } catch { case e: AnalysisException => throw new ExtendedAnalysisException(e, plan) } + if (danglingCTERelations.nonEmpty) { + inlinedPlan = WithCTE(inlinedPlan, danglingCTERelations.toSeq) + } try { - checkAnalysis0(inlinedPlan.get) + checkAnalysis0(inlinedPlan) } catch { case e: AnalysisException => - throw new ExtendedAnalysisException(e, inlinedPlan.get) + throw new ExtendedAnalysisException(e, inlinedPlan) } plan.setAnalyzed() } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org