This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 24edc0ef5bee [SPARK-45752][SQL] Unreferenced CTE should all be checked by CheckAnalysis0 24edc0ef5bee is described below commit 24edc0ef5bee578de8eec3b032f993812e4303ea Author: Rui Wang <rui.w...@databricks.com> AuthorDate: Thu Nov 9 15:25:52 2023 +0800 [SPARK-45752][SQL] Unreferenced CTE should all be checked by CheckAnalysis0 ### What changes were proposed in this pull request? This PR fixes an issue that if a CTE is referenced by a non-referenced CTE, then this CTE should also have ref count as 0 and goes through CheckAnalysis0. This will guarantee analyzer throw proper error message for problematic CTE which is not referenced. ### Why are the changes needed? To improve error message for non-referenced CTE case. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT ### Was this patch authored or co-authored using generative AI tooling? NO Closes #43614 from amaliujia/cte_ref. Lead-authored-by: Rui Wang <rui.w...@databricks.com> Co-authored-by: Wenchen Fan <cloud0...@gmail.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../sql/catalyst/analysis/CheckAnalysis.scala | 28 ++++++++++++++++++++-- .../org/apache/spark/sql/CTEInlineSuite.scala | 11 +++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index cebaee2cdec9..29d60ae0f41e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -148,15 +148,39 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB errorClass, missingCol, orderedCandidates, a.origin) } + private def checkUnreferencedCTERelations( + cteMap: mutable.Map[Long, (CTERelationDef, Int, mutable.Map[Long, Int])], + visited: mutable.Map[Long, Boolean], + cteId: Long): Unit = { + if (visited(cteId)) { + return + } + val (cteDef, _, refMap) = cteMap(cteId) + refMap.foreach { case (id, _) => + checkUnreferencedCTERelations(cteMap, visited, id) + } + checkAnalysis0(cteDef.child) + visited(cteId) = true + } + def checkAnalysis(plan: LogicalPlan): Unit = { val inlineCTE = InlineCTE(alwaysInline = true) val cteMap = mutable.HashMap.empty[Long, (CTERelationDef, Int, mutable.Map[Long, Int])] inlineCTE.buildCTEMap(plan, cteMap) - cteMap.values.foreach { case (relation, refCount, _) => + cteMap.values.foreach { case (relation, _, _) => // If a CTE relation is never used, it will disappear after inline. Here we explicitly check // analysis for it, to make sure the entire query plan is valid. try { - if (refCount == 0) checkAnalysis0(relation.child) + // If a CTE relation ref count is 0, the other CTE relations that reference it + // should also be checked by checkAnalysis0. This code will also guarantee the leaf + // relations that do not reference any others are checked first. + val visited: mutable.Map[Long, Boolean] = mutable.Map.empty.withDefaultValue(false) + cteMap.foreach { case (cteId, _) => + val (_, refCount, _) = cteMap(cteId) + if (refCount == 0) { + checkUnreferencedCTERelations(cteMap, visited, cteId) + } + } } catch { case e: AnalysisException => throw new ExtendedAnalysisException(e, relation.child) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala index 5f6c44792658..055c04992c00 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala @@ -678,6 +678,17 @@ abstract class CTEInlineSuiteBase }.isDefined, "CTE columns should not be pruned.") } } + + test("SPARK-45752: Unreferenced CTE should all be checked by CheckAnalysis0") { + val e = intercept[AnalysisException](sql( + s""" + |with + |a as (select * from non_exist), + |b as (select * from a) + |select 2 + |""".stripMargin)) + checkErrorTableNotFound(e, "`non_exist`", ExpectedContext("non_exist", 26, 34)) + } } class CTEInlineSuiteAEOff extends CTEInlineSuiteBase with DisableAdaptiveExecutionSuite --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org