This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 24edc0ef5bee [SPARK-45752][SQL] Unreferenced CTE should all be checked 
by CheckAnalysis0
24edc0ef5bee is described below

commit 24edc0ef5bee578de8eec3b032f993812e4303ea
Author: Rui Wang <rui.w...@databricks.com>
AuthorDate: Thu Nov 9 15:25:52 2023 +0800

    [SPARK-45752][SQL] Unreferenced CTE should all be checked by CheckAnalysis0
    
    ### What changes were proposed in this pull request?
    
    This PR fixes an issue that if a CTE is referenced by a non-referenced CTE, 
 then this CTE should also have ref count as 0 and goes through CheckAnalysis0. 
This will guarantee analyzer throw proper error message for problematic CTE 
which is not referenced.
    
    ### Why are the changes needed?
    
    To improve error message for non-referenced CTE case.
    
    ### Does this PR introduce _any_ user-facing change?
    
    NO
    
    ### How was this patch tested?
    
    UT
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    NO
    
    Closes #43614 from amaliujia/cte_ref.
    
    Lead-authored-by: Rui Wang <rui.w...@databricks.com>
    Co-authored-by: Wenchen Fan <cloud0...@gmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../sql/catalyst/analysis/CheckAnalysis.scala      | 28 ++++++++++++++++++++--
 .../org/apache/spark/sql/CTEInlineSuite.scala      | 11 +++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index cebaee2cdec9..29d60ae0f41e 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -148,15 +148,39 @@ trait CheckAnalysis extends PredicateHelper with 
LookupCatalog with QueryErrorsB
       errorClass, missingCol, orderedCandidates, a.origin)
   }
 
+  private def checkUnreferencedCTERelations(
+      cteMap: mutable.Map[Long, (CTERelationDef, Int, mutable.Map[Long, Int])],
+      visited: mutable.Map[Long, Boolean],
+      cteId: Long): Unit = {
+    if (visited(cteId)) {
+      return
+    }
+    val (cteDef, _, refMap) = cteMap(cteId)
+    refMap.foreach { case (id, _) =>
+      checkUnreferencedCTERelations(cteMap, visited, id)
+    }
+    checkAnalysis0(cteDef.child)
+    visited(cteId) = true
+  }
+
   def checkAnalysis(plan: LogicalPlan): Unit = {
     val inlineCTE = InlineCTE(alwaysInline = true)
     val cteMap = mutable.HashMap.empty[Long, (CTERelationDef, Int, 
mutable.Map[Long, Int])]
     inlineCTE.buildCTEMap(plan, cteMap)
-    cteMap.values.foreach { case (relation, refCount, _) =>
+    cteMap.values.foreach { case (relation, _, _) =>
       // If a CTE relation is never used, it will disappear after inline. Here 
we explicitly check
       // analysis for it, to make sure the entire query plan is valid.
       try {
-        if (refCount == 0) checkAnalysis0(relation.child)
+        // If a CTE relation ref count is 0, the other CTE relations that 
reference it
+        // should also be checked by checkAnalysis0. This code will also 
guarantee the leaf
+        // relations that do not reference any others are checked first.
+        val visited: mutable.Map[Long, Boolean] = 
mutable.Map.empty.withDefaultValue(false)
+        cteMap.foreach { case (cteId, _) =>
+          val (_, refCount, _) = cteMap(cteId)
+          if (refCount == 0) {
+            checkUnreferencedCTERelations(cteMap, visited, cteId)
+          }
+        }
       } catch {
         case e: AnalysisException =>
           throw new ExtendedAnalysisException(e, relation.child)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
index 5f6c44792658..055c04992c00 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
@@ -678,6 +678,17 @@ abstract class CTEInlineSuiteBase
       }.isDefined, "CTE columns should not be pruned.")
     }
   }
+
+  test("SPARK-45752: Unreferenced CTE should all be checked by 
CheckAnalysis0") {
+    val e = intercept[AnalysisException](sql(
+      s"""
+        |with
+        |a as (select * from non_exist),
+        |b as (select * from a)
+        |select 2
+        |""".stripMargin))
+    checkErrorTableNotFound(e, "`non_exist`", ExpectedContext("non_exist", 26, 
34))
+  }
 }
 
 class CTEInlineSuiteAEOff extends CTEInlineSuiteBase with 
DisableAdaptiveExecutionSuite


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to