This is an automated email from the ASF dual-hosted git repository.

beliefer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 6851cb96ec6 [SPARK-45752][SQL] Simplify the code for check 
unreferenced CTE relations
6851cb96ec6 is described below

commit 6851cb96ec651b25a8103f7681e8528ff7d625ff
Author: Jiaan Geng <belie...@163.com>
AuthorDate: Fri Nov 10 22:00:51 2023 +0800

    [SPARK-45752][SQL] Simplify the code for check unreferenced CTE relations
    
    ### What changes were proposed in this pull request?
    https://github.com/apache/spark/pull/43614 let unreferenced `CTE` relations be checked 
by `CheckAnalysis0`.
    This PR follows up https://github.com/apache/spark/pull/43614 to simplify 
the code for checking unreferenced CTE relations.
    
    ### Why are the changes needed?
    Simplify the code for checking unreferenced CTE relations.
    
    ### Does this PR introduce _any_ user-facing change?
    'No'.
    
    ### How was this patch tested?
    Existing test cases.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    'No'.
    
    Closes #43727 from beliefer/SPARK-45752_followup.
    
    Authored-by: Jiaan Geng <belie...@163.com>
    Signed-off-by: Jiaan Geng <belie...@163.com>
---
 .../spark/sql/catalyst/analysis/CheckAnalysis.scala    | 12 ++++--------
 .../scala/org/apache/spark/sql/CTEInlineSuite.scala    | 18 ++++++++++++++++--
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 29d60ae0f41..f9010d47508 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -167,25 +167,21 @@ trait CheckAnalysis extends PredicateHelper with 
LookupCatalog with QueryErrorsB
     val inlineCTE = InlineCTE(alwaysInline = true)
     val cteMap = mutable.HashMap.empty[Long, (CTERelationDef, Int, 
mutable.Map[Long, Int])]
     inlineCTE.buildCTEMap(plan, cteMap)
-    cteMap.values.foreach { case (relation, _, _) =>
+    val visited: mutable.Map[Long, Boolean] = 
mutable.Map.empty.withDefaultValue(false)
+    cteMap.foreach { case (cteId, (relation, refCount, _)) =>
       // If a CTE relation is never used, it will disappear after inline. Here 
we explicitly check
       // analysis for it, to make sure the entire query plan is valid.
       try {
         // If a CTE relation ref count is 0, the other CTE relations that 
reference it
         // should also be checked by checkAnalysis0. This code will also 
guarantee the leaf
         // relations that do not reference any others are checked first.
-        val visited: mutable.Map[Long, Boolean] = 
mutable.Map.empty.withDefaultValue(false)
-        cteMap.foreach { case (cteId, _) =>
-          val (_, refCount, _) = cteMap(cteId)
-          if (refCount == 0) {
-            checkUnreferencedCTERelations(cteMap, visited, cteId)
-          }
+        if (refCount == 0) {
+          checkUnreferencedCTERelations(cteMap, visited, cteId)
         }
       } catch {
         case e: AnalysisException =>
           throw new ExtendedAnalysisException(e, relation.child)
       }
-
     }
     // Inline all CTEs in the plan to help check query plan structures in 
subqueries.
     var inlinedPlan: Option[LogicalPlan] = None
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
index 055c04992c0..a06b50d175f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
@@ -683,11 +683,25 @@ abstract class CTEInlineSuiteBase
     val e = intercept[AnalysisException](sql(
       s"""
         |with
-        |a as (select * from non_exist),
+        |a as (select * from tab_non_exists),
         |b as (select * from a)
         |select 2
         |""".stripMargin))
-    checkErrorTableNotFound(e, "`non_exist`", ExpectedContext("non_exist", 26, 
34))
+    checkErrorTableNotFound(e, "`tab_non_exists`", 
ExpectedContext("tab_non_exists", 26, 39))
+
+    withTable("tab_exists") {
+      spark.sql("CREATE TABLE tab_exists(id INT) using parquet")
+      val e = intercept[AnalysisException](sql(
+        s"""
+           |with
+           |a as (select * from tab_exists),
+           |b as (select * from a),
+           |c as (select * from tab_non_exists),
+           |d as (select * from c)
+           |select 2
+           |""".stripMargin))
+      checkErrorTableNotFound(e, "`tab_non_exists`", 
ExpectedContext("tab_non_exists", 83, 96))
+    }
   }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to