This is an automated email from the ASF dual-hosted git repository.

jonah pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new ac161bba33 fix: CTEs defined in a subquery can escape their scope 
(#10954)
ac161bba33 is described below

commit ac161bba336d098eab46f666af4664de7e8cd29f
Author: Jonah Gao <[email protected]>
AuthorDate: Tue Jun 18 13:04:26 2024 +0800

    fix: CTEs defined in a subquery can escape their scope (#10954)
    
    * fix: CTEs defined in a subquery can escape their scope
    
    * Add test
---
 datafusion/sql/src/cte.rs                  | 11 +++--------
 datafusion/sql/src/expr/subquery.rs        |  6 +++---
 datafusion/sql/src/planner.rs              | 23 ++++++++++++++--------
 datafusion/sql/src/query.rs                | 18 ++++++-----------
 datafusion/sql/src/relation/join.rs        | 31 ++++--------------------------
 datafusion/sqllogictest/test_files/cte.slt | 26 ++++++++++++++++++++++++-
 6 files changed, 56 insertions(+), 59 deletions(-)

diff --git a/datafusion/sql/src/cte.rs b/datafusion/sql/src/cte.rs
index 4f7b9bb6d1..0035dcda6e 100644
--- a/datafusion/sql/src/cte.rs
+++ b/datafusion/sql/src/cte.rs
@@ -66,10 +66,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         cte_query: Query,
         planner_context: &mut PlannerContext,
     ) -> Result<LogicalPlan> {
-        // CTE expr don't need extend outer_query_schema,
-        // so we clone a new planner_context here.
-        let mut cte_planner_context = planner_context.clone();
-        self.query_to_plan(cte_query, &mut cte_planner_context)
+        self.query_to_plan(cte_query, planner_context)
     }
 
     fn recursive_cte(
@@ -113,8 +110,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         // allow us to infer the schema to be used in the recursive term.
 
         // ---------- Step 1: Compile the static term ------------------
-        let static_plan =
-            self.set_expr_to_plan(*left_expr, &mut planner_context.clone())?;
+        let static_plan = self.set_expr_to_plan(*left_expr, planner_context)?;
 
         // Since the recursive CTEs include a component that references a
         // table with its name, like the example below:
@@ -166,8 +162,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         // this uses the named_relation we inserted above to resolve the
         // relation. This ensures that the recursive term uses the named 
relation logical plan
         // and thus the 'continuance' physical plan as its input and source
-        let recursive_plan =
-            self.set_expr_to_plan(*right_expr, &mut planner_context.clone())?;
+        let recursive_plan = self.set_expr_to_plan(*right_expr, 
planner_context)?;
 
         // Check if the recursive term references the CTE itself,
         // if not, it is a non-recursive CTE
diff --git a/datafusion/sql/src/expr/subquery.rs 
b/datafusion/sql/src/expr/subquery.rs
index d34065d92f..ff161c6ed6 100644
--- a/datafusion/sql/src/expr/subquery.rs
+++ b/datafusion/sql/src/expr/subquery.rs
@@ -33,7 +33,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         planner_context: &mut PlannerContext,
     ) -> Result<Expr> {
         let old_outer_query_schema =
-            planner_context.set_outer_query_schema(Some(input_schema.clone()));
+            
planner_context.set_outer_query_schema(Some(input_schema.clone().into()));
         let sub_plan = self.query_to_plan(subquery, planner_context)?;
         let outer_ref_columns = sub_plan.all_out_ref_exprs();
         planner_context.set_outer_query_schema(old_outer_query_schema);
@@ -55,7 +55,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         planner_context: &mut PlannerContext,
     ) -> Result<Expr> {
         let old_outer_query_schema =
-            planner_context.set_outer_query_schema(Some(input_schema.clone()));
+            
planner_context.set_outer_query_schema(Some(input_schema.clone().into()));
         let sub_plan = self.query_to_plan(subquery, planner_context)?;
         let outer_ref_columns = sub_plan.all_out_ref_exprs();
         planner_context.set_outer_query_schema(old_outer_query_schema);
@@ -77,7 +77,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         planner_context: &mut PlannerContext,
     ) -> Result<Expr> {
         let old_outer_query_schema =
-            planner_context.set_outer_query_schema(Some(input_schema.clone()));
+            
planner_context.set_outer_query_schema(Some(input_schema.clone().into()));
         let sub_plan = self.query_to_plan(subquery, planner_context)?;
         let outer_ref_columns = sub_plan.all_out_ref_exprs();
         planner_context.set_outer_query_schema(old_outer_query_schema);
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index a92e64597e..30f95170a3 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -22,7 +22,7 @@ use std::vec;
 
 use arrow_schema::*;
 use datafusion_common::{
-    field_not_found, internal_err, plan_datafusion_err, SchemaError,
+    field_not_found, internal_err, plan_datafusion_err, DFSchemaRef, 
SchemaError,
 };
 use datafusion_expr::WindowUDF;
 use sqlparser::ast::TimezoneInfo;
@@ -139,16 +139,23 @@ impl IdentNormalizer {
 /// Common Table Expression (CTE) provided with WITH clause and
 /// Parameter Data Types provided with PREPARE statement and the query schema 
of the
 /// outer query plan
+///
+/// # Cloning
+///
+/// Only the `ctes` are truly cloned when the `PlannerContext` is cloned. This 
helps resolve
+/// scoping issues of CTEs. By using cloning, a subquery can inherit CTEs from 
the outer query
+/// and can also define its own private CTEs without affecting the outer query.
+///
 #[derive(Debug, Clone)]
 pub struct PlannerContext {
     /// Data types for numbered parameters ($1, $2, etc), if supplied
     /// in `PREPARE` statement
-    prepare_param_data_types: Vec<DataType>,
+    prepare_param_data_types: Arc<Vec<DataType>>,
     /// Map of CTE name to logical plan of the WITH clause.
     /// Use `Arc<LogicalPlan>` to allow cheap cloning
     ctes: HashMap<String, Arc<LogicalPlan>>,
     /// The query schema of the outer query plan, used to resolve the columns 
in subquery
-    outer_query_schema: Option<DFSchema>,
+    outer_query_schema: Option<DFSchemaRef>,
 }
 
 impl Default for PlannerContext {
@@ -161,7 +168,7 @@ impl PlannerContext {
     /// Create an empty PlannerContext
     pub fn new() -> Self {
         Self {
-            prepare_param_data_types: vec![],
+            prepare_param_data_types: Arc::new(vec![]),
             ctes: HashMap::new(),
             outer_query_schema: None,
         }
@@ -172,21 +179,21 @@ impl PlannerContext {
         mut self,
         prepare_param_data_types: Vec<DataType>,
     ) -> Self {
-        self.prepare_param_data_types = prepare_param_data_types;
+        self.prepare_param_data_types = prepare_param_data_types.into();
         self
     }
 
     // return a reference to the outer queries schema
     pub fn outer_query_schema(&self) -> Option<&DFSchema> {
-        self.outer_query_schema.as_ref()
+        self.outer_query_schema.as_ref().map(|s| s.as_ref())
     }
 
     /// sets the outer query schema, returning the existing one, if
     /// any
     pub fn set_outer_query_schema(
         &mut self,
-        mut schema: Option<DFSchema>,
-    ) -> Option<DFSchema> {
+        mut schema: Option<DFSchemaRef>,
+    ) -> Option<DFSchemaRef> {
         std::mem::swap(&mut self.outer_query_schema, &mut schema);
         schema
     }
diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs
index fdc739646a..cbbff19321 100644
--- a/datafusion/sql/src/query.rs
+++ b/datafusion/sql/src/query.rs
@@ -29,23 +29,17 @@ use sqlparser::ast::{
 };
 
 impl<'a, S: ContextProvider> SqlToRel<'a, S> {
-    /// Generate a logical plan from an SQL query
+    /// Generate a logical plan from an SQL query/subquery
     pub(crate) fn query_to_plan(
         &self,
         query: Query,
-        planner_context: &mut PlannerContext,
+        outer_planner_context: &mut PlannerContext,
     ) -> Result<LogicalPlan> {
-        self.query_to_plan_with_schema(query, planner_context)
-    }
+        // Each query has its own planner context, including CTEs that are 
visible within that query.
+        // It also inherits the CTEs from the outer query by cloning the outer 
planner context.
+        let mut query_plan_context = outer_planner_context.clone();
+        let planner_context = &mut query_plan_context;
 
-    /// Generate a logic plan from an SQL query.
-    /// It's implementation of `subquery_to_plan` and `query_to_plan`.
-    /// It shouldn't be invoked directly.
-    fn query_to_plan_with_schema(
-        &self,
-        query: Query,
-        planner_context: &mut PlannerContext,
-    ) -> Result<LogicalPlan> {
         if let Some(with) = query.with {
             self.plan_with_clause(with, planner_context)?;
         }
diff --git a/datafusion/sql/src/relation/join.rs 
b/datafusion/sql/src/relation/join.rs
index 262bae397c..ee2e35b550 100644
--- a/datafusion/sql/src/relation/join.rs
+++ b/datafusion/sql/src/relation/join.rs
@@ -27,34 +27,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         t: TableWithJoins,
         planner_context: &mut PlannerContext,
     ) -> Result<LogicalPlan> {
-        // From clause may exist CTEs, we should separate them from global 
CTEs.
-        // CTEs in from clause are allowed to be duplicated.
-        // Such as `select * from (WITH source AS (select 1 as e) SELECT * 
FROM source) t1, (WITH source AS (select 1 as e) SELECT * FROM source) t2;` 
which is valid.
-        // So always use original global CTEs to plan CTEs in from clause.
-        // Btw, don't need to add CTEs in from to global CTEs.
-        let origin_planner_context = planner_context.clone();
-        let left = self.create_relation(t.relation, planner_context)?;
-        match t.joins.len() {
-            0 => {
-                *planner_context = origin_planner_context;
-                Ok(left)
-            }
-            _ => {
-                let mut joins = t.joins.into_iter();
-                *planner_context = origin_planner_context.clone();
-                let mut left = self.parse_relation_join(
-                    left,
-                    joins.next().unwrap(), // length of joins > 0
-                    planner_context,
-                )?;
-                for join in joins {
-                    *planner_context = origin_planner_context.clone();
-                    left = self.parse_relation_join(left, join, 
planner_context)?;
-                }
-                *planner_context = origin_planner_context;
-                Ok(left)
-            }
+        let mut left = self.create_relation(t.relation, planner_context)?;
+        for join in t.joins.into_iter() {
+            left = self.parse_relation_join(left, join, planner_context)?;
         }
+        Ok(left)
     }
 
     fn parse_relation_join(
diff --git a/datafusion/sqllogictest/test_files/cte.slt 
b/datafusion/sqllogictest/test_files/cte.slt
index d8eaa51fc8..e9fcf07e77 100644
--- a/datafusion/sqllogictest/test_files/cte.slt
+++ b/datafusion/sqllogictest/test_files/cte.slt
@@ -834,4 +834,28 @@ SELECT * FROM non_recursive_cte, recursive_cte;
 query I
 WITH t AS (SELECT * FROM t where t.a < 2) SELECT * FROM t
 ----
-1
\ No newline at end of file
+1
+
+# Issue: https://github.com/apache/datafusion/issues/10914
+# The CTE defined within the subquery is only visible inside that subquery.
+query I rowsort
+(WITH t AS (SELECT 400) SELECT * FROM t) UNION (SELECT * FROM t);
+----
+1
+2
+3
+400
+
+query error DataFusion error: Error during planning: table 
'datafusion\.public\.cte' not found
+(WITH cte AS (SELECT 400) SELECT * FROM cte) UNION (SELECT * FROM cte);
+
+# Test duplicate CTE names in different subqueries in the FROM clause.
+query III rowsort
+SELECT * FROM 
+  (WITH t AS (select 400 as e) SELECT * FROM t) t1, 
+  (WITH t AS (select 500 as e) SELECT * FROM t) t2, 
+  t
+----
+400 500 1
+400 500 2
+400 500 3


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to