This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 570a1cfacb Planner: normalize_ident only when 
enable_ident_normalization is enabled (#5785)
570a1cfacb is described below

commit 570a1cfacbf4eeced48d405f61687428dfb30bd9
Author: Ayush Dattagupta <[email protected]>
AuthorDate: Mon Apr 10 14:26:46 2023 -0700

    Planner: normalize_ident only when enable_ident_normalization is enabled 
(#5785)
    
    * Make planner::normalize_ident public and refactor planner impl method to 
honor planner options
    
    * Undo pre-commit config change
    
    * Add tests
    
    * Add identNormalizer struct to sqlToRel and move calls to normalize_ident 
to the struct implementation
---
 datafusion/sql/src/expr/function.rs      |  3 +-
 datafusion/sql/src/expr/identifier.rs    | 16 ++++------
 datafusion/sql/src/expr/mod.rs           |  3 +-
 datafusion/sql/src/planner.rs            | 51 ++++++++++++++++++++++----------
 datafusion/sql/src/query.rs              |  4 +--
 datafusion/sql/src/relation/join.rs      |  3 +-
 datafusion/sql/src/select.rs             |  6 ++--
 datafusion/sql/src/statement.rs          |  4 ++-
 datafusion/sql/tests/integration_test.rs | 16 ++++++++++
 9 files changed, 68 insertions(+), 38 deletions(-)

diff --git a/datafusion/sql/src/expr/function.rs 
b/datafusion/sql/src/expr/function.rs
index e5af0eb269..bf076a2f30 100644
--- a/datafusion/sql/src/expr/function.rs
+++ b/datafusion/sql/src/expr/function.rs
@@ -16,7 +16,6 @@
 // under the License.
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use crate::utils::normalize_ident;
 use datafusion_common::{DFSchema, DataFusionError, Result};
 use datafusion_expr::utils::COUNT_STAR_EXPANSION;
 use datafusion_expr::window_frame::regularize;
@@ -43,7 +42,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             // (e.g. "foo.bar") for function names yet
             function.name.to_string()
         } else {
-            normalize_ident(function.name.0[0].clone())
+            self.normalizer.normalize(function.name.0[0].clone())
         };
 
         // next, scalar built-in
diff --git a/datafusion/sql/src/expr/identifier.rs 
b/datafusion/sql/src/expr/identifier.rs
index fa58e9b96c..fdf3d0f20b 100644
--- a/datafusion/sql/src/expr/identifier.rs
+++ b/datafusion/sql/src/expr/identifier.rs
@@ -16,7 +16,6 @@
 // under the License.
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use crate::utils::normalize_ident;
 use datafusion_common::{
     Column, DFField, DFSchema, DataFusionError, Result, ScalarValue, 
TableReference,
 };
@@ -47,7 +46,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             // interpret names with '.' as if they were
             // compound identifiers, but this is not a compound
             // identifier. (e.g. it is "foo.bar" not foo.bar)
-            let normalize_ident = normalize_ident(id);
+            let normalize_ident = self.normalizer.normalize(id);
             match schema.field_with_unqualified_name(normalize_ident.as_str()) 
{
                 Ok(_) => {
                     // found a match without a qualified name, this is a inner 
table column
@@ -97,7 +96,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         }
 
         if ids[0].value.starts_with('@') {
-            let var_names: Vec<_> = 
ids.into_iter().map(normalize_ident).collect();
+            let var_names: Vec<_> = ids
+                .into_iter()
+                .map(|id| self.normalizer.normalize(id))
+                .collect();
             let ty = self
                 .schema_provider
                 .get_variable_type(&var_names)
@@ -110,13 +112,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
         } else {
             let ids = ids
                 .into_iter()
-                .map(|id| {
-                    if self.options.enable_ident_normalization {
-                        normalize_ident(id)
-                    } else {
-                        id.value
-                    }
-                })
+                .map(|id| self.normalizer.normalize(id))
                 .collect::<Vec<_>>();
 
             // Currently not supporting more than one nested level
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index ed1e273a3a..4a8719331a 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -27,7 +27,6 @@ mod unary_op;
 mod value;
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use crate::utils::normalize_ident;
 use arrow_schema::DataType;
 use datafusion_common::tree_node::{Transformed, TreeNode};
 use datafusion_common::{Column, DFSchema, DataFusionError, Result, 
ScalarValue};
@@ -148,7 +147,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
             SQLExpr::MapAccess { column, keys } => {
                 if let SQLExpr::Identifier(id) = *column {
-                    plan_indexed(col(normalize_ident(id)), keys)
+                    plan_indexed(col(self.normalizer.normalize(id)), keys)
                 } else {
                     Err(DataFusionError::NotImplemented(format!(
                         "map access requires an identifier, found column 
{column} instead"
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index ac6ce37256..3673742d5b 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -69,6 +69,32 @@ impl Default for ParserOptions {
     }
 }
 
+/// Ident Normalizer
+#[derive(Debug)]
+pub struct IdentNormalizer {
+    normalize: bool,
+}
+
+impl Default for IdentNormalizer {
+    fn default() -> Self {
+        Self { normalize: true }
+    }
+}
+
+impl IdentNormalizer {
+    pub fn new(normalize: bool) -> Self {
+        Self { normalize }
+    }
+
+    pub fn normalize(&self, ident: Ident) -> String {
+        if self.normalize {
+            crate::utils::normalize_ident(ident)
+        } else {
+            ident.value
+        }
+    }
+}
+
 /// Struct to store the states used by the Planner. The Planner will leverage 
the states to resolve
 /// CTEs, Views, subqueries and PREPARE statements. The states include
 /// Common Table Expression (CTE) provided with WITH clause and
@@ -155,6 +181,7 @@ impl PlannerContext {
 pub struct SqlToRel<'a, S: ContextProvider> {
     pub(crate) schema_provider: &'a S,
     pub(crate) options: ParserOptions,
+    pub(crate) normalizer: IdentNormalizer,
 }
 
 impl<'a, S: ContextProvider> SqlToRel<'a, S> {
@@ -165,9 +192,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
     /// Create a new query planner
     pub fn new_with_options(schema_provider: &'a S, options: ParserOptions) -> 
Self {
+        let normalize = options.enable_ident_normalization;
         SqlToRel {
             schema_provider,
             options,
+            normalizer: IdentNormalizer::new(normalize),
         }
     }
 
@@ -181,7 +210,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 .iter()
                 .any(|x| x.option == ColumnOption::NotNull);
             fields.push(Field::new(
-                normalize_ident(column.name, 
self.options.enable_ident_normalization),
+                self.normalizer.normalize(column.name),
                 data_type,
                 !not_nullable,
             ));
@@ -198,7 +227,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
     ) -> Result<LogicalPlan> {
         let apply_name_plan = 
LogicalPlan::SubqueryAlias(SubqueryAlias::try_new(
             plan,
-            normalize_ident(alias.name, 
self.options.enable_ident_normalization),
+            self.normalizer.normalize(alias.name),
         )?);
 
         self.apply_expr_alias(apply_name_plan, alias.columns)
@@ -221,10 +250,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             let fields = plan.schema().fields().clone();
             LogicalPlanBuilder::from(plan)
                 .project(fields.iter().zip(idents.into_iter()).map(|(field, 
ident)| {
-                    col(field.name()).alias(normalize_ident(
-                        ident,
-                        self.options.enable_ident_normalization,
-                    ))
+                    col(field.name()).alias(self.normalizer.normalize(ident))
                 }))?
                 .build()
         }
@@ -411,7 +437,7 @@ pub(crate) fn idents_to_table_reference(
     impl IdentTaker {
         fn take(&mut self, enable_normalization: bool) -> String {
             let ident = self.0.pop().expect("no more identifiers");
-            normalize_ident(ident, enable_normalization)
+            IdentNormalizer::new(enable_normalization).normalize(ident)
         }
     }
 
@@ -447,6 +473,7 @@ pub fn object_name_to_qualifier(
     enable_normalization: bool,
 ) -> String {
     let columns = vec!["table_name", "table_schema", 
"table_catalog"].into_iter();
+    let normalizer = IdentNormalizer::new(enable_normalization);
     sql_table_name
         .0
         .iter()
@@ -456,17 +483,9 @@ pub fn object_name_to_qualifier(
             format!(
                 r#"{} = '{}'"#,
                 column_name,
-                normalize_ident(ident.clone(), enable_normalization)
+                normalizer.normalize(ident.clone())
             )
         })
         .collect::<Vec<_>>()
         .join(" AND ")
 }
-
-fn normalize_ident(id: Ident, enable_normalization: bool) -> String {
-    if enable_normalization {
-        return crate::utils::normalize_ident(id);
-    }
-
-    id.value
-}
diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs
index e25b3f578b..ea1f72e76c 100644
--- a/datafusion/sql/src/query.rs
+++ b/datafusion/sql/src/query.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use crate::utils::normalize_ident;
+
 use datafusion_common::{DataFusionError, Result, ScalarValue};
 use datafusion_expr::{Expr, LogicalPlan, LogicalPlanBuilder};
 use sqlparser::ast::{Expr as SQLExpr, Offset as SQLOffset, OrderByExpr, Query, 
Value};
@@ -53,7 +53,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
 
             for cte in with.cte_tables {
                 // A `WITH` block can't use the same name more than once
-                let cte_name = normalize_ident(cte.alias.name.clone());
+                let cte_name = 
self.normalizer.normalize(cte.alias.name.clone());
                 if planner_context.contains_cte(&cte_name) {
                     return Err(DataFusionError::SQL(ParserError(format!(
                         "WITH query name {cte_name:?} specified more than once"
diff --git a/datafusion/sql/src/relation/join.rs 
b/datafusion/sql/src/relation/join.rs
index 5911941362..eedae28385 100644
--- a/datafusion/sql/src/relation/join.rs
+++ b/datafusion/sql/src/relation/join.rs
@@ -16,7 +16,6 @@
 // under the License.
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use crate::utils::normalize_ident;
 use datafusion_common::{Column, DataFusionError, Result};
 use datafusion_expr::{JoinType, LogicalPlan, LogicalPlanBuilder};
 use sqlparser::ast::{Join, JoinConstraint, JoinOperator, TableWithJoins};
@@ -146,7 +145,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             JoinConstraint::Using(idents) => {
                 let keys: Vec<Column> = idents
                     .into_iter()
-                    .map(|x| Column::from_name(normalize_ident(x)))
+                    .map(|x| Column::from_name(self.normalizer.normalize(x)))
                     .collect();
                 LogicalPlanBuilder::from(left)
                     .join_using(right, join_type, keys)?
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index 03f96d4009..a723c7263f 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -17,8 +17,8 @@
 
 use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
 use crate::utils::{
-    check_columns_satisfy_exprs, extract_aliases, normalize_ident, rebase_expr,
-    resolve_aliases_to_exprs, resolve_columns, resolve_positions_to_exprs,
+    check_columns_satisfy_exprs, extract_aliases, rebase_expr, 
resolve_aliases_to_exprs,
+    resolve_columns, resolve_positions_to_exprs,
 };
 use datafusion_common::{DataFusionError, Result};
 use datafusion_expr::expr_rewriter::{
@@ -330,7 +330,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                     &[&[plan.schema()]],
                     &plan.using_columns()?,
                 )?;
-                let expr = Alias(Box::new(col), normalize_ident(alias));
+                let expr = Alias(Box::new(col), 
self.normalizer.normalize(alias));
                 Ok(vec![expr])
             }
             SelectItem::Wildcard(options) => {
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index ef1ecfef58..bec9888a8c 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -924,7 +924,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
                 .iter()
                 .map(|c| {
                     Ok(table_schema
-                        
.field_with_unqualified_name(&normalize_ident(c.clone()))?
+                        .field_with_unqualified_name(
+                            &self.normalizer.normalize(c.clone()),
+                        )?
                         .clone())
                 })
                 .collect::<Result<Vec<DFField>>>()?;
diff --git a/datafusion/sql/tests/integration_test.rs 
b/datafusion/sql/tests/integration_test.rs
index 99a6a90aa0..181e35a466 100644
--- a/datafusion/sql/tests/integration_test.rs
+++ b/datafusion/sql/tests/integration_test.rs
@@ -89,6 +89,18 @@ fn parse_ident_normalization() {
             "Err(Plan(\"No table named: PERSON found\"))",
             false,
         ),
+        (
+            "SELECT Id FROM UPPERCASE_test",
+            "Ok(Projection: UPPERCASE_test.Id\
+                \n  TableScan: UPPERCASE_test)",
+            false,
+        ),
+        (
+            "SELECT \"Id\", lower FROM \"UPPERCASE_test\"",
+            "Ok(Projection: UPPERCASE_test.Id, UPPERCASE_test.lower\
+                \n  TableScan: UPPERCASE_test)",
+            true,
+        ),
     ];
 
     for (sql, expected, enable_ident_normalization) in test_data {
@@ -2635,6 +2647,10 @@ impl ContextProvider for MockContextProvider {
                 Field::new("c12", DataType::Float64, false),
                 Field::new("c13", DataType::Utf8, false),
             ])),
+            "UPPERCASE_test" => Ok(Schema::new(vec![
+                Field::new("Id", DataType::UInt32, false),
+                Field::new("lower", DataType::UInt32, false),
+            ])),
             _ => Err(DataFusionError::Plan(format!(
                 "No table named: {} found",
                 name.table()

Reply via email to