This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 570a1cfacb Planner: normalize_ident only when
enable_ident_normalization is enabled (#5785)
570a1cfacb is described below
commit 570a1cfacbf4eeced48d405f61687428dfb30bd9
Author: Ayush Dattagupta <[email protected]>
AuthorDate: Mon Apr 10 14:26:46 2023 -0700
Planner: normalize_ident only when enable_ident_normalization is enabled
(#5785)
* Make planner::normalize_ident public and refactor planner impl method to
honor planner options
* Undo pre-commit config change
* Add tests
* Add identNormalizer struct to sqlToRel and move calls to normalize_ident
to the struct implementation
---
datafusion/sql/src/expr/function.rs | 3 +-
datafusion/sql/src/expr/identifier.rs | 16 ++++------
datafusion/sql/src/expr/mod.rs | 3 +-
datafusion/sql/src/planner.rs | 51 ++++++++++++++++++++++----------
datafusion/sql/src/query.rs | 4 +--
datafusion/sql/src/relation/join.rs | 3 +-
datafusion/sql/src/select.rs | 6 ++--
datafusion/sql/src/statement.rs | 4 ++-
datafusion/sql/tests/integration_test.rs | 16 ++++++++++
9 files changed, 68 insertions(+), 38 deletions(-)
diff --git a/datafusion/sql/src/expr/function.rs
b/datafusion/sql/src/expr/function.rs
index e5af0eb269..bf076a2f30 100644
--- a/datafusion/sql/src/expr/function.rs
+++ b/datafusion/sql/src/expr/function.rs
@@ -16,7 +16,6 @@
// under the License.
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use crate::utils::normalize_ident;
use datafusion_common::{DFSchema, DataFusionError, Result};
use datafusion_expr::utils::COUNT_STAR_EXPANSION;
use datafusion_expr::window_frame::regularize;
@@ -43,7 +42,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// (e.g. "foo.bar") for function names yet
function.name.to_string()
} else {
- normalize_ident(function.name.0[0].clone())
+ self.normalizer.normalize(function.name.0[0].clone())
};
// next, scalar built-in
diff --git a/datafusion/sql/src/expr/identifier.rs
b/datafusion/sql/src/expr/identifier.rs
index fa58e9b96c..fdf3d0f20b 100644
--- a/datafusion/sql/src/expr/identifier.rs
+++ b/datafusion/sql/src/expr/identifier.rs
@@ -16,7 +16,6 @@
// under the License.
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use crate::utils::normalize_ident;
use datafusion_common::{
Column, DFField, DFSchema, DataFusionError, Result, ScalarValue,
TableReference,
};
@@ -47,7 +46,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
// interpret names with '.' as if they were
// compound identifiers, but this is not a compound
// identifier. (e.g. it is "foo.bar" not foo.bar)
- let normalize_ident = normalize_ident(id);
+ let normalize_ident = self.normalizer.normalize(id);
match schema.field_with_unqualified_name(normalize_ident.as_str())
{
Ok(_) => {
// found a match without a qualified name, this is a inner
table column
@@ -97,7 +96,10 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
}
if ids[0].value.starts_with('@') {
- let var_names: Vec<_> =
ids.into_iter().map(normalize_ident).collect();
+ let var_names: Vec<_> = ids
+ .into_iter()
+ .map(|id| self.normalizer.normalize(id))
+ .collect();
let ty = self
.schema_provider
.get_variable_type(&var_names)
@@ -110,13 +112,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
} else {
let ids = ids
.into_iter()
- .map(|id| {
- if self.options.enable_ident_normalization {
- normalize_ident(id)
- } else {
- id.value
- }
- })
+ .map(|id| self.normalizer.normalize(id))
.collect::<Vec<_>>();
// Currently not supporting more than one nested level
diff --git a/datafusion/sql/src/expr/mod.rs b/datafusion/sql/src/expr/mod.rs
index ed1e273a3a..4a8719331a 100644
--- a/datafusion/sql/src/expr/mod.rs
+++ b/datafusion/sql/src/expr/mod.rs
@@ -27,7 +27,6 @@ mod unary_op;
mod value;
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use crate::utils::normalize_ident;
use arrow_schema::DataType;
use datafusion_common::tree_node::{Transformed, TreeNode};
use datafusion_common::{Column, DFSchema, DataFusionError, Result,
ScalarValue};
@@ -148,7 +147,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLExpr::MapAccess { column, keys } => {
if let SQLExpr::Identifier(id) = *column {
- plan_indexed(col(normalize_ident(id)), keys)
+ plan_indexed(col(self.normalizer.normalize(id)), keys)
} else {
Err(DataFusionError::NotImplemented(format!(
"map access requires an identifier, found column
{column} instead"
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index ac6ce37256..3673742d5b 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -69,6 +69,32 @@ impl Default for ParserOptions {
}
}
+/// Ident Normalizer
+#[derive(Debug)]
+pub struct IdentNormalizer {
+ normalize: bool,
+}
+
+impl Default for IdentNormalizer {
+ fn default() -> Self {
+ Self { normalize: true }
+ }
+}
+
+impl IdentNormalizer {
+ pub fn new(normalize: bool) -> Self {
+ Self { normalize }
+ }
+
+ pub fn normalize(&self, ident: Ident) -> String {
+ if self.normalize {
+ crate::utils::normalize_ident(ident)
+ } else {
+ ident.value
+ }
+ }
+}
+
/// Struct to store the states used by the Planner. The Planner will leverage
the states to resolve
/// CTEs, Views, subqueries and PREPARE statements. The states include
/// Common Table Expression (CTE) provided with WITH clause and
@@ -155,6 +181,7 @@ impl PlannerContext {
pub struct SqlToRel<'a, S: ContextProvider> {
pub(crate) schema_provider: &'a S,
pub(crate) options: ParserOptions,
+ pub(crate) normalizer: IdentNormalizer,
}
impl<'a, S: ContextProvider> SqlToRel<'a, S> {
@@ -165,9 +192,11 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
/// Create a new query planner
pub fn new_with_options(schema_provider: &'a S, options: ParserOptions) ->
Self {
+ let normalize = options.enable_ident_normalization;
SqlToRel {
schema_provider,
options,
+ normalizer: IdentNormalizer::new(normalize),
}
}
@@ -181,7 +210,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
.iter()
.any(|x| x.option == ColumnOption::NotNull);
fields.push(Field::new(
- normalize_ident(column.name,
self.options.enable_ident_normalization),
+ self.normalizer.normalize(column.name),
data_type,
!not_nullable,
));
@@ -198,7 +227,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
) -> Result<LogicalPlan> {
let apply_name_plan =
LogicalPlan::SubqueryAlias(SubqueryAlias::try_new(
plan,
- normalize_ident(alias.name,
self.options.enable_ident_normalization),
+ self.normalizer.normalize(alias.name),
)?);
self.apply_expr_alias(apply_name_plan, alias.columns)
@@ -221,10 +250,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
let fields = plan.schema().fields().clone();
LogicalPlanBuilder::from(plan)
.project(fields.iter().zip(idents.into_iter()).map(|(field,
ident)| {
- col(field.name()).alias(normalize_ident(
- ident,
- self.options.enable_ident_normalization,
- ))
+ col(field.name()).alias(self.normalizer.normalize(ident))
}))?
.build()
}
@@ -411,7 +437,7 @@ pub(crate) fn idents_to_table_reference(
impl IdentTaker {
fn take(&mut self, enable_normalization: bool) -> String {
let ident = self.0.pop().expect("no more identifiers");
- normalize_ident(ident, enable_normalization)
+ IdentNormalizer::new(enable_normalization).normalize(ident)
}
}
@@ -447,6 +473,7 @@ pub fn object_name_to_qualifier(
enable_normalization: bool,
) -> String {
let columns = vec!["table_name", "table_schema",
"table_catalog"].into_iter();
+ let normalizer = IdentNormalizer::new(enable_normalization);
sql_table_name
.0
.iter()
@@ -456,17 +483,9 @@ pub fn object_name_to_qualifier(
format!(
r#"{} = '{}'"#,
column_name,
- normalize_ident(ident.clone(), enable_normalization)
+ normalizer.normalize(ident.clone())
)
})
.collect::<Vec<_>>()
.join(" AND ")
}
-
-fn normalize_ident(id: Ident, enable_normalization: bool) -> String {
- if enable_normalization {
- return crate::utils::normalize_ident(id);
- }
-
- id.value
-}
diff --git a/datafusion/sql/src/query.rs b/datafusion/sql/src/query.rs
index e25b3f578b..ea1f72e76c 100644
--- a/datafusion/sql/src/query.rs
+++ b/datafusion/sql/src/query.rs
@@ -16,7 +16,7 @@
// under the License.
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use crate::utils::normalize_ident;
+
use datafusion_common::{DataFusionError, Result, ScalarValue};
use datafusion_expr::{Expr, LogicalPlan, LogicalPlanBuilder};
use sqlparser::ast::{Expr as SQLExpr, Offset as SQLOffset, OrderByExpr, Query,
Value};
@@ -53,7 +53,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
for cte in with.cte_tables {
// A `WITH` block can't use the same name more than once
- let cte_name = normalize_ident(cte.alias.name.clone());
+ let cte_name =
self.normalizer.normalize(cte.alias.name.clone());
if planner_context.contains_cte(&cte_name) {
return Err(DataFusionError::SQL(ParserError(format!(
"WITH query name {cte_name:?} specified more than once"
diff --git a/datafusion/sql/src/relation/join.rs
b/datafusion/sql/src/relation/join.rs
index 5911941362..eedae28385 100644
--- a/datafusion/sql/src/relation/join.rs
+++ b/datafusion/sql/src/relation/join.rs
@@ -16,7 +16,6 @@
// under the License.
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
-use crate::utils::normalize_ident;
use datafusion_common::{Column, DataFusionError, Result};
use datafusion_expr::{JoinType, LogicalPlan, LogicalPlanBuilder};
use sqlparser::ast::{Join, JoinConstraint, JoinOperator, TableWithJoins};
@@ -146,7 +145,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
JoinConstraint::Using(idents) => {
let keys: Vec<Column> = idents
.into_iter()
- .map(|x| Column::from_name(normalize_ident(x)))
+ .map(|x| Column::from_name(self.normalizer.normalize(x)))
.collect();
LogicalPlanBuilder::from(left)
.join_using(right, join_type, keys)?
diff --git a/datafusion/sql/src/select.rs b/datafusion/sql/src/select.rs
index 03f96d4009..a723c7263f 100644
--- a/datafusion/sql/src/select.rs
+++ b/datafusion/sql/src/select.rs
@@ -17,8 +17,8 @@
use crate::planner::{ContextProvider, PlannerContext, SqlToRel};
use crate::utils::{
- check_columns_satisfy_exprs, extract_aliases, normalize_ident, rebase_expr,
- resolve_aliases_to_exprs, resolve_columns, resolve_positions_to_exprs,
+ check_columns_satisfy_exprs, extract_aliases, rebase_expr,
resolve_aliases_to_exprs,
+ resolve_columns, resolve_positions_to_exprs,
};
use datafusion_common::{DataFusionError, Result};
use datafusion_expr::expr_rewriter::{
@@ -330,7 +330,7 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
&[&[plan.schema()]],
&plan.using_columns()?,
)?;
- let expr = Alias(Box::new(col), normalize_ident(alias));
+ let expr = Alias(Box::new(col),
self.normalizer.normalize(alias));
Ok(vec![expr])
}
SelectItem::Wildcard(options) => {
diff --git a/datafusion/sql/src/statement.rs b/datafusion/sql/src/statement.rs
index ef1ecfef58..bec9888a8c 100644
--- a/datafusion/sql/src/statement.rs
+++ b/datafusion/sql/src/statement.rs
@@ -924,7 +924,9 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
.iter()
.map(|c| {
Ok(table_schema
-
.field_with_unqualified_name(&normalize_ident(c.clone()))?
+ .field_with_unqualified_name(
+ &self.normalizer.normalize(c.clone()),
+ )?
.clone())
})
.collect::<Result<Vec<DFField>>>()?;
diff --git a/datafusion/sql/tests/integration_test.rs
b/datafusion/sql/tests/integration_test.rs
index 99a6a90aa0..181e35a466 100644
--- a/datafusion/sql/tests/integration_test.rs
+++ b/datafusion/sql/tests/integration_test.rs
@@ -89,6 +89,18 @@ fn parse_ident_normalization() {
"Err(Plan(\"No table named: PERSON found\"))",
false,
),
+ (
+ "SELECT Id FROM UPPERCASE_test",
+ "Ok(Projection: UPPERCASE_test.Id\
+ \n TableScan: UPPERCASE_test)",
+ false,
+ ),
+ (
+ "SELECT \"Id\", lower FROM \"UPPERCASE_test\"",
+ "Ok(Projection: UPPERCASE_test.Id, UPPERCASE_test.lower\
+ \n TableScan: UPPERCASE_test)",
+ true,
+ ),
];
for (sql, expected, enable_ident_normalization) in test_data {
@@ -2635,6 +2647,10 @@ impl ContextProvider for MockContextProvider {
Field::new("c12", DataType::Float64, false),
Field::new("c13", DataType::Utf8, false),
])),
+ "UPPERCASE_test" => Ok(Schema::new(vec![
+ Field::new("Id", DataType::UInt32, false),
+ Field::new("lower", DataType::UInt32, false),
+ ])),
_ => Err(DataFusionError::Plan(format!(
"No table named: {} found",
name.table()