This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 0a2f09ffc0 Add a hint about normalization in error message (#14089) 
(#14113)
0a2f09ffc0 is described below

commit 0a2f09ffc01d10083fd27f2c33e33730410fdcef
Author: Sergey Zhukov <62326549+cj-zhu...@users.noreply.github.com>
AuthorDate: Tue Jan 14 19:33:24 2025 +0300

    Add a hint about normalization in error message (#14089) (#14113)
    
    * Add a hint about normalization in error message (#14089)
    
    * normalization suggestion is only shown when a column name matches schema
    
    ---------
    
    Co-authored-by: Sergey Zhukov <szhu...@aligntech.com>
---
 datafusion/common/src/column.rs                    |  3 ++-
 datafusion/common/src/dfschema.rs                  | 23 ++++++++++--------
 datafusion/common/src/error.rs                     | 12 ++++++++++
 datafusion/expr/src/expr_rewriter/mod.rs           |  7 +++---
 datafusion/sql/tests/sql_integration.rs            | 27 ++++++++++------------
 datafusion/sqllogictest/test_files/identifiers.slt |  8 +++----
 datafusion/sqllogictest/test_files/select.slt      |  4 ++--
 7 files changed, 48 insertions(+), 36 deletions(-)

diff --git a/datafusion/common/src/column.rs b/datafusion/common/src/column.rs
index 4f25260d5e..fdde3d69ed 100644
--- a/datafusion/common/src/column.rs
+++ b/datafusion/common/src/column.rs
@@ -368,7 +368,8 @@ mod tests {
                 &[],
             )
             .expect_err("should've failed to find field");
-        let expected = r#"Schema error: No field named z. Valid fields are 
t1.a, t1.b, t2.c, t2.d, t3.a, t3.b, t3.c, t3.d, t3.e."#;
+        let expected = "Schema error: No field named z. \
+            Valid fields are t1.a, t1.b, t2.c, t2.d, t3.a, t3.b, t3.c, t3.d, 
t3.e.";
         assert_eq!(err.strip_backtrace(), expected);
 
         // ambiguous column reference
diff --git a/datafusion/common/src/dfschema.rs 
b/datafusion/common/src/dfschema.rs
index ac4d8be804..302d515e02 100644
--- a/datafusion/common/src/dfschema.rs
+++ b/datafusion/common/src/dfschema.rs
@@ -1068,10 +1068,12 @@ mod tests {
         let schema = DFSchema::try_from_qualified_schema("t1", 
&test_schema_1())?;
         // lookup with unqualified name "t1.c0"
         let err = schema.index_of_column(&col).unwrap_err();
-        assert_eq!(
-            err.strip_backtrace(),
-            "Schema error: No field named \"t1.c0\". Valid fields are t1.c0, 
t1.c1."
-        );
+        let expected = "Schema error: No field named \"t1.c0\". \
+            Column names are case sensitive. \
+            You can use double quotes to refer to the \"\"t1.c0\"\" column \
+            or set the datafusion.sql_parser.enable_ident_normalization 
configuration. \
+            Valid fields are t1.c0, t1.c1.";
+        assert_eq!(err.strip_backtrace(), expected);
         Ok(())
     }
 
@@ -1088,10 +1090,9 @@ mod tests {
 
         // lookup with unqualified name "t1.c0"
         let err = schema.index_of_column(&col).unwrap_err();
-        assert_eq!(
-            err.strip_backtrace(),
-            "Schema error: No field named \"t1.c0\". Valid fields are 
t1.\"CapitalColumn\", t1.\"field.with.period\"."
-        );
+        let expected = "Schema error: No field named \"t1.c0\". \
+            Valid fields are t1.\"CapitalColumn\", t1.\"field.with.period\".";
+        assert_eq!(err.strip_backtrace(), expected);
         Ok(())
     }
 
@@ -1261,12 +1262,14 @@ mod tests {
 
         let col = Column::from_qualified_name("t1.c0");
         let err = schema.index_of_column(&col).unwrap_err();
-        assert_eq!(err.strip_backtrace(), "Schema error: No field named 
t1.c0.");
+        let expected = "Schema error: No field named t1.c0.";
+        assert_eq!(err.strip_backtrace(), expected);
 
         // the same check without qualifier
         let col = Column::from_name("c0");
         let err = schema.index_of_column(&col).err().unwrap();
-        assert_eq!(err.strip_backtrace(), "Schema error: No field named c0.");
+        let expected = "Schema error: No field named c0.";
+        assert_eq!(err.strip_backtrace(), expected);
     }
 
     #[test]
diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs
index 1012c4cd22..f7c247aaf2 100644
--- a/datafusion/common/src/error.rs
+++ b/datafusion/common/src/error.rs
@@ -167,6 +167,18 @@ impl Display for SchemaError {
                 valid_fields,
             } => {
                 write!(f, "No field named {}", field.quoted_flat_name())?;
+                let lower_valid_fields = valid_fields
+                    .iter()
+                    .map(|column| column.flat_name().to_lowercase())
+                    .collect::<Vec<String>>();
+                if 
lower_valid_fields.contains(&field.flat_name().to_lowercase()) {
+                    write!(
+                        f,
+                        ". Column names are case sensitive. You can use double 
quotes to refer to the \"{}\" column \
+                        or set the 
datafusion.sql_parser.enable_ident_normalization configuration",
+                        field.quoted_flat_name()
+                    )?;
+                }
                 if !valid_fields.is_empty() {
                     write!(
                         f,
diff --git a/datafusion/expr/src/expr_rewriter/mod.rs 
b/datafusion/expr/src/expr_rewriter/mod.rs
index b944428977..335d5587be 100644
--- a/datafusion/expr/src/expr_rewriter/mod.rs
+++ b/datafusion/expr/src/expr_rewriter/mod.rs
@@ -462,10 +462,9 @@ mod test {
             normalize_col_with_schemas_and_ambiguity_check(expr, &[&schemas], 
&[])
                 .unwrap_err()
                 .strip_backtrace();
-        assert_eq!(
-            error,
-            r#"Schema error: No field named b. Valid fields are "tableA".a."#
-        );
+        let expected = "Schema error: No field named b. \
+            Valid fields are \"tableA\".a.";
+        assert_eq!(error, expected);
     }
 
     #[test]
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index b93060988d..5edafdd405 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -492,7 +492,8 @@ Dml: op=[Insert Into] table=[test_decimal]
 )]
 #[case::non_existing_column(
     "INSERT INTO test_decimal (nonexistent, price) VALUES (1, 2), (4, 5)",
-    "Schema error: No field named nonexistent. Valid fields are id, price."
+    "Schema error: No field named nonexistent. \
+    Valid fields are id, price."
 )]
 #[case::target_column_count_mismatch(
     "INSERT INTO person (id, first_name, last_name) VALUES ($1, $2)",
@@ -1358,9 +1359,11 @@ fn 
select_simple_aggregate_with_groupby_column_unselected() {
 fn 
select_simple_aggregate_with_groupby_and_column_in_group_by_does_not_exist() {
     let sql = "SELECT sum(age) FROM person GROUP BY doesnotexist";
     let err = logical_plan(sql).expect_err("query should have failed");
-    assert_eq!("Schema error: No field named doesnotexist. Valid fields are 
\"sum(person.age)\", \
+    let expected = "Schema error: No field named doesnotexist. \
+        Valid fields are \"sum(person.age)\", \
         person.id, person.first_name, person.last_name, person.age, 
person.state, \
-        person.salary, person.birth_date, person.\"😀\".", 
err.strip_backtrace());
+        person.salary, person.birth_date, person.\"😀\".";
+    assert_eq!(err.strip_backtrace(), expected);
 }
 
 #[test]
@@ -3636,10 +3639,8 @@ fn 
test_prepare_statement_to_plan_panic_prepare_wrong_syntax() {
 #[test]
 fn test_prepare_statement_to_plan_panic_no_relation_and_constant_param() {
     let sql = "PREPARE my_plan(INT) AS SELECT id + $1";
-    assert_eq!(
-        logical_plan(sql).unwrap_err().strip_backtrace(),
-        "Schema error: No field named id."
-    )
+    let expected = "Schema error: No field named id.";
+    assert_eq!(logical_plan(sql).unwrap_err().strip_backtrace(), expected);
 }
 
 #[test]
@@ -4334,18 +4335,14 @@ fn test_multi_grouping_sets() {
 fn test_field_not_found_window_function() {
     let order_by_sql = "SELECT count() OVER (order by a);";
     let order_by_err = logical_plan(order_by_sql).expect_err("query should 
have failed");
-    assert_eq!(
-        "Schema error: No field named a.",
-        order_by_err.strip_backtrace()
-    );
+    let expected = "Schema error: No field named a.";
+    assert_eq!(order_by_err.strip_backtrace(), expected);
 
     let partition_by_sql = "SELECT count() OVER (PARTITION BY a);";
     let partition_by_err =
         logical_plan(partition_by_sql).expect_err("query should have failed");
-    assert_eq!(
-        "Schema error: No field named a.",
-        partition_by_err.strip_backtrace()
-    );
+    let expected = "Schema error: No field named a.";
+    assert_eq!(partition_by_err.strip_backtrace(), expected);
 
     let qualified_sql =
         "SELECT order_id, MAX(qty) OVER (PARTITION BY orders.order_id) from 
orders";
diff --git a/datafusion/sqllogictest/test_files/identifiers.slt 
b/datafusion/sqllogictest/test_files/identifiers.slt
index 755d617e7a..f1b2777bec 100644
--- a/datafusion/sqllogictest/test_files/identifiers.slt
+++ b/datafusion/sqllogictest/test_files/identifiers.slt
@@ -90,16 +90,16 @@ drop table  case_insensitive_test
 statement ok
 CREATE TABLE test("Column1" string) AS VALUES ('content1');
 
-statement error DataFusion error: Schema error: No field named column1\. Valid 
fields are test\."Column1"\.
+statement error DataFusion error: Schema error: No field named column1. Valid 
fields are test\."Column1"\.
 SELECT COLumn1 from test
 
-statement error DataFusion error: Schema error: No field named column1\. Valid 
fields are test\."Column1"\.
+statement error DataFusion error: Schema error: No field named column1. Valid 
fields are test\."Column1"\.
 SELECT Column1 from test
 
-statement error DataFusion error: Schema error: No field named column1\. Valid 
fields are test\."Column1"\.
+statement error DataFusion error: Schema error: No field named column1. Valid 
fields are test\."Column1"\.
 SELECT column1 from test
 
-statement error DataFusion error: Schema error: No field named column1\. Valid 
fields are test\."Column1"\.
+statement error DataFusion error: Schema error: No field named column1. Valid 
fields are test\."Column1"\.
 SELECT "column1" from test
 
 statement ok
diff --git a/datafusion/sqllogictest/test_files/select.slt 
b/datafusion/sqllogictest/test_files/select.slt
index a127463c2b..dce685f5c1 100644
--- a/datafusion/sqllogictest/test_files/select.slt
+++ b/datafusion/sqllogictest/test_files/select.slt
@@ -1799,7 +1799,7 @@ select a + b from (select 1 as a, 2 as b, 1 as "a + b");
 3
 
 # Can't reference an output column by expression over projection.
-query error DataFusion error: Schema error: No field named a\. Valid fields 
are "a \+ Int64\(1\)"\.
+query error DataFusion error: Schema error: No field named a. Valid fields are 
"a \+ Int64\(1\)"\.
 select a + 1 from (select a+1 from (select 1 as a));
 
 query I
@@ -1834,5 +1834,5 @@ statement ok
 DROP TABLE test;
 
 # Can't reference an unqualified column by a qualified name
-query error DataFusion error: Schema error: No field named t1\.v1\. Valid 
fields are "t1\.v1"\.
+query error DataFusion error: Schema error: No field named t1.v1. Column names 
are case sensitive. You can use double quotes to refer to the "t1.v1" column or 
set the datafusion.sql_parser.enable_ident_normalization configuration. Valid 
fields are "t1.v1".
 SELECT t1.v1 FROM (SELECT 1 AS "t1.v1");


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to