This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new ca9c322ab3 adding config to control Varchar behavior (#11090)
ca9c322ab3 is described below

commit ca9c322ab3ffa1b5f3d08c2fd2385593d0a76ee5
Author: Lordworms <[email protected]>
AuthorDate: Fri Jun 28 07:34:16 2024 -0700

    adding config to control Varchar behavior (#11090)
    
    * adding config to control Varchar behavior
    
    * fix failed tests
    
    * fix config_md
    
    * format md
    
    * optimize code
    
    * format md
    
    * format md
    
    * adding config
    
    * Tweak documentation
    
    * Update sqllogictest
    
    * tweaks strings
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/common/src/config.rs                    |  6 +++
 datafusion/core/src/execution/session_state.rs     |  1 +
 datafusion/sql/src/planner.rs                      |  9 +++-
 datafusion/sql/tests/sql_integration.rs            |  2 +
 .../sqllogictest/test_files/information_schema.slt |  2 +
 datafusion/sqllogictest/test_files/strings.slt     | 49 ++++++++++++++++++++++
 docs/source/user-guide/configs.md                  |  1 +
 7 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index b90aeffb07..7e3871e6b7 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -204,6 +204,11 @@ config_namespace! {
         /// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, 
ClickHouse, BigQuery, and Ansi.
         pub dialect: String, default = "generic".to_string()
 
+        /// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but
+        /// ignore the length. If false, error if a `VARCHAR` with a length is
+        /// specified. The Arrow type system does not have a notion of maximum
+        /// string length and thus DataFusion can not enforce such limits.
+        pub support_varchar_with_length: bool, default = true
     }
 }
 
@@ -303,6 +308,7 @@ config_namespace! {
         /// statistics into the same file groups.
         /// Currently experimental
         pub split_file_groups_by_statistics: bool, default = false
+
     }
 }
 
diff --git a/datafusion/core/src/execution/session_state.rs 
b/datafusion/core/src/execution/session_state.rs
index 2b7867e720..0b880ddbf8 100644
--- a/datafusion/core/src/execution/session_state.rs
+++ b/datafusion/core/src/execution/session_state.rs
@@ -615,6 +615,7 @@ impl SessionState {
         ParserOptions {
             parse_float_as_decimal: sql_parser_options.parse_float_as_decimal,
             enable_ident_normalization: 
sql_parser_options.enable_ident_normalization,
+            support_varchar_with_length: 
sql_parser_options.support_varchar_with_length,
         }
     }
 
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 63ef86446a..00f2212006 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -103,6 +103,7 @@ pub trait ContextProvider {
 pub struct ParserOptions {
     pub parse_float_as_decimal: bool,
     pub enable_ident_normalization: bool,
+    pub support_varchar_with_length: bool,
 }
 
 impl Default for ParserOptions {
@@ -110,6 +111,7 @@ impl Default for ParserOptions {
         Self {
             parse_float_as_decimal: false,
             enable_ident_normalization: true,
+            support_varchar_with_length: true,
         }
     }
 }
@@ -404,12 +406,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             SQLDataType::UnsignedInt(_) | SQLDataType::UnsignedInteger(_) | 
SQLDataType::UnsignedInt4(_) => {
                 Ok(DataType::UInt32)
             }
+            SQLDataType::Varchar(length) => {
+                match (length, self.options.support_varchar_with_length) {
+                    (Some(_), false) => plan_err!("does not support Varchar 
with length, please set `support_varchar_with_length` to be true"),
+                    _ => Ok(DataType::Utf8),
+                }
+            }
             SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) => 
Ok(DataType::UInt64),
             SQLDataType::Float(_) => Ok(DataType::Float32),
             SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32),
             SQLDataType::Double | SQLDataType::DoublePrecision | 
SQLDataType::Float8 => Ok(DataType::Float64),
             SQLDataType::Char(_)
-            | SQLDataType::Varchar(_)
             | SQLDataType::Text
             | SQLDataType::String(_) => Ok(DataType::Utf8),
             SQLDataType::Timestamp(None, tz_info) => {
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index f196d71d41..e72a439b32 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -84,6 +84,7 @@ fn parse_decimals() {
             ParserOptions {
                 parse_float_as_decimal: true,
                 enable_ident_normalization: false,
+                support_varchar_with_length: false,
             },
         );
     }
@@ -137,6 +138,7 @@ fn parse_ident_normalization() {
             ParserOptions {
                 parse_float_as_decimal: false,
                 enable_ident_normalization,
+                support_varchar_with_length: false,
             },
         );
         if plan.is_ok() {
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt 
b/datafusion/sqllogictest/test_files/information_schema.slt
index 6f31973fdb..3cc837aa8e 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -237,6 +237,7 @@ datafusion.optimizer.top_down_join_key_reordering true
 datafusion.sql_parser.dialect generic
 datafusion.sql_parser.enable_ident_normalization true
 datafusion.sql_parser.parse_float_as_decimal false
+datafusion.sql_parser.support_varchar_with_length true
 
 # show all variables with verbose
 query TTT rowsort
@@ -318,6 +319,7 @@ datafusion.optimizer.top_down_join_key_reordering true When 
set to true, the phy
 datafusion.sql_parser.dialect generic Configure the SQL dialect used by 
DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, 
Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
 datafusion.sql_parser.enable_ident_normalization true When set to true, SQL 
parser will normalize ident (convert ident to lowercase when not quoted)
 datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL 
parser will parse float as decimal type
+datafusion.sql_parser.support_varchar_with_length true If true, permit lengths 
for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if 
a `VARCHAR` with a length is specified. The Arrow type system does not have a 
notion of maximum string length and thus DataFusion can not enforce such limits.
 
 # show_variable_in_config_options
 query TT
diff --git a/datafusion/sqllogictest/test_files/strings.slt 
b/datafusion/sqllogictest/test_files/strings.slt
index 27ed0e2d09..3cd6c339b4 100644
--- a/datafusion/sqllogictest/test_files/strings.slt
+++ b/datafusion/sqllogictest/test_files/strings.slt
@@ -78,3 +78,52 @@ e1
 p2
 p2e1
 p2m1e1
+
+## VARCHAR with length support
+
+# Lengths can be used by default
+query T
+SELECT '12345'::VARCHAR(2);
+----
+12345
+
+# Lengths can not be used when the config setting is disabled
+
+statement ok
+set datafusion.sql_parser.support_varchar_with_length = false;
+
+query error
+SELECT '12345'::VARCHAR(2);
+
+query error
+SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s);
+
+statement ok
+create table vals(s char) as values('abc'), ('def');
+
+query error
+SELECT s::VARCHAR(2) FROM vals
+
+# Lengths can be used when the config setting is enabled
+
+statement ok
+set datafusion.sql_parser.support_varchar_with_length = true;
+
+query T
+SELECT '12345'::VARCHAR(2)
+----
+12345
+
+query T
+SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s)
+----
+12345
+
+query T
+SELECT s::VARCHAR(2) FROM vals
+----
+abc
+def
+
+statement ok
+drop table vals;
diff --git a/docs/source/user-guide/configs.md 
b/docs/source/user-guide/configs.md
index 80d88632ff..c5f22725e0 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -113,3 +113,4 @@ Environment variables are read during `SessionConfig` 
initialisation so they mus
 | datafusion.sql_parser.parse_float_as_decimal                            | 
false                     | When set to true, SQL parser will parse float as 
decimal type                                                                    
                                                                                
                                                                                
                                                                                
                    [...]
 | datafusion.sql_parser.enable_ident_normalization                        | 
true                      | When set to true, SQL parser will normalize ident 
(convert ident to lowercase when not quoted)                                    
                                                                                
                                                                                
                                                                                
                   [...]
 | datafusion.sql_parser.dialect                                           | 
generic                   | Configure the SQL dialect used by DataFusion's 
parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, 
Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.                     
                                                                                
                                                                                
                          [...]
+| datafusion.sql_parser.support_varchar_with_length                       | 
true                      | If true, permit lengths for `VARCHAR` such as 
`VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a 
length is specified. The Arrow type system does not have a notion of maximum 
string length and thus DataFusion can not enforce such limits.                  
                                                                                
                              [...]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to