This is an automated email from the ASF dual-hosted git repository.

weijun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new e7e7758e72 Config: Add support default sql varchar to view types 
(#15104)
e7e7758e72 is described below

commit e7e7758e72201500ffb78fd92512417e5b1afd8d
Author: Qi Zhu <[email protected]>
AuthorDate: Wed Mar 12 09:54:34 2025 +0800

    Config: Add support default sql varchar to view types (#15104)
    
    * Config: Add support default sql varchar to view types
    
    * Fix test
    
    * fix test
    
    * Address comments
    
    * Address comments
    
    * Fix slt test
---
 datafusion/common/src/config.rs                    |  5 ++++
 datafusion/core/src/execution/session_state.rs     |  1 +
 datafusion/sql/src/planner.rs                      | 18 +++++++++++++-
 datafusion/sql/tests/sql_integration.rs            |  2 ++
 datafusion/sqllogictest/test_files/ddl.slt         | 28 ++++++++++++++++++++++
 .../sqllogictest/test_files/information_schema.slt |  2 ++
 docs/source/user-guide/configs.md                  |  1 +
 7 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 8c093a9db8..b0f17630c9 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -252,6 +252,11 @@ config_namespace! {
         /// string length and thus DataFusion can not enforce such limits.
         pub support_varchar_with_length: bool, default = true
 
+       /// If true, `VARCHAR` is mapped to `Utf8View` during SQL planning.
+       /// If false, `VARCHAR` is mapped to `Utf8`  during SQL planning.
+       /// Default is false.
+        pub map_varchar_to_utf8view: bool, default = false
+
         /// When set to true, the source locations relative to the original SQL
         /// query (i.e. 
[`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html))
 will be collected
         /// and recorded in the logical plan nodes.
diff --git a/datafusion/core/src/execution/session_state.rs 
b/datafusion/core/src/execution/session_state.rs
index 0e83156ab5..f4b0fd0c12 100644
--- a/datafusion/core/src/execution/session_state.rs
+++ b/datafusion/core/src/execution/session_state.rs
@@ -489,6 +489,7 @@ impl SessionState {
             enable_options_value_normalization: sql_parser_options
                 .enable_options_value_normalization,
             support_varchar_with_length: 
sql_parser_options.support_varchar_with_length,
+            map_varchar_to_utf8view: 
sql_parser_options.map_varchar_to_utf8view,
             collect_spans: sql_parser_options.collect_spans,
         }
     }
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index bc7c2b7f43..daaf70f953 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -54,6 +54,8 @@ pub struct ParserOptions {
     pub enable_options_value_normalization: bool,
     /// Whether to collect spans
     pub collect_spans: bool,
+    /// Whether `VARCHAR` is mapped to `Utf8View` during SQL planning.
+    pub map_varchar_to_utf8view: bool,
 }
 
 impl ParserOptions {
@@ -72,6 +74,7 @@ impl ParserOptions {
             parse_float_as_decimal: false,
             enable_ident_normalization: true,
             support_varchar_with_length: true,
+            map_varchar_to_utf8view: false,
             enable_options_value_normalization: false,
             collect_spans: false,
         }
@@ -111,6 +114,12 @@ impl ParserOptions {
         self
     }
 
+    /// Sets the `map_varchar_to_utf8view` option.
+    pub fn with_map_varchar_to_utf8view(mut self, value: bool) -> Self {
+        self.map_varchar_to_utf8view = value;
+        self
+    }
+
     /// Sets the `enable_options_value_normalization` option.
     pub fn with_enable_options_value_normalization(mut self, value: bool) -> 
Self {
         self.enable_options_value_normalization = value;
@@ -136,6 +145,7 @@ impl From<&SqlParserOptions> for ParserOptions {
             parse_float_as_decimal: options.parse_float_as_decimal,
             enable_ident_normalization: options.enable_ident_normalization,
             support_varchar_with_length: options.support_varchar_with_length,
+            map_varchar_to_utf8view: options.map_varchar_to_utf8view,
             enable_options_value_normalization: options
                 .enable_options_value_normalization,
             collect_spans: options.collect_spans,
@@ -558,7 +568,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
             SQLDataType::Varchar(length) => {
                 match (length, self.options.support_varchar_with_length) {
                     (Some(_), false) => plan_err!("does not support Varchar 
with length, please set `support_varchar_with_length` to be true"),
-                    _ => Ok(DataType::Utf8),
+                    _ => {
+                        if self.options.map_varchar_to_utf8view {
+                            Ok(DataType::Utf8View)
+                        } else {
+                            Ok(DataType::Utf8)
+                        }
+                    }
                 }
             }
             SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) => 
Ok(DataType::UInt64),
diff --git a/datafusion/sql/tests/sql_integration.rs 
b/datafusion/sql/tests/sql_integration.rs
index 6877e78264..023ea88cb5 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -83,6 +83,7 @@ fn parse_decimals() {
                 parse_float_as_decimal: true,
                 enable_ident_normalization: false,
                 support_varchar_with_length: false,
+                map_varchar_to_utf8view: false,
                 enable_options_value_normalization: false,
                 collect_spans: false,
             },
@@ -139,6 +140,7 @@ fn parse_ident_normalization() {
                 parse_float_as_decimal: false,
                 enable_ident_normalization,
                 support_varchar_with_length: false,
+                map_varchar_to_utf8view: false,
                 enable_options_value_normalization: false,
                 collect_spans: false,
             },
diff --git a/datafusion/sqllogictest/test_files/ddl.slt 
b/datafusion/sqllogictest/test_files/ddl.slt
index 6f75a7d7f8..bc15f22103 100644
--- a/datafusion/sqllogictest/test_files/ddl.slt
+++ b/datafusion/sqllogictest/test_files/ddl.slt
@@ -827,3 +827,31 @@ drop table table_with_pk;
 
 statement ok
 set datafusion.catalog.information_schema = false;
+
+# Test VARCHAR is mapped to Utf8View during SQL planning when setting 
map_varchar_to_utf8view to true
+statement ok
+CREATE TABLE t1(c1 VARCHAR(10) NOT NULL, c2 VARCHAR);
+
+query TTT
+DESCRIBE t1;
+----
+c1 Utf8 NO
+c2 Utf8 YES
+
+statement ok
+set datafusion.sql_parser.map_varchar_to_utf8view = true;
+
+statement ok
+CREATE TABLE t2(c1 VARCHAR(10) NOT NULL, c2 VARCHAR);
+
+query TTT
+DESCRIBE t2;
+----
+c1 Utf8View NO
+c2 Utf8View YES
+
+statement ok
+DROP TABLE t1;
+
+statement ok
+DROP TABLE t2;
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt 
b/datafusion/sqllogictest/test_files/information_schema.slt
index 454055b539..496f24abf6 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -263,6 +263,7 @@ datafusion.sql_parser.collect_spans false
 datafusion.sql_parser.dialect generic
 datafusion.sql_parser.enable_ident_normalization true
 datafusion.sql_parser.enable_options_value_normalization false
+datafusion.sql_parser.map_varchar_to_utf8view false
 datafusion.sql_parser.parse_float_as_decimal false
 datafusion.sql_parser.recursion_limit 50
 datafusion.sql_parser.support_varchar_with_length true
@@ -361,6 +362,7 @@ datafusion.sql_parser.collect_spans false When set to true, 
the source locations
 datafusion.sql_parser.dialect generic Configure the SQL dialect used by 
DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL, 
Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB 
and Databricks.
 datafusion.sql_parser.enable_ident_normalization true When set to true, SQL 
parser will normalize ident (convert ident to lowercase when not quoted)
 datafusion.sql_parser.enable_options_value_normalization false When set to 
true, SQL parser will normalize options value (convert value to lowercase). 
Note that this option is ignored and will be removed in the future. All 
case-insensitive values are normalized automatically.
+datafusion.sql_parser.map_varchar_to_utf8view false If true, `VARCHAR` is 
mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to 
`Utf8`  during SQL planning. Default is false.
 datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL 
parser will parse float as decimal type
 datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit 
when parsing complex SQL Queries
 datafusion.sql_parser.support_varchar_with_length true If true, permit lengths 
for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if 
a `VARCHAR` with a length is specified. The Arrow type system does not have a 
notion of maximum string length and thus DataFusion can not enforce such limits.
diff --git a/docs/source/user-guide/configs.md 
b/docs/source/user-guide/configs.md
index 635eb2b0a6..b6b53cfe49 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -128,5 +128,6 @@ Environment variables are read during `SessionConfig` 
initialisation so they mus
 | datafusion.sql_parser.enable_options_value_normalization                | 
false                     | When set to true, SQL parser will normalize options 
value (convert value to lowercase). Note that this option is ignored and will 
be removed in the future. All case-insensitive values are normalized 
automatically.                                                                  
                                                                                
                              [...]
 | datafusion.sql_parser.dialect                                           | 
generic                   | Configure the SQL dialect used by DataFusion's 
parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite, 
Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.  
                                                                                
                                                                                
                          [...]
 | datafusion.sql_parser.support_varchar_with_length                       | 
true                      | If true, permit lengths for `VARCHAR` such as 
`VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a 
length is specified. The Arrow type system does not have a notion of maximum 
string length and thus DataFusion can not enforce such limits.                  
                                                                                
                              [...]
+| datafusion.sql_parser.map_varchar_to_utf8view                           | 
false                     | If true, `VARCHAR` is mapped to `Utf8View` during 
SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning. 
Default is false.                                                               
                                                                                
                                                                                
                        [...]
 | datafusion.sql_parser.collect_spans                                     | 
false                     | When set to true, the source locations relative to 
the original SQL query (i.e. 
[`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html))
 will be collected and recorded in the logical plan nodes.                      
                                                                                
                                                                     [...]
 | datafusion.sql_parser.recursion_limit                                   | 50 
                       | Specifies the recursion depth limit when parsing 
complex SQL Queries                                                             
                                                                                
                                                                                
                                                                                
                    [...]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to