This is an automated email from the ASF dual-hosted git repository.
weijun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new e7e7758e72 Config: Add support default sql varchar to view types
(#15104)
e7e7758e72 is described below
commit e7e7758e72201500ffb78fd92512417e5b1afd8d
Author: Qi Zhu <[email protected]>
AuthorDate: Wed Mar 12 09:54:34 2025 +0800
Config: Add support default sql varchar to view types (#15104)
* Config: Add support default sql varchar to view types
* Fix test
* fix test
* Address comments
* Address comments
* Fix slt test
---
datafusion/common/src/config.rs | 5 ++++
datafusion/core/src/execution/session_state.rs | 1 +
datafusion/sql/src/planner.rs | 18 +++++++++++++-
datafusion/sql/tests/sql_integration.rs | 2 ++
datafusion/sqllogictest/test_files/ddl.slt | 28 ++++++++++++++++++++++
.../sqllogictest/test_files/information_schema.slt | 2 ++
docs/source/user-guide/configs.md | 1 +
7 files changed, 56 insertions(+), 1 deletion(-)
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index 8c093a9db8..b0f17630c9 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -252,6 +252,11 @@ config_namespace! {
/// string length and thus DataFusion can not enforce such limits.
pub support_varchar_with_length: bool, default = true
+ /// If true, `VARCHAR` is mapped to `Utf8View` during SQL planning.
+ /// If false, `VARCHAR` is mapped to `Utf8` during SQL planning.
+ /// Default is false.
+ pub map_varchar_to_utf8view: bool, default = false
+
/// When set to true, the source locations relative to the original SQL
/// query (i.e.
[`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html))
will be collected
/// and recorded in the logical plan nodes.
diff --git a/datafusion/core/src/execution/session_state.rs
b/datafusion/core/src/execution/session_state.rs
index 0e83156ab5..f4b0fd0c12 100644
--- a/datafusion/core/src/execution/session_state.rs
+++ b/datafusion/core/src/execution/session_state.rs
@@ -489,6 +489,7 @@ impl SessionState {
enable_options_value_normalization: sql_parser_options
.enable_options_value_normalization,
support_varchar_with_length:
sql_parser_options.support_varchar_with_length,
+ map_varchar_to_utf8view:
sql_parser_options.map_varchar_to_utf8view,
collect_spans: sql_parser_options.collect_spans,
}
}
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index bc7c2b7f43..daaf70f953 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -54,6 +54,8 @@ pub struct ParserOptions {
pub enable_options_value_normalization: bool,
/// Whether to collect spans
pub collect_spans: bool,
+ /// Whether `VARCHAR` is mapped to `Utf8View` during SQL planning.
+ pub map_varchar_to_utf8view: bool,
}
impl ParserOptions {
@@ -72,6 +74,7 @@ impl ParserOptions {
parse_float_as_decimal: false,
enable_ident_normalization: true,
support_varchar_with_length: true,
+ map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
}
@@ -111,6 +114,12 @@ impl ParserOptions {
self
}
+ /// Sets the `map_varchar_to_utf8view` option.
+ pub fn with_map_varchar_to_utf8view(mut self, value: bool) -> Self {
+ self.map_varchar_to_utf8view = value;
+ self
+ }
+
/// Sets the `enable_options_value_normalization` option.
pub fn with_enable_options_value_normalization(mut self, value: bool) ->
Self {
self.enable_options_value_normalization = value;
@@ -136,6 +145,7 @@ impl From<&SqlParserOptions> for ParserOptions {
parse_float_as_decimal: options.parse_float_as_decimal,
enable_ident_normalization: options.enable_ident_normalization,
support_varchar_with_length: options.support_varchar_with_length,
+ map_varchar_to_utf8view: options.map_varchar_to_utf8view,
enable_options_value_normalization: options
.enable_options_value_normalization,
collect_spans: options.collect_spans,
@@ -558,7 +568,13 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLDataType::Varchar(length) => {
match (length, self.options.support_varchar_with_length) {
(Some(_), false) => plan_err!("does not support Varchar
with length, please set `support_varchar_with_length` to be true"),
- _ => Ok(DataType::Utf8),
+ _ => {
+ if self.options.map_varchar_to_utf8view {
+ Ok(DataType::Utf8View)
+ } else {
+ Ok(DataType::Utf8)
+ }
+ }
}
}
SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) =>
Ok(DataType::UInt64),
diff --git a/datafusion/sql/tests/sql_integration.rs
b/datafusion/sql/tests/sql_integration.rs
index 6877e78264..023ea88cb5 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -83,6 +83,7 @@ fn parse_decimals() {
parse_float_as_decimal: true,
enable_ident_normalization: false,
support_varchar_with_length: false,
+ map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
},
@@ -139,6 +140,7 @@ fn parse_ident_normalization() {
parse_float_as_decimal: false,
enable_ident_normalization,
support_varchar_with_length: false,
+ map_varchar_to_utf8view: false,
enable_options_value_normalization: false,
collect_spans: false,
},
diff --git a/datafusion/sqllogictest/test_files/ddl.slt
b/datafusion/sqllogictest/test_files/ddl.slt
index 6f75a7d7f8..bc15f22103 100644
--- a/datafusion/sqllogictest/test_files/ddl.slt
+++ b/datafusion/sqllogictest/test_files/ddl.slt
@@ -827,3 +827,31 @@ drop table table_with_pk;
statement ok
set datafusion.catalog.information_schema = false;
+
+# Test VARCHAR is mapped to Utf8View during SQL planning when setting
map_varchar_to_utf8view to true
+statement ok
+CREATE TABLE t1(c1 VARCHAR(10) NOT NULL, c2 VARCHAR);
+
+query TTT
+DESCRIBE t1;
+----
+c1 Utf8 NO
+c2 Utf8 YES
+
+statement ok
+set datafusion.sql_parser.map_varchar_to_utf8view = true;
+
+statement ok
+CREATE TABLE t2(c1 VARCHAR(10) NOT NULL, c2 VARCHAR);
+
+query TTT
+DESCRIBE t2;
+----
+c1 Utf8View NO
+c2 Utf8View YES
+
+statement ok
+DROP TABLE t1;
+
+statement ok
+DROP TABLE t2;
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt
b/datafusion/sqllogictest/test_files/information_schema.slt
index 454055b539..496f24abf6 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -263,6 +263,7 @@ datafusion.sql_parser.collect_spans false
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.enable_options_value_normalization false
+datafusion.sql_parser.map_varchar_to_utf8view false
datafusion.sql_parser.parse_float_as_decimal false
datafusion.sql_parser.recursion_limit 50
datafusion.sql_parser.support_varchar_with_length true
@@ -361,6 +362,7 @@ datafusion.sql_parser.collect_spans false When set to true,
the source locations
datafusion.sql_parser.dialect generic Configure the SQL dialect used by
DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL,
Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB
and Databricks.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL
parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.enable_options_value_normalization false When set to
true, SQL parser will normalize options value (convert value to lowercase).
Note that this option is ignored and will be removed in the future. All
case-insensitive values are normalized automatically.
+datafusion.sql_parser.map_varchar_to_utf8view false If true, `VARCHAR` is
mapped to `Utf8View` during SQL planning. If false, `VARCHAR` is mapped to
`Utf8` during SQL planning. Default is false.
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL
parser will parse float as decimal type
datafusion.sql_parser.recursion_limit 50 Specifies the recursion depth limit
when parsing complex SQL Queries
datafusion.sql_parser.support_varchar_with_length true If true, permit lengths
for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if
a `VARCHAR` with a length is specified. The Arrow type system does not have a
notion of maximum string length and thus DataFusion can not enforce such limits.
diff --git a/docs/source/user-guide/configs.md
b/docs/source/user-guide/configs.md
index 635eb2b0a6..b6b53cfe49 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -128,5 +128,6 @@ Environment variables are read during `SessionConfig`
initialisation so they mus
| datafusion.sql_parser.enable_options_value_normalization |
false | When set to true, SQL parser will normalize options
value (convert value to lowercase). Note that this option is ignored and will
be removed in the future. All case-insensitive values are normalized
automatically.
[...]
| datafusion.sql_parser.dialect |
generic | Configure the SQL dialect used by DataFusion's
parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite,
Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, Ansi, DuckDB and Databricks.
[...]
| datafusion.sql_parser.support_varchar_with_length |
true | If true, permit lengths for `VARCHAR` such as
`VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a
length is specified. The Arrow type system does not have a notion of maximum
string length and thus DataFusion can not enforce such limits.
[...]
+| datafusion.sql_parser.map_varchar_to_utf8view |
false | If true, `VARCHAR` is mapped to `Utf8View` during
SQL planning. If false, `VARCHAR` is mapped to `Utf8` during SQL planning.
Default is false.
[...]
| datafusion.sql_parser.collect_spans |
false | When set to true, the source locations relative to
the original SQL query (i.e.
[`Span`](https://docs.rs/sqlparser/latest/sqlparser/tokenizer/struct.Span.html))
will be collected and recorded in the logical plan nodes.
[...]
| datafusion.sql_parser.recursion_limit | 50
| Specifies the recursion depth limit when parsing
complex SQL Queries
[...]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]