This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new ca9c322ab3 adding config to control Varchar behavior (#11090)
ca9c322ab3 is described below
commit ca9c322ab3ffa1b5f3d08c2fd2385593d0a76ee5
Author: Lordworms <[email protected]>
AuthorDate: Fri Jun 28 07:34:16 2024 -0700
adding config to control Varchar behavior (#11090)
* adding config to control Varchar behavior
* fix failed tests
* fix config_md
* format md
* optimize code
* format md
* format md
* adding config
* Tweak documentation
* Update sqllogictest
* tweaks strings
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/common/src/config.rs | 6 +++
datafusion/core/src/execution/session_state.rs | 1 +
datafusion/sql/src/planner.rs | 9 +++-
datafusion/sql/tests/sql_integration.rs | 2 +
.../sqllogictest/test_files/information_schema.slt | 2 +
datafusion/sqllogictest/test_files/strings.slt | 49 ++++++++++++++++++++++
docs/source/user-guide/configs.md | 1 +
7 files changed, 69 insertions(+), 1 deletion(-)
diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs
index b90aeffb07..7e3871e6b7 100644
--- a/datafusion/common/src/config.rs
+++ b/datafusion/common/src/config.rs
@@ -204,6 +204,11 @@ config_namespace! {
/// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL,
ClickHouse, BigQuery, and Ansi.
pub dialect: String, default = "generic".to_string()
+ /// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but
+ /// ignore the length. If false, error if a `VARCHAR` with a length is
+ /// specified. The Arrow type system does not have a notion of maximum
+ /// string length and thus DataFusion can not enforce such limits.
+ pub support_varchar_with_length: bool, default = true
}
}
@@ -303,6 +308,7 @@ config_namespace! {
/// statistics into the same file groups.
/// Currently experimental
pub split_file_groups_by_statistics: bool, default = false
+
}
}
diff --git a/datafusion/core/src/execution/session_state.rs
b/datafusion/core/src/execution/session_state.rs
index 2b7867e720..0b880ddbf8 100644
--- a/datafusion/core/src/execution/session_state.rs
+++ b/datafusion/core/src/execution/session_state.rs
@@ -615,6 +615,7 @@ impl SessionState {
ParserOptions {
parse_float_as_decimal: sql_parser_options.parse_float_as_decimal,
enable_ident_normalization:
sql_parser_options.enable_ident_normalization,
+ support_varchar_with_length:
sql_parser_options.support_varchar_with_length,
}
}
diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs
index 63ef86446a..00f2212006 100644
--- a/datafusion/sql/src/planner.rs
+++ b/datafusion/sql/src/planner.rs
@@ -103,6 +103,7 @@ pub trait ContextProvider {
pub struct ParserOptions {
pub parse_float_as_decimal: bool,
pub enable_ident_normalization: bool,
+ pub support_varchar_with_length: bool,
}
impl Default for ParserOptions {
@@ -110,6 +111,7 @@ impl Default for ParserOptions {
Self {
parse_float_as_decimal: false,
enable_ident_normalization: true,
+ support_varchar_with_length: true,
}
}
}
@@ -404,12 +406,17 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> {
SQLDataType::UnsignedInt(_) | SQLDataType::UnsignedInteger(_) |
SQLDataType::UnsignedInt4(_) => {
Ok(DataType::UInt32)
}
+ SQLDataType::Varchar(length) => {
+ match (length, self.options.support_varchar_with_length) {
+ (Some(_), false) => plan_err!("does not support Varchar
with length, please set `support_varchar_with_length` to be true"),
+ _ => Ok(DataType::Utf8),
+ }
+ }
SQLDataType::UnsignedBigInt(_) | SQLDataType::UnsignedInt8(_) =>
Ok(DataType::UInt64),
SQLDataType::Float(_) => Ok(DataType::Float32),
SQLDataType::Real | SQLDataType::Float4 => Ok(DataType::Float32),
SQLDataType::Double | SQLDataType::DoublePrecision |
SQLDataType::Float8 => Ok(DataType::Float64),
SQLDataType::Char(_)
- | SQLDataType::Varchar(_)
| SQLDataType::Text
| SQLDataType::String(_) => Ok(DataType::Utf8),
SQLDataType::Timestamp(None, tz_info) => {
diff --git a/datafusion/sql/tests/sql_integration.rs
b/datafusion/sql/tests/sql_integration.rs
index f196d71d41..e72a439b32 100644
--- a/datafusion/sql/tests/sql_integration.rs
+++ b/datafusion/sql/tests/sql_integration.rs
@@ -84,6 +84,7 @@ fn parse_decimals() {
ParserOptions {
parse_float_as_decimal: true,
enable_ident_normalization: false,
+ support_varchar_with_length: false,
},
);
}
@@ -137,6 +138,7 @@ fn parse_ident_normalization() {
ParserOptions {
parse_float_as_decimal: false,
enable_ident_normalization,
+ support_varchar_with_length: false,
},
);
if plan.is_ok() {
diff --git a/datafusion/sqllogictest/test_files/information_schema.slt
b/datafusion/sqllogictest/test_files/information_schema.slt
index 6f31973fdb..3cc837aa8e 100644
--- a/datafusion/sqllogictest/test_files/information_schema.slt
+++ b/datafusion/sqllogictest/test_files/information_schema.slt
@@ -237,6 +237,7 @@ datafusion.optimizer.top_down_join_key_reordering true
datafusion.sql_parser.dialect generic
datafusion.sql_parser.enable_ident_normalization true
datafusion.sql_parser.parse_float_as_decimal false
+datafusion.sql_parser.support_varchar_with_length true
# show all variables with verbose
query TTT rowsort
@@ -318,6 +319,7 @@ datafusion.optimizer.top_down_join_key_reordering true When
set to true, the phy
datafusion.sql_parser.dialect generic Configure the SQL dialect used by
DataFusion's parser; supported values include: Generic, MySQL, PostgreSQL,
Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
datafusion.sql_parser.enable_ident_normalization true When set to true, SQL
parser will normalize ident (convert ident to lowercase when not quoted)
datafusion.sql_parser.parse_float_as_decimal false When set to true, SQL
parser will parse float as decimal type
+datafusion.sql_parser.support_varchar_with_length true If true, permit lengths
for `VARCHAR` such as `VARCHAR(20)`, but ignore the length. If false, error if
a `VARCHAR` with a length is specified. The Arrow type system does not have a
notion of maximum string length and thus DataFusion can not enforce such limits.
# show_variable_in_config_options
query TT
diff --git a/datafusion/sqllogictest/test_files/strings.slt
b/datafusion/sqllogictest/test_files/strings.slt
index 27ed0e2d09..3cd6c339b4 100644
--- a/datafusion/sqllogictest/test_files/strings.slt
+++ b/datafusion/sqllogictest/test_files/strings.slt
@@ -78,3 +78,52 @@ e1
p2
p2e1
p2m1e1
+
+## VARCHAR with length support
+
+# Lengths can be used by default
+query T
+SELECT '12345'::VARCHAR(2);
+----
+12345
+
+# Lengths can not be used when the config setting is disabled
+
+statement ok
+set datafusion.sql_parser.support_varchar_with_length = false;
+
+query error
+SELECT '12345'::VARCHAR(2);
+
+query error
+SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s);
+
+statement ok
+create table vals(s char) as values('abc'), ('def');
+
+query error
+SELECT s::VARCHAR(2) FROM vals
+
+# Lengths can be used when the config setting is enabled
+
+statement ok
+set datafusion.sql_parser.support_varchar_with_length = true;
+
+query T
+SELECT '12345'::VARCHAR(2)
+----
+12345
+
+query T
+SELECT s::VARCHAR(2) FROM (VALUES ('12345')) t(s)
+----
+12345
+
+query T
+SELECT s::VARCHAR(2) FROM vals
+----
+abc
+def
+
+statement ok
+drop table vals;
diff --git a/docs/source/user-guide/configs.md
b/docs/source/user-guide/configs.md
index 80d88632ff..c5f22725e0 100644
--- a/docs/source/user-guide/configs.md
+++ b/docs/source/user-guide/configs.md
@@ -113,3 +113,4 @@ Environment variables are read during `SessionConfig`
initialisation so they mus
| datafusion.sql_parser.parse_float_as_decimal |
false | When set to true, SQL parser will parse float as
decimal type
[...]
| datafusion.sql_parser.enable_ident_normalization |
true | When set to true, SQL parser will normalize ident
(convert ident to lowercase when not quoted)
[...]
| datafusion.sql_parser.dialect |
generic | Configure the SQL dialect used by DataFusion's
parser; supported values include: Generic, MySQL, PostgreSQL, Hive, SQLite,
Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
[...]
+| datafusion.sql_parser.support_varchar_with_length |
true | If true, permit lengths for `VARCHAR` such as
`VARCHAR(20)`, but ignore the length. If false, error if a `VARCHAR` with a
length is specified. The Arrow type system does not have a notion of maximum
string length and thus DataFusion can not enforce such limits.
[...]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]