This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new cfb96eb [SPARK-37133][SQL] Add a config to optionally enforce ANSI reserved keywords cfb96eb is described below commit cfb96ebd3991c62fc737242aeeb9b5cdb4abe7ae Author: Wenchen Fan <cloud0...@gmail.com> AuthorDate: Thu Oct 28 12:58:15 2021 +0800 [SPARK-37133][SQL] Add a config to optionally enforce ANSI reserved keywords ### What changes were proposed in this pull request? This PR adds a new config to optionally enforce the ANSI reserved keywords in the parser. The default value is true, so by default we still enforce them and there is no behavior change. ### Why are the changes needed? In Spark 3.2, the ANSI mode is GA. We want more people to try and use the ANSI mode, to find data issues as early as possible and get better data quality. However, the reserved-keywords restriction is a major blocker for many users who want to try ANSI mode: they have to rewrite their SQL queries just to pass the parser, which has nothing to do with data quality and is simply extra work. With a new config that lets users opt out of enforcing reserved keywords, we can get better adoption of the ANSI mode. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? updated tests. Closes #34403 from cloud-fan/parser. 
Lead-authored-by: Wenchen Fan <cloud0...@gmail.com> Co-authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- docs/sql-ref-ansi-compliance.md | 4 +++- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 +- .../org/apache/spark/sql/catalyst/parser/ParseDriver.scala | 2 +- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 10 ++++++++++ .../spark/sql/catalyst/parser/ExpressionParserSuite.scala | 12 +++++++++++- .../sql/catalyst/parser/TableIdentifierParserSuite.scala | 9 +++++++++ 6 files changed, 35 insertions(+), 4 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index c10e866..4527faa 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -299,7 +299,9 @@ When the ANSI mode is disabled, Spark SQL has two kinds of keywords: * Non-reserved keywords: Same definition as the one when the ANSI mode enabled. * Strict-non-reserved keywords: A strict version of non-reserved keywords, which can not be used as table alias. -By default `spark.sql.ansi.enabled` is false. +If you want to still use reserved keywords as identifiers with ANSI mode, you can set `spark.sql.ansi.enforceReservedKeywords` to false. + +By default `spark.sql.ansi.enabled` is false and `spark.sql.ansi.enforceReservedKeywords` is true. Below is a list of all the keywords in Spark SQL. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d36c7ac..768d406 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1728,7 +1728,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } override def visitCurrentLike(ctx: CurrentLikeContext): Expression = withOrigin(ctx) { - if (conf.ansiEnabled) { + if (conf.enforceReservedKeywords) { ctx.name.getType match { case SqlBaseParser.CURRENT_DATE => CurrentDate() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 64216e6..b459a2d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -100,7 +100,7 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with parser.addErrorListener(ParseErrorListener) parser.legacy_setops_precedence_enabled = conf.setOpsPrecedenceEnforced parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled - parser.SQL_standard_keyword_behavior = conf.ansiEnabled + parser.SQL_standard_keyword_behavior = conf.enforceReservedKeywords try { try { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 5023b4a..fe3204b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2590,6 +2590,14 @@ object SQLConf { .booleanConf .createWithDefault(false) + val 
ENFORCE_RESERVED_KEYWORDS = buildConf("spark.sql.ansi.enforceReservedKeywords") + .doc(s"When true and '${ANSI_ENABLED.key}' is true, the Spark SQL parser enforces the ANSI " + + "reserved keywords and forbids SQL queries that use reserved keywords as alias names " + + "and/or identifiers for table, view, function, etc.") + .version("3.3.0") + .booleanConf + .createWithDefault(true) + val SORT_BEFORE_REPARTITION = buildConf("spark.sql.execution.sortBeforeRepartition") .internal() @@ -4041,6 +4049,8 @@ class SQLConf extends Serializable with Logging { def ansiEnabled: Boolean = getConf(ANSI_ENABLED) + def enforceReservedKeywords: Boolean = ansiEnabled && getConf(ENFORCE_RESERVED_KEYWORDS) + def timestampType: AtomicType = getConf(TIMESTAMP_TYPE) match { case "TIMESTAMP_LTZ" => // For historical reason, the TimestampType maps to TIMESTAMP WITH LOCAL TIME ZONE diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 0a49e3a..9a88b2a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -933,10 +933,20 @@ class ExpressionParserSuite extends AnalysisTest { assertEqual("current_timestamp", CurrentTimestamp()) } - withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + def testNonAnsiBehavior(): Unit = { assertEqual("current_date", UnresolvedAttribute.quoted("current_date")) assertEqual("current_timestamp", UnresolvedAttribute.quoted("current_timestamp")) } + withSQLConf( + SQLConf.ANSI_ENABLED.key -> "false", + SQLConf.ENFORCE_RESERVED_KEYWORDS.key -> "true") { + testNonAnsiBehavior() + } + withSQLConf( + SQLConf.ANSI_ENABLED.key -> "true", + SQLConf.ENFORCE_RESERVED_KEYWORDS.key -> "false") { + testNonAnsiBehavior() + } } test("SPARK-36736: (NOT) ILIKE (ANY | SOME | ALL) 
expressions") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index 0e1a6df..4d87c7f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -318,6 +318,15 @@ class TableIdentifierParserSuite extends SQLKeywordUtils { assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.$keyword")) } } + + withSQLConf( + SQLConf.ANSI_ENABLED.key -> "true", + SQLConf.ENFORCE_RESERVED_KEYWORDS.key -> "false") { + reservedKeywordsInAnsiMode.foreach { keyword => + assert(TableIdentifier(keyword) === parseTableIdentifier(s"$keyword")) + assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.$keyword")) + } + } } test("table identifier - strict keywords") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org