This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new cfb96eb [SPARK-37133][SQL] Add a config to optionally enforce ANSI reserved keywords cfb96eb is described below commit cfb96ebd3991c62fc737242aeeb9b5cdb4abe7ae Author: Wenchen Fan <cloud0...@gmail.com> AuthorDate: Thu Oct 28 12:58:15 2021 +0800 [SPARK-37133][SQL] Add a config to optionally enforce ANSI reserved keywords ### What changes were proposed in this pull request? This PR adds a new config to optionally enforce the ANSI reserved keywords in the parser. The default value is true, so by default we still enforce them and there is no behavior change. ### Why are the changes needed? In Spark 3.2, the ANSI mode is GA. We want more people to try and use the ANSI mode, to find data issues as early as possible and get better data quality. However, the reserved-keywords restriction is a major blocker for many users who want to try ANSI mode: they have to rewrite their SQL queries just to pass the parser, which has nothing to do with data quality and is simply extra work. With a new config that lets users opt out of enforcing reserved keywords, we can get better adoption of the ANSI mode. ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? updated tests. Closes #34403 from cloud-fan/parser. 
Lead-authored-by: Wenchen Fan <cloud0...@gmail.com> Co-authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- docs/sql-ref-ansi-compliance.md | 4 +++- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 +- .../org/apache/spark/sql/catalyst/parser/ParseDriver.scala | 2 +- .../main/scala/org/apache/spark/sql/internal/SQLConf.scala | 10 ++++++++++ .../spark/sql/catalyst/parser/ExpressionParserSuite.scala | 12 +++++++++++- .../sql/catalyst/parser/TableIdentifierParserSuite.scala | 9 +++++++++ 6 files changed, 35 insertions(+), 4 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index c10e866..4527faa 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -299,7 +299,9 @@ When the ANSI mode is disabled, Spark SQL has two kinds of keywords: * Non-reserved keywords: Same definition as the one when the ANSI mode enabled. * Strict-non-reserved keywords: A strict version of non-reserved keywords, which can not be used as table alias. -By default `spark.sql.ansi.enabled` is false. +If you want to still use reserved keywords as identifiers with ANSI mode, you can set `spark.sql.ansi.enforceReservedKeywords` to false. + +By default `spark.sql.ansi.enabled` is false and `spark.sql.ansi.enforceReservedKeywords` is true. Below is a list of all the keywords in Spark SQL. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index d36c7ac..768d406 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1728,7 +1728,7 @@ class AstBuilder extends SqlBaseBaseVisitor[AnyRef] with SQLConfHelper with Logg } override def visitCurrentLike(ctx: CurrentLikeContext): Expression = withOrigin(ctx) { - if (conf.ansiEnabled) { + if (conf.enforceReservedKeywords) { ctx.name.getType match { case SqlBaseParser.CURRENT_DATE => CurrentDate() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 64216e6..b459a2d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -100,7 +100,7 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with parser.addErrorListener(ParseErrorListener) parser.legacy_setops_precedence_enabled = conf.setOpsPrecedenceEnforced parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled - parser.SQL_standard_keyword_behavior = conf.ansiEnabled + parser.SQL_standard_keyword_behavior = conf.enforceReservedKeywords try { try { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 5023b4a..fe3204b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -2590,6 +2590,14 @@ object SQLConf { .booleanConf .createWithDefault(false) + val 
ENFORCE_RESERVED_KEYWORDS = buildConf("spark.sql.ansi.enforceReservedKeywords") + .doc(s"When true and '${ANSI_ENABLED.key}' is true, the Spark SQL parser enforces the ANSI " + + "reserved keywords and forbids SQL queries that use reserved keywords as alias names " + + "and/or identifiers for table, view, function, etc.") + .version("3.3.0") + .booleanConf + .createWithDefault(true) + val SORT_BEFORE_REPARTITION = buildConf("spark.sql.execution.sortBeforeRepartition") .internal() @@ -4041,6 +4049,8 @@ class SQLConf extends Serializable with Logging { def ansiEnabled: Boolean = getConf(ANSI_ENABLED) + def enforceReservedKeywords: Boolean = ansiEnabled && getConf(ENFORCE_RESERVED_KEYWORDS) + def timestampType: AtomicType = getConf(TIMESTAMP_TYPE) match { case "TIMESTAMP_LTZ" => // For historical reason, the TimestampType maps to TIMESTAMP WITH LOCAL TIME ZONE diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala index 0a49e3a..9a88b2a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala @@ -933,10 +933,20 @@ class ExpressionParserSuite extends AnalysisTest { assertEqual("current_timestamp", CurrentTimestamp()) } - withSQLConf(SQLConf.ANSI_ENABLED.key -> "false") { + def testNonAnsiBehavior(): Unit = { assertEqual("current_date", UnresolvedAttribute.quoted("current_date")) assertEqual("current_timestamp", UnresolvedAttribute.quoted("current_timestamp")) } + withSQLConf( + SQLConf.ANSI_ENABLED.key -> "false", + SQLConf.ENFORCE_RESERVED_KEYWORDS.key -> "true") { + testNonAnsiBehavior() + } + withSQLConf( + SQLConf.ANSI_ENABLED.key -> "true", + SQLConf.ENFORCE_RESERVED_KEYWORDS.key -> "false") { + testNonAnsiBehavior() + } } test("SPARK-36736: (NOT) ILIKE (ANY | SOME | ALL) 
expressions") { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index 0e1a6df..4d87c7f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -318,6 +318,15 @@ class TableIdentifierParserSuite extends SQLKeywordUtils { assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.$keyword")) } } + + withSQLConf( + SQLConf.ANSI_ENABLED.key -> "true", + SQLConf.ENFORCE_RESERVED_KEYWORDS.key -> "false") { + reservedKeywordsInAnsiMode.foreach { keyword => + assert(TableIdentifier(keyword) === parseTableIdentifier(s"$keyword")) + assert(TableIdentifier(keyword, Option("db")) === parseTableIdentifier(s"db.$keyword")) + } + } } test("table identifier - strict keywords") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org