This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 5a17537aa4a [SPARK-42979][SQL] Define literal constructors as keywords 5a17537aa4a is described below commit 5a17537aa4a777429431542cfa6184591476e54a Author: Max Gekk <max.g...@gmail.com> AuthorDate: Thu Mar 30 17:43:54 2023 +0300 [SPARK-42979][SQL] Define literal constructors as keywords ### What changes were proposed in this pull request? In the PR, I propose to define literal constructors `DATE`, `TIMESTAMP`, `TIMESTAMP_NTZ`, `TIMESTAMP_LTZ`, `INTERVAL`, and `X` as Spark SQL keywords. ### Why are the changes needed? The non-keyword literal constructors cause some inconveniences while analysing/transforming the parse tree. For example, while forming the stable column aliases, see https://github.com/apache/spark/pull/40126. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By running the affected test suites: ``` $ build/sbt "test:testOnly *SQLKeywordSuite" $ build/sbt "test:testOnly *.ResolveAliasesSuite" ``` Closes #40593 from MaxGekk/typed-literal-keywords. Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- docs/sql-ref-ansi-compliance.md | 1 + .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 1 + .../spark/sql/catalyst/parser/SqlBaseParser.g4 | 12 ++++++++++- .../spark/sql/catalyst/parser/AstBuilder.scala | 23 +++++++++++----------- .../catalyst/analysis/ResolveAliasesSuite.scala | 4 ++-- 5 files changed, 27 insertions(+), 14 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 36d1f8f73eb..d4bb0e93bee 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -672,6 +672,7 @@ Below is a list of all the keywords in Spark SQL. 
|WINDOW|non-reserved|non-reserved|reserved| |WITH|reserved|non-reserved|reserved| |WITHIN|reserved|non-reserved|reserved| +|X|non-reserved|non-reserved|non-reserved| |YEAR|non-reserved|non-reserved|non-reserved| |YEARS|non-reserved|non-reserved|non-reserved| |ZONE|non-reserved|non-reserved|non-reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index 4d446b494f7..c9930fa0986 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -208,6 +208,7 @@ GRANT: 'GRANT'; GROUP: 'GROUP'; GROUPING: 'GROUPING'; HAVING: 'HAVING'; +BINARY_HEX: 'X'; HOUR: 'HOUR'; HOURS: 'HOURS'; IF: 'IF'; diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index ab54aef35df..a112b6e31fe 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -928,11 +928,19 @@ primaryExpression (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? ( OVER windowSpec)? 
#percentile ; +literalType + : DATE + | TIMESTAMP | TIMESTAMP_LTZ | TIMESTAMP_NTZ + | INTERVAL + | BINARY_HEX + | unsupportedType=identifier + ; + constant : NULL #nullLiteral | COLON identifier #parameterLiteral | interval #intervalLiteral - | identifier stringLit #typeConstructor + | literalType stringLit #typeConstructor | number #numericLiteral | booleanValue #booleanLiteral | stringLit+ #stringLiteral @@ -1227,6 +1235,7 @@ ansiNonReserved | BETWEEN | BIGINT | BINARY + | BINARY_HEX | BOOLEAN | BUCKET | BUCKETS @@ -1514,6 +1523,7 @@ nonReserved | BETWEEN | BIGINT | BINARY + | BINARY_HEX | BOOLEAN | BOTH | BUCKET diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 86880a82bb3..cb06fc31f0e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2398,11 +2398,11 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit */ override def visitTypeConstructor(ctx: TypeConstructorContext): Literal = withOrigin(ctx) { val value = string(visitStringLit(ctx.stringLit)) - val valueType = ctx.identifier.getText.toUpperCase(Locale.ROOT) + val valueType = ctx.literalType.start.getType def toLiteral[T](f: UTF8String => Option[T], t: DataType): Literal = { f(UTF8String.fromString(value)).map(Literal(_, t)).getOrElse { - throw QueryParsingErrors.cannotParseValueTypeError(valueType, value, ctx) + throw QueryParsingErrors.cannotParseValueTypeError(ctx.literalType.getText, value, ctx) } } @@ -2413,17 +2413,17 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit } valueType match { - case "DATE" => + case DATE => val zoneId = getZoneId(conf.sessionLocalTimeZone) val specialDate = convertSpecialDate(value, zoneId).map(Literal(_, DateType)) 
specialDate.getOrElse(toLiteral(stringToDate, DateType)) - case "TIMESTAMP_NTZ" => + case TIMESTAMP_NTZ => convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone)) .map(Literal(_, TimestampNTZType)) .getOrElse(toLiteral(stringToTimestampWithoutTimeZone, TimestampNTZType)) - case "TIMESTAMP_LTZ" => + case TIMESTAMP_LTZ => constructTimestampLTZLiteral(value) - case "TIMESTAMP" => + case TIMESTAMP => SQLConf.get.timestampType match { case TimestampNTZType => convertSpecialTimestampNTZ(value, getZoneId(conf.sessionLocalTimeZone)) @@ -2444,12 +2444,13 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit constructTimestampLTZLiteral(value) } - case "INTERVAL" => + case INTERVAL => val interval = try { IntervalUtils.stringToInterval(UTF8String.fromString(value)) } catch { case e: IllegalArgumentException => - val ex = QueryParsingErrors.cannotParseValueTypeError(valueType, value, ctx) + val ex = QueryParsingErrors.cannotParseValueTypeError( + ctx.literalType.getText, value, ctx) ex.setStackTrace(e.getStackTrace) throw ex } @@ -2462,7 +2463,7 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit } else { Literal(interval, CalendarIntervalType) } - case "X" => + case BINARY_HEX => val padding = if (value.length % 2 != 0) "0" else "" try { Literal(Hex.decodeHex(padding + value)) @@ -2472,9 +2473,9 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit ex.setStackTrace(e.getStackTrace) throw ex } - case other => + case _ => throw QueryParsingErrors.literalValueTypeUnsupportedError( - unsupportedType = other, + unsupportedType = ctx.literalType.getText, supportedTypes = Seq("DATE", "TIMESTAMP_NTZ", "TIMESTAMP_LTZ", "TIMESTAMP", "INTERVAL", "X"), ctx) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala index 
071304d5762..6513db43639 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/ResolveAliasesSuite.scala @@ -103,9 +103,9 @@ class ResolveAliasesSuite extends AnalysisTest { """"abc"""" -> """"abc"""", """'\t\n xyz \t\r'""" -> """'\t\n xyz \t\r'""", "1l" -> "1L", "1S" -> "1S", - "date'-0001-1-28'" -> "date'-0001-1-28'", + "date'-0001-1-28'" -> "DATE'-0001-1-28'", "interval 3 year 1 month" -> "INTERVAL3YEAR1MONTH", - "x'00'" -> "x'00'", + "x'00'" -> "X'00'", // Preserve case "CAST(1 as tinyint)" -> "CAST(1ASTINYINT)", // Brackets --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org