This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ee1c624fa03 [SPARK-40585][SQL][FOLLOWUP] Simplify string literal implementation in SQL parser ee1c624fa03 is described below commit ee1c624fa03d15e6b71b0ecef044ee3c9167a812 Author: Gengliang Wang <gengli...@apache.org> AuthorDate: Thu Oct 6 10:55:49 2022 -0700 [SPARK-40585][SQL][FOLLOWUP] Simplify string literal implementation in SQL parser ### What changes were proposed in this pull request? * Simplify string literal implementation in SQL parser by using `stringLit` as many as possible in SqlBaseParser.g4 * rename `double_quoted_identifiers` as `doubleQuotedIdentifiers` for keeping naming style consistent. ### Why are the changes needed? Simplify string literal implementation in SQL parser. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing UT Closes #38111 from gengliangwang/refactor. Authored-by: Gengliang Wang <gengli...@apache.org> Signed-off-by: Gengliang Wang <gengli...@apache.org> --- .../spark/sql/catalyst/parser/SqlBaseParser.g4 | 12 +++------ .../spark/sql/catalyst/parser/AstBuilder.scala | 30 +++++----------------- .../spark/sql/catalyst/parser/ParseDriver.scala | 2 +- .../org/apache/spark/sql/internal/SQLConf.scala | 2 +- .../spark/sql/execution/SparkSqlParser.scala | 6 ++--- 5 files changed, 14 insertions(+), 38 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 5b61c767fbe..d353573a31e 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -225,8 +225,7 @@ statement ; timezone - : STRING - | {!double_quoted_identifiers}? DOUBLEQUOTED_STRING + : stringLit | LOCAL ; @@ -911,8 +910,7 @@ unitToUnitInterval intervalValue : (PLUS | MINUS)? - (INTEGER_VALUE | DECIMAL_VALUE | STRING - | {!double_quoted_identifiers}? DOUBLEQUOTED_STRING) + (INTEGER_VALUE | DECIMAL_VALUE | stringLit) ; colPosition @@ -1079,15 +1077,13 @@ stringLit ; comment - : STRING - | {!double_quoted_identifiers}? DOUBLEQUOTED_STRING + : stringLit | NULL ; version : INTEGER_VALUE - | STRING - | {!double_quoted_identifiers}? DOUBLEQUOTED_STRING + | stringLit ; // When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index fed058a0e9a..ca261b7ab77 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1285,10 +1285,8 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit if (ctx != null) { if (ctx.INTEGER_VALUE != null) { Some(ctx.INTEGER_VALUE().getText) - } else if (ctx.DOUBLEQUOTED_STRING() != null) { - Option(ctx.DOUBLEQUOTED_STRING()).map(string) } else { - Option(ctx.STRING()).map(string) + Option(string(visitStringLit(ctx.stringLit()))) } } else { None @@ -2614,14 +2612,8 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit assert(units.length == values.length) val kvs = units.indices.map { i => val u = units(i).getText - val v = if (values(i).STRING() != null || values(i).DOUBLEQUOTED_STRING() != null) { - val value = string(if (values(i).STRING() != null) { - values(i).STRING() - } - else { - values(i).DOUBLEQUOTED_STRING() - } - ) + val v = if (values(i).stringLit() != null) { + val value = string(visitStringLit(values(i).stringLit())) // SPARK-32840: For invalid cases, e.g. INTERVAL '1 day 2' hour, // INTERVAL 'interval 1' day, we need to check ahead before they are concatenated with // units and become valid ones, e.g. '1 day 2 hour'. @@ -2659,12 +2651,8 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit */ override def visitUnitToUnitInterval(ctx: UnitToUnitIntervalContext): CalendarInterval = { withOrigin(ctx) { - val value = Option(if (ctx.intervalValue.STRING != null) { - ctx.intervalValue.STRING - } else { - ctx.intervalValue.DOUBLEQUOTED_STRING - } - ).map(string).map { interval => + val value = Option(ctx.intervalValue().stringLit()).map(s => string(visitStringLit(s))) + .map { interval => if (ctx.intervalValue().MINUS() == null) { interval } else if (interval.startsWith("-")) { @@ -4605,13 +4593,7 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit } override def visitComment (ctx: CommentContext): String = { - if (ctx.STRING() != null) { - string(ctx.STRING) - } else if (ctx.DOUBLEQUOTED_STRING() != null) { - string(ctx.DOUBLEQUOTED_STRING()) - } else { - "" - } + Option(ctx.stringLit()).map(s => string(visitStringLit(s))).getOrElse("") } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index f2212bf2437..498d2d9ee13 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -118,7 +118,7 @@ abstract class AbstractSqlParser extends ParserInterface with SQLConfHelper with parser.legacy_setops_precedence_enabled = conf.setOpsPrecedenceEnforced parser.legacy_exponent_literal_as_decimal_enabled = conf.exponentLiteralAsDecimalEnabled parser.SQL_standard_keyword_behavior = conf.enforceReservedKeywords - parser.double_quoted_identifiers = conf.double_quoted_identifiers + parser.double_quoted_identifiers = conf.doubleQuotedIdentifiers try { try { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 5a5f3a83a2a..376bcece3c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -4592,7 +4592,7 @@ class SQLConf extends Serializable with Logging { def enforceReservedKeywords: Boolean = ansiEnabled && getConf(ENFORCE_RESERVED_KEYWORDS) - def double_quoted_identifiers: Boolean = getConf(DOUBLE_QUOTED_IDENTIFIERS) + def doubleQuotedIdentifiers: Boolean = getConf(DOUBLE_QUOTED_IDENTIFIERS) def timestampType: AtomicType = getConf(TIMESTAMP_TYPE) match { case "TIMESTAMP_LTZ" => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala index 5719b0566df..086fdebd205 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkSqlParser.scala @@ -168,10 +168,8 @@ class SparkSqlAstBuilder extends AstBuilder { } override def visitTimezone (ctx: TimezoneContext): String = { - if (ctx.STRING() != null) { - string(ctx.STRING) - } else if (ctx.DOUBLEQUOTED_STRING() != null) { - string(ctx.DOUBLEQUOTED_STRING()) + if (ctx.stringLit() != null) { + string(visitStringLit(ctx.stringLit())) } else { TimeZone.getDefault.getID } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org