This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 7e90f4226dd [SPARK-42553][SQL][3.3] Ensure at least one time unit after "interval"
7e90f4226dd is described below

commit 7e90f4226dd5e4fae1af5e88a334d87b330019cb
Author: jiangyzanze <jiangyanze....@alibaba-inc.com>
AuthorDate: Thu Mar 2 18:25:02 2023 +0300

[SPARK-42553][SQL][3.3] Ensure at least one time unit after "interval"

### What changes were proposed in this pull request?
This PR ensures that "at least one time unit should be given for interval literal" by modifying SqlBaseParser, so the requirement is enforced by the grammar itself. This is a backport of https://github.com/apache/spark/pull/40195

### Why are the changes needed?
INTERVAL is a non-reserved keyword in Spark. But when I run
```shell
scala> spark.sql("select interval from mytable")
```
I get
```
org.apache.spark.sql.catalyst.parser.ParseException:
at least one time unit should be given for interval literal(line 1, pos 7)

== SQL ==
select interval from mytable
-------^^^

  at org.apache.spark.sql.errors.QueryParsingErrors$.invalidIntervalLiteralError(QueryParsingErrors.scala:196)
  ......
```
This is a bug because non-reserved keywords only have a special meaning in particular contexts and can be used as identifiers elsewhere, so by design INTERVAL can be used as a column name. Currently the interval rule's grammar is
```
interval
    : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
    ;
```
There is no need to make the time unit optional; we can ensure "at least one time unit should be given for interval literal" by changing the rule to
```
interval
    : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
    ;
```
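For reference, here is a minimal sketch (not part of this commit) of how the fixed rule can be exercised directly through the standalone Catalyst parser. It assumes a Spark 3.3 build with the `spark-catalyst` module on the classpath; the object name `IntervalGrammarCheck` and the table name `mytable` are made up for illustration:
```scala
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser

object IntervalGrammarCheck {
  def main(args: Array[String]): Unit = {
    // With the grammar fix, a bare `interval` is parsed as an ordinary column reference,
    // producing an unresolved plan instead of throwing
    // "at least one time unit should be given for interval literal".
    val plan = CatalystSqlParser.parsePlan("SELECT interval FROM mytable")
    println(plan.treeString)

    // Interval literals still require at least one time unit, as before.
    val literal = CatalystSqlParser.parseExpression("interval 3 days")
    println(literal.sql)
  }
}
```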
### Does this PR introduce _any_ user-facing change?
No.

### How was this patch tested?
Unit test: PlanParserSuite."SPARK-42553: NonReserved keyword 'interval' can be column name"

Local test
```shell
scala> val myDF = spark.sparkContext.makeRDD(1 to 5).toDF("interval")
myDF: org.apache.spark.sql.DataFrame = [interval: int]

scala> myDF.createOrReplaceTempView("mytable")

scala> spark.sql("select interval from mytable;").show()
+--------+
|interval|
+--------+
|       1|
|       2|
|       3|
|       4|
|       5|
+--------+
```
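As a hedged complement to the local test above (not part of this commit or its test suite): since a bare `interval` now parses as a column reference, selecting it from a relation that has no such column is expected to fail during analysis rather than with a ParseException. The view name `no_interval_col` is made up for illustration, and a running `SparkSession` named `spark` is assumed:
```scala
import org.apache.spark.sql.AnalysisException

// A view that has no column named "interval".
spark.range(5).toDF("id").createOrReplaceTempView("no_interval_col")

try {
  spark.sql("SELECT interval FROM no_interval_col").show()
} catch {
  case e: AnalysisException =>
    // Before this change the same query failed to parse; now it parses and
    // fails during column resolution instead.
    println(s"Analysis error, as expected: ${e.getMessage}")
}
```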
Closes #40253 from jiang13021/branch-3.3-42553.

Authored-by: jiangyzanze <jiangyanze....@alibaba-inc.com>
Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../apache/spark/sql/catalyst/parser/SqlBaseParser.g4    |  2 +-
 .../apache/spark/sql/catalyst/parser/AstBuilder.scala    |  5 ++---
 .../org/apache/spark/sql/errors/QueryParsingErrors.scala |  4 ----
 .../sql/catalyst/parser/ExpressionParserSuite.scala      |  3 ---
 .../spark/sql/catalyst/parser/PlanParserSuite.scala      |  9 +++++++++
 .../src/test/resources/sql-tests/inputs/interval.sql     |  1 -
 .../resources/sql-tests/results/ansi/interval.sql.out    | 16 +---------------
 .../test/resources/sql-tests/results/interval.sql.out    | 16 +---------------
 8 files changed, 14 insertions(+), 42 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 701d4bc5aa7..3ec4b9a833d 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -875,7 +875,7 @@ booleanValue
     ;
 
 interval
-    : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)?
+    : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
     ;
 
 errorCapturingMultiUnitsInterval
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index ecc5360a4f7..4152e24d3e9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -2551,15 +2551,14 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit
           innerCtx.unitToUnitInterval)
       }
       visitMultiUnitsInterval(innerCtx.multiUnitsInterval)
-    } else if (ctx.errorCapturingUnitToUnitInterval != null) {
+    } else {
+      assert(ctx.errorCapturingUnitToUnitInterval != null)
       val innerCtx = ctx.errorCapturingUnitToUnitInterval
       if (innerCtx.error1 != null || innerCtx.error2 != null) {
         val errorCtx = if (innerCtx.error1 != null) innerCtx.error1 else innerCtx.error2
         throw QueryParsingErrors.moreThanOneFromToUnitInIntervalLiteralError(errorCtx)
       }
       visitUnitToUnitInterval(innerCtx.body)
-    } else {
-      throw QueryParsingErrors.invalidIntervalLiteralError(ctx)
     }
   }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index e92ed3e3b07..b0b1ccba618 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -213,10 +213,6 @@ private[sql] object QueryParsingErrors extends QueryErrorsBase {
     new ParseException("Can only have a single from-to unit in the interval literal syntax", ctx)
   }
 
-  def invalidIntervalLiteralError(ctx: IntervalContext): Throwable = {
-    new ParseException("at least one time unit should be given for interval literal", ctx)
-  }
-
   def invalidIntervalFormError(value: String, ctx: MultiUnitsIntervalContext): Throwable = {
     new ParseException("Can only use numbers in the interval value part for" +
       s" multiple unit value pairs interval form, but got invalid value: $value", ctx)
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
index 9e63c817e74..4635033bec6 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/ExpressionParserSuite.scala
@@ -744,9 +744,6 @@ class ExpressionParserSuite extends AnalysisTest {
       }
     }
 
-    // Empty interval statement
-    intercept("interval", "at least one time unit should be given for interval literal")
-
     // Single Intervals.
     val forms = Seq("", "s")
     val values = Seq("0", "10", "-7", "21")
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index d1787cb7666..276dc569989 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -1326,4 +1326,13 @@ class PlanParserSuite extends AnalysisTest {
         Literal(Decimal(0.1), DecimalType(1, 1)), true).toAggregateExpression()
     )
   }
+
+  test("SPARK-42553: NonReserved keyword 'interval' can be column name") {
+    comparePlans(
+      parsePlan("SELECT interval FROM VALUES ('abc') AS tbl(interval);"),
+      UnresolvedInlineTable(
+        Seq("interval"),
+        Seq(Literal("abc")) :: Nil).as("tbl").select('interval)
+    )
+  }
 }
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 1bd86c45afe..e4da28c2e75 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -164,7 +164,6 @@ SELECT interval 'interval 2 weeks 2 days 1 hour 3 minutes 2 seconds 100 millisec
 SELECT interval '2 weeks 2 days 1 hour 3 minutes 2 seconds 100 millisecond 200 microseconds';
 
 -- malformed interval literal
-select interval;
 select interval 1 fake_unit;
 select interval 1 year to month;
 select interval '1' year to second;
diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index f4ec0afb0cc..6d8a4403d0a 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 286
+-- Number of queries: 285
 
 
 -- !query
@@ -1191,20 +1191,6 @@ struct<INTERVAL '16 01:03:02.1002' DAY TO SECOND:interval day to second>
 16 01:03:02.100200000
 
 
--- !query
-select interval
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-at least one time unit should be given for interval literal(line 1, pos 7)
-
-== SQL ==
-select interval
--------^^^
-
-
 -- !query
 select interval 1 fake_unit
 -- !query schema
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 71fb0c0845d..829ec2adcd3 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 286
+-- Number of queries: 285
 
 
 -- !query
@@ -1163,20 +1163,6 @@ struct<INTERVAL '16 01:03:02.1002' DAY TO SECOND:interval day to second>
 16 01:03:02.100200000
 
 
--- !query
-select interval
--- !query schema
-struct<>
--- !query output
-org.apache.spark.sql.catalyst.parser.ParseException
-
-at least one time unit should be given for interval literal(line 1, pos 7)
-
-== SQL ==
-select interval
--------^^^
-
-
 -- !query
 select interval 1 fake_unit
 -- !query schema

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org