This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 89f31df  [SPARK-30822][SQL] Remove semicolon at the end of a sql query
89f31df is described below

commit 89f31df3ffb20af8bd49c4bdb1f4624aa4feadf9
Author: samsetegne <samuelsete...@gmail.com>
AuthorDate: Wed Mar 25 15:00:15 2020 +0800

[SPARK-30822][SQL] Remove semicolon at the end of a sql query

# What changes were proposed in this pull request?
This change proposes ignoring a terminating semicolon in queries submitted by the user (if included), instead of raising a parse exception.

# Why are the changes needed?
When a user submits a directly executable SQL statement terminated with a semicolon, they receive an `org.apache.spark.sql.catalyst.parser.ParseException` of `extraneous input ';' expecting <EOF>`. SQL-92 describes a direct SQL statement as having the format `<directly executable statement> <semicolon>`, and the majority of SQL implementations either require the semicolon as a statement terminator or make it optional (meaning not raising an exception when it's included, seemingly i [...]

# Does this PR introduce any user-facing change?
No

# How was this patch tested?
Unit test added to `PlanParserSuite`
```
sbt> project catalyst
sbt> testOnly *PlanParserSuite
[info] - case insensitive (565 milliseconds)
[info] - explain (9 milliseconds)
[info] - set operations (41 milliseconds)
[info] - common table expressions (31 milliseconds)
[info] - simple select query (47 milliseconds)
[info] - hive-style single-FROM statement (11 milliseconds)
[info] - multi select query (32 milliseconds)
[info] - query organization (41 milliseconds)
[info] - insert into (12 milliseconds)
[info] - aggregation (24 milliseconds)
[info] - limit (11 milliseconds)
[info] - window spec (11 milliseconds)
[info] - lateral view (17 milliseconds)
[info] - joins (62 milliseconds)
[info] - sampled relations (11 milliseconds)
[info] - sub-query (11 milliseconds)
[info] - scalar sub-query (9 milliseconds)
[info] - table reference (2 milliseconds)
[info] - table valued function (8 milliseconds)
[info] - SPARK-20311 range(N) as alias (2 milliseconds)
[info] - SPARK-20841 Support table column aliases in FROM clause (3 milliseconds)
[info] - SPARK-20962 Support subquery column aliases in FROM clause (4 milliseconds)
[info] - SPARK-20963 Support aliases for join relations in FROM clause (3 milliseconds)
[info] - inline table (23 milliseconds)
[info] - simple select query with !> and !< (5 milliseconds)
[info] - select hint syntax (34 milliseconds)
[info] - SPARK-20854: select hint syntax with expressions (12 milliseconds)
[info] - SPARK-20854: multiple hints (4 milliseconds)
[info] - TRIM function (16 milliseconds)
[info] - OVERLAY function (16 milliseconds)
[info] - precedence of set operations (18 milliseconds)
[info] - create/alter view as insert into table (4 milliseconds)
[info] - Invalid insert constructs in the query (10 milliseconds)
[info] - relation in v2 catalog (3 milliseconds)
[info] - CTE with column alias (2 milliseconds)
[info] - statement containing terminal semicolons (3 milliseconds)
[info] ScalaTest
[info] Run completed in 3 seconds, 129 milliseconds.
[info] Total number of tests run: 36
[info] Suites: completed 1, aborted 0
[info] Tests: succeeded 36, failed 0, canceled 0, ignored 0, pending 0
[info] All tests passed.
[info] Passed: Total 36, Failed 0, Errors 0, Passed 36
```

### Current behavior:
#### scala
```scala
scala> val df = sql("select 1")
// df: org.apache.spark.sql.DataFrame = [1: int]

scala> df.show()
// +---+
// |  1|
// +---+
// |  1|
// +---+

scala> val df = sql("select 1;")
// org.apache.spark.sql.catalyst.parser.ParseException:
// extraneous input ';' expecting <EOF>(line 1, pos 8)
//
// == SQL ==
// select 1;
// --------^^^
//
//   at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:263)
//   at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:130)
//   at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:52)
//   at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:76)
//   at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:605)
//   at org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111)
//   at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:605)
//   ... 47 elided
```
#### pyspark
```python
df = spark.sql('select 1')
df.show()
# +---+
# |  1|
# +---+
# |  1|
# +---+

df = spark.sql('select 1;')
# Traceback (most recent call last):
#   File "<stdin>", line 1, in <module>
#   File "/Users/ssetegne/spark/python/pyspark/sql/session.py", line 646, in sql
#     return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
#   File "/Users/ssetegne/spark/python/lib/py4j-0.10.8.1-src.zip/py4j/java_gateway.py", line 1286, in __call__
#   File "/Users/ssetegne/spark/python/pyspark/sql/utils.py", line 102, in deco
#     raise converted
# pyspark.sql.utils.ParseException:
# extraneous input ';' expecting <EOF>(line 1, pos 8)
#
# == SQL ==
# select 1;
# --------^^^
```

### Behavior after proposed fix:
#### scala
```scala
scala> val df = sql("select 1")
// df: org.apache.spark.sql.DataFrame = [1: int]

scala> df.show()
// +---+
// |  1|
// +---+
// |  1|
// +---+

scala> val df = sql("select 1;")
// df: org.apache.spark.sql.DataFrame = [1: int]

scala> df.show()
// +---+
// |  1|
// +---+
// |  1|
// +---+
```
#### pyspark
```python
df = spark.sql('select 1')
df.show()
# +---+
# |  1|
# +---+
# |  1|
# +---+

df = spark.sql('select 1;')
df.show()
# +---+
# |  1|
# +---+
# |  1|
# +---+
```

Closes #27567 from samsetegne/semicolon.
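Scope note (a sketch, not part of the patch): the new `singleStatement : statement ';'* EOF` rule in the diff below only tolerates semicolons at the very end of a single query; `sql()` still parses exactly one statement, so using a semicolon to chain two statements is still expected to raise a `ParseException`. A minimal spark-shell illustration of the expected behavior on a build that includes this change:

```scala
// Trailing semicolons (optionally separated by whitespace) should now parse,
// matching the cases covered by the added PlanParserSuite test.
spark.sql("select 1;").show()
spark.sql("select 1;;;").show()
spark.sql("select 1; ;; ;").show()

// A semicolon separating two statements is not covered by the ';'* EOF rule,
// so this is still expected to throw a ParseException.
spark.sql("select 1; select 2;")
```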
Authored-by: samsetegne <samuelsete...@gmail.com>
Signed-off-by: Wenchen Fan <wenc...@databricks.com>
(cherry picked from commit 44431d4b1a22c3db87d7e4a24df517d6d45905a8)
Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 .../main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4    | 2 +-
 .../org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala      | 2 +-
 .../org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala     | 7 +++++++
 .../src/test/resources/sql-tests/results/describe-query.sql.out    | 4 ++--
 sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out | 4 ++--
 sql/core/src/test/resources/sql-tests/results/interval.sql.out     | 2 +-
 6 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 563ef69..143a567 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -64,7 +64,7 @@ grammar SqlBase;
 }
 singleStatement
-    : statement EOF
+    : statement ';'* EOF
     ;
 singleExpression
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
index 35a54c8..543ea5d 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/DDLParserSuite.scala
@@ -1480,7 +1480,7 @@ class DDLParserSuite extends AnalysisTest {
       AnalyzeColumnStatement(Seq("a", "b", "c"), None, allColumns = true))
     intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL COLUMNS key, value",
-      "mismatched input 'key' expecting <EOF>")
+      "mismatched input 'key' expecting {<EOF>, ';'}")
     intercept("ANALYZE TABLE a.b.c COMPUTE STATISTICS FOR ALL",
       "missing 'COLUMNS' at '<EOF>'")
   }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 875096f..11230e4 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -922,4 +922,11 @@ class PlanParserSuite extends AnalysisTest {
       "WITH t(x) AS (SELECT c FROM a) SELECT * FROM t",
       cte(table("t").select(star()), "t" -> ((table("a").select('c), Seq("x")))))
   }
+
+  test("statement containing terminal semicolons") {
+    assertEqual("select 1;", OneRowRelation().select(1))
+    assertEqual("select a, b;", OneRowRelation().select('a, 'b))
+    assertEqual("select a, b from db.c;;;", table("db", "c").select('a, 'b))
+    assertEqual("select a, b from db.c; ;; ;", table("db", "c").select('a, 'b))
+  }
 }
diff --git a/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out b/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out
index 6b16aba..2199fc0 100644
--- a/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/describe-query.sql.out
@@ -112,7 +112,7 @@ struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
-mismatched input 'desc_temp1' expecting {<EOF>, '.'}(line 1, pos 21)
+mismatched input 'desc_temp1' expecting {<EOF>, ';'}(line 1, pos 21)
 == SQL ==
 DESCRIBE INSERT INTO desc_temp1 values (1,
'val1')
@@ -126,7 +126,7 @@ struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
-mismatched input 'desc_temp1' expecting {<EOF>, '.'}(line 1, pos 21)
+mismatched input 'desc_temp1' expecting {<EOF>, ';'}(line 1, pos 21)
 == SQL ==
 DESCRIBE INSERT INTO desc_temp1 SELECT * FROM desc_temp2
diff --git a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out
index 8eeabb3..0a5fe7a 100644
--- a/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/grouping_set.sql.out
@@ -138,7 +138,7 @@ struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
-extraneous input 'ROLLUP' expecting <EOF>(line 1, pos 53)
+extraneous input 'ROLLUP' expecting {<EOF>, ';'}(line 1, pos 53)
 == SQL ==
 SELECT a, b, c, count(d) FROM grouping GROUP BY WITH ROLLUP
@@ -152,7 +152,7 @@ struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
-extraneous input 'CUBE' expecting <EOF>(line 1, pos 53)
+extraneous input 'CUBE' expecting {<EOF>, ';'}(line 1, pos 53)
 == SQL ==
 SELECT a, b, c, count(d) FROM grouping GROUP BY WITH CUBE
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 7a3dd74..0d758ca 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -639,7 +639,7 @@ struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
-extraneous input 'day' expecting <EOF>(line 1, pos 27)
+extraneous input 'day' expecting {<EOF>, ';'}(line 1, pos 27)
 == SQL ==
 select interval 30 day day day

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org