This is an automated email from the ASF dual-hosted git repository. ueshin pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 63cef16ef77 [SPARK-44395][SQL] Update TVF arguments to require parentheses around identifier after TABLE keyword 63cef16ef77 is described below commit 63cef16ef779addaa841fa863a5797fc9f33fd82 Author: Daniel Tenedorio <daniel.tenedo...@databricks.com> AuthorDate: Thu Jul 13 14:04:09 2023 -0700 [SPARK-44395][SQL] Update TVF arguments to require parentheses around identifier after TABLE keyword ### What changes were proposed in this pull request? This PR updates the parsing of table function arguments to require parentheses around identifier after the TABLE keyword. Instead of `TABLE t`, the syntax should look like `TABLE(t)` instead as specified in the SQL standard. * I kept the previous syntax without the parentheses as an optional case in the SQL grammar so that we can catch it in the `AstBuilder` and throw an informative error message telling the user to add parentheses and try the query again. * I had to swap the order of parsing table function arguments, so the `table(identifier)` syntax does not accidentally parse as a scalar function call: ``` functionTableArgument : functionTableReferenceArgument | functionArgument ; ``` ### Why are the changes needed? This syntax is written down in the SQL standard. Per the standard, `TABLE identifier` should actually be passed as `TABLE(identifier)`. ### Does this PR introduce _any_ user-facing change? Yes, SQL syntax changes slightly. ### How was this patch tested? This PR adds and updates unit test coverage. Closes #41965 from dtenedor/parentheses-table-clause. Authored-by: Daniel Tenedorio <daniel.tenedo...@databricks.com> Signed-off-by: Takuya UESHIN <ues...@databricks.com> --- .../src/main/resources/error/error-classes.json | 5 +++++ ...ror-conditions-invalid-sql-syntax-error-class.md | 4 ++++ python/pyspark/sql/tests/test_udtf.py | 10 +++++----- .../spark/sql/catalyst/parser/SqlBaseParser.g4 | 5 +++-- .../spark/sql/catalyst/parser/AstBuilder.scala | 6 ++++++ .../spark/sql/errors/QueryParsingErrors.scala | 11 +++++++++++ .../spark/sql/catalyst/parser/PlanParserSuite.scala | 21 +++++++++++++++++---- 7 files changed, 51 insertions(+), 11 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 6691e86b463..99a0a4ae4ba 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -1694,6 +1694,11 @@ "Expected a column reference for transform <transform>: <expr>." ] }, + "INVALID_TABLE_FUNCTION_IDENTIFIER_ARGUMENT_MISSING_PARENTHESES" : { + "message" : [ + "Syntax error: call to table-valued function is invalid because parentheses are missing around the provided TABLE argument <argumentName>; please surround this with parentheses and try again." + ] + }, "INVALID_TABLE_VALUED_FUNC_NAME" : { "message" : [ "Table valued function cannot specify database name: <funcName>." diff --git a/docs/sql-error-conditions-invalid-sql-syntax-error-class.md b/docs/sql-error-conditions-invalid-sql-syntax-error-class.md index 6c9f588ba49..b1e298f7b90 100644 --- a/docs/sql-error-conditions-invalid-sql-syntax-error-class.md +++ b/docs/sql-error-conditions-invalid-sql-syntax-error-class.md @@ -49,6 +49,10 @@ Partition key `<partKey>` must set value. Expected a column reference for transform `<transform>`: `<expr>`. +## INVALID_TABLE_FUNCTION_IDENTIFIER_ARGUMENT_MISSING_PARENTHESES + +Syntax error: call to table-valued function is invalid because parentheses are missing around the provided TABLE argument `<argumentName>`; please surround this with parentheses and try again. + ## INVALID_TABLE_VALUED_FUNC_NAME Table valued function cannot specify database name: `<funcName>`. diff --git a/python/pyspark/sql/tests/test_udtf.py b/python/pyspark/sql/tests/test_udtf.py index e4db542cbb7..8f42b4123e4 100644 --- a/python/pyspark/sql/tests/test_udtf.py +++ b/python/pyspark/sql/tests/test_udtf.py @@ -545,7 +545,7 @@ class BaseUDTFTestsMixin: with self.tempView("v"): self.spark.sql("CREATE OR REPLACE TEMPORARY VIEW v as SELECT id FROM range(0, 8)") self.assertEqual( - self.spark.sql("SELECT * FROM test_udtf(TABLE v)").collect(), + self.spark.sql("SELECT * FROM test_udtf(TABLE (v))").collect(), [Row(a=6), Row(a=7)], ) @@ -561,7 +561,7 @@ class BaseUDTFTestsMixin: with self.tempView("v"): self.spark.sql("CREATE OR REPLACE TEMPORARY VIEW v as SELECT id FROM range(0, 8)") self.assertEqual( - self.spark.sql("SELECT * FROM test_udtf(5, TABLE v)").collect(), + self.spark.sql("SELECT * FROM test_udtf(5, TABLE (v))").collect(), [Row(a=6), Row(a=7)], ) @@ -575,7 +575,7 @@ class BaseUDTFTestsMixin: self.spark.udtf.register("test_udtf", func) with self.assertRaisesRegex(AnalysisException, "TABLE_OR_VIEW_NOT_FOUND"): - self.spark.sql("SELECT * FROM test_udtf(TABLE v)").collect() + self.spark.sql("SELECT * FROM test_udtf(TABLE (v))").collect() def test_udtf_with_table_argument_malformed_query(self): class TestUDTF: @@ -637,7 +637,7 @@ class BaseUDTFTestsMixin: WITH t AS ( SELECT id FROM range(0, 8) ) - SELECT * FROM test_udtf(TABLE t) + SELECT * FROM test_udtf(TABLE (t)) """ ).collect(), [Row(a=6), Row(a=7)], @@ -658,7 +658,7 @@ class BaseUDTFTestsMixin: """ SELECT * FROM range(0, 8) AS t, - LATERAL test_udtf(TABLE t) + LATERAL test_udtf(TABLE (t)) """ ).collect(), [Row(a=6), Row(a=7)], diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 0390785ab5d..85dbc499fbd 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -790,6 +790,7 @@ inlineTable functionTableSubqueryArgument : TABLE identifierReference + | TABLE LEFT_PAREN identifierReference RIGHT_PAREN | TABLE LEFT_PAREN query RIGHT_PAREN ; @@ -803,8 +804,8 @@ functionTableReferenceArgument ; functionTableArgument - : functionArgument - | functionTableReferenceArgument + : functionTableReferenceArgument + | functionArgument ; functionTable diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 1fdfbb97abc..7a28efa3e42 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1554,6 +1554,12 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging { override def visitFunctionTableSubqueryArgument( ctx: FunctionTableSubqueryArgumentContext): Expression = withOrigin(ctx) { val p = Option(ctx.identifierReference).map { r => + // Make sure that the identifier after the TABLE keyword is surrounded by parentheses, as + // required by the SQL standard. If not, return an informative error message. + if (ctx.LEFT_PAREN() == null) { + throw QueryParsingErrors.invalidTableFunctionIdentifierArgumentMissingParentheses( + ctx, argumentName = ctx.identifierReference().getText) + } createUnresolvedRelation(r) }.getOrElse { plan(ctx.query) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index c451f606c23..ced27626be7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -669,4 +669,15 @@ private[sql] object QueryParsingErrors extends QueryErrorsBase { ctx ) } + + def invalidTableFunctionIdentifierArgumentMissingParentheses( + ctx: ParserRuleContext, argumentName: String): Throwable = { + new ParseException( + errorClass = + "INVALID_SQL_SYNTAX.INVALID_TABLE_FUNCTION_IDENTIFIER_ARGUMENT_MISSING_PARENTHESES", + messageParameters = Map( + "argumentName" -> toSQLId(argumentName)), + ctx + ) + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 4bad3ced705..4a5d0a0ae29 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -1443,7 +1443,7 @@ class PlanParserSuite extends AnalysisTest { test("table valued function with table arguments") { assertEqual( - "select * from my_tvf(table v1, table (select 1))", + "select * from my_tvf(table (v1), table (select 1))", UnresolvedTableValuedFunction("my_tvf", FunctionTableSubqueryArgumentExpression(UnresolvedRelation(Seq("v1"))) :: FunctionTableSubqueryArgumentExpression( @@ -1451,7 +1451,7 @@ class PlanParserSuite extends AnalysisTest { // All named arguments assertEqual( - "select * from my_tvf(arg1 => table v1, arg2 => table (select 1))", + "select * from my_tvf(arg1 => table (v1), arg2 => table (select 1))", UnresolvedTableValuedFunction("my_tvf", NamedArgumentExpression("arg1", FunctionTableSubqueryArgumentExpression(UnresolvedRelation(Seq("v1")))) :: @@ -1461,7 +1461,7 @@ class PlanParserSuite extends AnalysisTest { // Unnamed and named arguments assertEqual( - "select * from my_tvf(2, table v1, arg1 => table (select 1))", + "select * from my_tvf(2, table (v1), arg1 => table (select 1))", UnresolvedTableValuedFunction("my_tvf", Literal(2) :: FunctionTableSubqueryArgumentExpression(UnresolvedRelation(Seq("v1"))) :: @@ -1471,12 +1471,25 @@ class PlanParserSuite extends AnalysisTest { // Mixed arguments assertEqual( - "select * from my_tvf(arg1 => table v1, 2, arg2 => true)", + "select * from my_tvf(arg1 => table (v1), 2, arg2 => true)", UnresolvedTableValuedFunction("my_tvf", NamedArgumentExpression("arg1", FunctionTableSubqueryArgumentExpression(UnresolvedRelation(Seq("v1")))) :: Literal(2) :: NamedArgumentExpression("arg2", Literal(true)) :: Nil).select(star())) + + // Negative tests: + // Parentheses are missing from the table argument. + val sql1 = "select * from my_tvf(arg1 => table v1)" + checkError( + exception = parseException(sql1), + errorClass = + "INVALID_SQL_SYNTAX.INVALID_TABLE_FUNCTION_IDENTIFIER_ARGUMENT_MISSING_PARENTHESES", + parameters = Map("argumentName" -> "`v1`"), + context = ExpectedContext( + fragment = "table v1", + start = 29, + stop = sql1.length - 2)) } test("SPARK-32106: TRANSFORM plan") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org