This is an automated email from the ASF dual-hosted git repository.

ueshin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 63cef16ef77 [SPARK-44395][SQL] Update TVF arguments to require parentheses around identifier after TABLE keyword
63cef16ef77 is described below

commit 63cef16ef779addaa841fa863a5797fc9f33fd82
Author: Daniel Tenedorio <daniel.tenedo...@databricks.com>
AuthorDate: Thu Jul 13 14:04:09 2023 -0700

    [SPARK-44395][SQL] Update TVF arguments to require parentheses around identifier after TABLE keyword
    
    ### What changes were proposed in this pull request?
    
    This PR updates the parsing of table function arguments to require
    parentheses around the identifier after the TABLE keyword. Instead of
    `TABLE t`, the syntax should be `TABLE(t)`, as specified in the SQL
    standard.
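
    For example (a sketch with hypothetical names, a table-valued function
    `my_tvf` and a table `t`), a call that previously passed a bare identifier
    now spells it with parentheses:

    ```
    -- Previously accepted form, now rejected at parse time:
    SELECT * FROM my_tvf(TABLE t);

    -- Form required by this change, per the SQL standard:
    SELECT * FROM my_tvf(TABLE(t));
    ```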
    
    * I kept the previous syntax without the parentheses as an optional case
      in the SQL grammar so that we can catch it in the `AstBuilder` and throw
      an informative error message telling the user to add parentheses and try
      the query again.
    
    * I had to swap the order of the alternatives for parsing table function
      arguments so that the `table(identifier)` syntax does not accidentally
      parse as a scalar function call (see the sketch after the grammar
      snippet below):
    
    ```
    functionTableArgument
        : functionTableReferenceArgument
        | functionArgument
        ;
    ```
    
    ### Why are the changes needed?
    
    This syntax is defined in the SQL standard: per the standard,
    `TABLE identifier` should be written as `TABLE(identifier)`.
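
    As a brief illustration (same hypothetical names as above), the
    parenthesized form applies to positional and named table arguments alike,
    while table subquery arguments keep their existing `TABLE(query)` form:

    ```
    SELECT * FROM my_tvf(arg1 => TABLE(t), arg2 => TABLE(SELECT 1));
    ```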
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes: passing a table identifier argument to a table-valued function now
    requires parentheses, i.e. `TABLE(t)` instead of `TABLE t`.
    
    ### How was this patch tested?
    
    This PR adds and updates unit test coverage.
    
    Closes #41965 from dtenedor/parentheses-table-clause.
    
    Authored-by: Daniel Tenedorio <daniel.tenedo...@databricks.com>
    Signed-off-by: Takuya UESHIN <ues...@databricks.com>
---
 .../src/main/resources/error/error-classes.json     |  5 +++++
 ...ror-conditions-invalid-sql-syntax-error-class.md |  4 ++++
 python/pyspark/sql/tests/test_udtf.py               | 10 +++++-----
 .../spark/sql/catalyst/parser/SqlBaseParser.g4      |  5 +++--
 .../spark/sql/catalyst/parser/AstBuilder.scala      |  6 ++++++
 .../spark/sql/errors/QueryParsingErrors.scala       | 11 +++++++++++
 .../spark/sql/catalyst/parser/PlanParserSuite.scala | 21 +++++++++++++++++----
 7 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index 6691e86b463..99a0a4ae4ba 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -1694,6 +1694,11 @@
           "Expected a column reference for transform <transform>: <expr>."
         ]
       },
+      "INVALID_TABLE_FUNCTION_IDENTIFIER_ARGUMENT_MISSING_PARENTHESES" : {
+        "message" : [
+          "Syntax error: call to table-valued function is invalid because 
parentheses are missing around the provided TABLE argument <argumentName>; 
please surround this with parentheses and try again."
+        ]
+      },
       "INVALID_TABLE_VALUED_FUNC_NAME" : {
         "message" : [
           "Table valued function cannot specify database name: <funcName>."
diff --git a/docs/sql-error-conditions-invalid-sql-syntax-error-class.md b/docs/sql-error-conditions-invalid-sql-syntax-error-class.md
index 6c9f588ba49..b1e298f7b90 100644
--- a/docs/sql-error-conditions-invalid-sql-syntax-error-class.md
+++ b/docs/sql-error-conditions-invalid-sql-syntax-error-class.md
@@ -49,6 +49,10 @@ Partition key `<partKey>` must set value.
 
 Expected a column reference for transform `<transform>`: `<expr>`.
 
+## INVALID_TABLE_FUNCTION_IDENTIFIER_ARGUMENT_MISSING_PARENTHESES
+
+Syntax error: call to table-valued function is invalid because parentheses are missing around the provided TABLE argument `<argumentName>`; please surround this with parentheses and try again.
+
 ## INVALID_TABLE_VALUED_FUNC_NAME
 
 Table valued function cannot specify database name: `<funcName>`.
diff --git a/python/pyspark/sql/tests/test_udtf.py b/python/pyspark/sql/tests/test_udtf.py
index e4db542cbb7..8f42b4123e4 100644
--- a/python/pyspark/sql/tests/test_udtf.py
+++ b/python/pyspark/sql/tests/test_udtf.py
@@ -545,7 +545,7 @@ class BaseUDTFTestsMixin:
         with self.tempView("v"):
             self.spark.sql("CREATE OR REPLACE TEMPORARY VIEW v as SELECT id 
FROM range(0, 8)")
             self.assertEqual(
-                self.spark.sql("SELECT * FROM test_udtf(TABLE v)").collect(),
+                self.spark.sql("SELECT * FROM test_udtf(TABLE (v))").collect(),
                 [Row(a=6), Row(a=7)],
             )
 
@@ -561,7 +561,7 @@ class BaseUDTFTestsMixin:
         with self.tempView("v"):
             self.spark.sql("CREATE OR REPLACE TEMPORARY VIEW v as SELECT id 
FROM range(0, 8)")
             self.assertEqual(
-                self.spark.sql("SELECT * FROM test_udtf(5, TABLE 
v)").collect(),
+                self.spark.sql("SELECT * FROM test_udtf(5, TABLE 
(v))").collect(),
                 [Row(a=6), Row(a=7)],
             )
 
@@ -575,7 +575,7 @@ class BaseUDTFTestsMixin:
         self.spark.udtf.register("test_udtf", func)
 
        with self.assertRaisesRegex(AnalysisException, "TABLE_OR_VIEW_NOT_FOUND"):
-            self.spark.sql("SELECT * FROM test_udtf(TABLE v)").collect()
+            self.spark.sql("SELECT * FROM test_udtf(TABLE (v))").collect()
 
     def test_udtf_with_table_argument_malformed_query(self):
         class TestUDTF:
@@ -637,7 +637,7 @@ class BaseUDTFTestsMixin:
                 WITH t AS (
                   SELECT id FROM range(0, 8)
                 )
-                SELECT * FROM test_udtf(TABLE t)
+                SELECT * FROM test_udtf(TABLE (t))
                 """
             ).collect(),
             [Row(a=6), Row(a=7)],
@@ -658,7 +658,7 @@ class BaseUDTFTestsMixin:
                 """
                 SELECT * FROM
                   range(0, 8) AS t,
-                  LATERAL test_udtf(TABLE t)
+                  LATERAL test_udtf(TABLE (t))
                 """
             ).collect(),
             [Row(a=6), Row(a=7)],
diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 0390785ab5d..85dbc499fbd 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -790,6 +790,7 @@ inlineTable
 
 functionTableSubqueryArgument
     : TABLE identifierReference
+    | TABLE LEFT_PAREN identifierReference RIGHT_PAREN
     | TABLE LEFT_PAREN query RIGHT_PAREN
     ;
 
@@ -803,8 +804,8 @@ functionTableReferenceArgument
     ;
 
 functionTableArgument
-    : functionArgument
-    | functionTableReferenceArgument
+    : functionTableReferenceArgument
+    | functionArgument
     ;
 
 functionTable
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
index 1fdfbb97abc..7a28efa3e42 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala
@@ -1554,6 +1554,12 @@ class AstBuilder extends DataTypeAstBuilder with SQLConfHelper with Logging {
  override def visitFunctionTableSubqueryArgument(
      ctx: FunctionTableSubqueryArgumentContext): Expression = withOrigin(ctx) {
    val p = Option(ctx.identifierReference).map { r =>
+      // Make sure that the identifier after the TABLE keyword is surrounded by parentheses, as
+      // required by the SQL standard. If not, return an informative error message.
+      if (ctx.LEFT_PAREN() == null) {
+        throw QueryParsingErrors.invalidTableFunctionIdentifierArgumentMissingParentheses(
+          ctx, argumentName = ctx.identifierReference().getText)
+      }
       createUnresolvedRelation(r)
     }.getOrElse {
       plan(ctx.query)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
index c451f606c23..ced27626be7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala
@@ -669,4 +669,15 @@ private[sql] object QueryParsingErrors extends QueryErrorsBase {
       ctx
     )
   }
+
+  def invalidTableFunctionIdentifierArgumentMissingParentheses(
+      ctx: ParserRuleContext, argumentName: String): Throwable = {
+    new ParseException(
+      errorClass =
+        "INVALID_SQL_SYNTAX.INVALID_TABLE_FUNCTION_IDENTIFIER_ARGUMENT_MISSING_PARENTHESES",
+      messageParameters = Map(
+        "argumentName" -> toSQLId(argumentName)),
+      ctx
+    )
+  }
 }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
index 4bad3ced705..4a5d0a0ae29 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala
@@ -1443,7 +1443,7 @@ class PlanParserSuite extends AnalysisTest {
 
   test("table valued function with table arguments") {
     assertEqual(
-      "select * from my_tvf(table v1, table (select 1))",
+      "select * from my_tvf(table (v1), table (select 1))",
       UnresolvedTableValuedFunction("my_tvf",
        FunctionTableSubqueryArgumentExpression(UnresolvedRelation(Seq("v1"))) ::
           FunctionTableSubqueryArgumentExpression(
@@ -1451,7 +1451,7 @@ class PlanParserSuite extends AnalysisTest {
 
     // All named arguments
     assertEqual(
-      "select * from my_tvf(arg1 => table v1, arg2 => table (select 1))",
+      "select * from my_tvf(arg1 => table (v1), arg2 => table (select 1))",
       UnresolvedTableValuedFunction("my_tvf",
         NamedArgumentExpression("arg1",
           
FunctionTableSubqueryArgumentExpression(UnresolvedRelation(Seq("v1")))) ::
@@ -1461,7 +1461,7 @@ class PlanParserSuite extends AnalysisTest {
 
     // Unnamed and named arguments
     assertEqual(
-      "select * from my_tvf(2, table v1, arg1 => table (select 1))",
+      "select * from my_tvf(2, table (v1), arg1 => table (select 1))",
       UnresolvedTableValuedFunction("my_tvf",
        Literal(2) ::
          FunctionTableSubqueryArgumentExpression(UnresolvedRelation(Seq("v1"))) ::
@@ -1471,12 +1471,25 @@ class PlanParserSuite extends AnalysisTest {
 
     // Mixed arguments
     assertEqual(
-      "select * from my_tvf(arg1 => table v1, 2, arg2 => true)",
+      "select * from my_tvf(arg1 => table (v1), 2, arg2 => true)",
       UnresolvedTableValuedFunction("my_tvf",
         NamedArgumentExpression("arg1",
           
FunctionTableSubqueryArgumentExpression(UnresolvedRelation(Seq("v1")))) ::
           Literal(2) ::
           NamedArgumentExpression("arg2", Literal(true)) :: 
Nil).select(star()))
+
+    // Negative tests:
+    // Parentheses are missing from the table argument.
+    val sql1 = "select * from my_tvf(arg1 => table v1)"
+    checkError(
+      exception = parseException(sql1),
+      errorClass =
+        "INVALID_SQL_SYNTAX.INVALID_TABLE_FUNCTION_IDENTIFIER_ARGUMENT_MISSING_PARENTHESES",
+      parameters = Map("argumentName" -> "`v1`"),
+      context = ExpectedContext(
+        fragment = "table v1",
+        start = 29,
+        stop = sql1.length - 2))
   }
 
   test("SPARK-32106: TRANSFORM plan") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
