This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 53383fcd2be [SPARK-44391][SQL][3.4] Check the number of argument types in `InvokeLike`
53383fcd2be is described below

commit 53383fcd2be178f4f0d231334ee36f1c3d67f64d
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Fri Jul 14 08:37:29 2023 +0300

    [SPARK-44391][SQL][3.4] Check the number of argument types in `InvokeLike`
    
    ### What changes were proposed in this pull request?
    In the PR, I propose to check the number of argument types in `InvokeLike` expressions: if input types are provided, the number of types must exactly match the number of argument expressions.
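
    The new check reduces to a simple arity rule. Here is a self-contained sketch of that rule (illustrative only; the names and string-typed parameters are stand-ins, not Catalyst's `Expression` API):
    ```scala
    object ArityCheckSketch {
      // If argument types are declared at all, there must be exactly one per argument.
      def checkArgTypes(arguments: Seq[String], argumentTypes: Seq[String]): Either[String, Unit] =
        if (argumentTypes.nonEmpty && argumentTypes.length != arguments.length) {
          Left(s"DATATYPE_MISMATCH.WRONG_NUM_ARG_TYPES: expected ${arguments.length} " +
            s"argument types but the actual number is ${argumentTypes.length}")
        } else Right(())

      def main(args: Array[String]): Unit = {
        // Mirrors the UrlDecode bug fixed below: two arguments, one declared type.
        println(checkArgTypes(Seq("child", "utf8Literal"), Seq("StringType")))
      }
    }
    ```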
    
    This is a backport of https://github.com/apache/spark/pull/41954.
    
    ### Why are the changes needed?
    1. This PR explicitly checks the contract described in this comment:
    
https://github.com/apache/spark/blob/d9248e83bbb3af49333608bebe7149b1aaeca738/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala#L247
    
    Enforcing the contract can prevent errors in expression implementations and improves code maintainability.
    
    2. It also fixes an issue in `UrlEncode` and `UrlDecode`, which violated this contract (see the sketch after this list).
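
    Concretely, both expressions passed two arguments (the child and a UTF-8 literal) while declaring only one input type; the fix declares one type per argument. A miniature of the shape of the bug (hypothetical `MiniInvoke`, not Spark's `StaticInvoke` API):
    ```scala
    // Hypothetical miniature of the relevant fields; not Spark's StaticInvoke API.
    final case class MiniInvoke(functionName: String, arguments: Seq[String], inputTypes: Seq[String])

    object UrlDecodeShape {
      // Before the fix: two arguments, one declared input type -- breaks the contract.
      val before = MiniInvoke("decode", Seq("child", "Literal(\"UTF-8\")"), Seq("StringType"))
      // After the fix: one declared input type per argument.
      val after = MiniInvoke("decode", Seq("child", "Literal(\"UTF-8\")"), Seq("StringType", "StringType"))
    }
    ```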
    
    ### Does this PR introduce _any_ user-facing change?
    No.
    
    ### How was this patch tested?
    By running the related tests:
    ```
    $ build/sbt "test:testOnly *UrlFunctionsSuite"
    $ build/sbt "test:testOnly *DataSourceV2FunctionSuite"
    ```
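
    A quick manual check in `spark-shell` (hypothetical session, not part of the patch):
    ```scala
    scala> spark.sql("SELECT url_decode('https%3A%2F%2Fspark.apache.org')").collect()
    // res0: Array[org.apache.spark.sql.Row] = Array([https://spark.apache.org])
    ```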
    
    Authored-by: Max Gekk <max.g...@gmail.com>
    (cherry picked from commit 3e82ac6ea3d9f87c8ac09e481235beefaa1bf758)
    
    Closes #41985 from MaxGekk/fix-url_decode-3.4.
    
    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 core/src/main/resources/error/error-classes.json          |  5 +++++
 .../sql-error-conditions-datatype-mismatch-error-class.md |  4 ++++
 .../spark/sql/catalyst/analysis/CheckAnalysis.scala       |  5 +++--
 .../spark/sql/catalyst/expressions/objects/objects.scala  | 15 +++++++++++++++
 .../spark/sql/catalyst/expressions/urlExpressions.scala   |  4 ++--
 5 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index febed9283d8..90dec2ee45e 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -468,6 +468,11 @@
           "The <exprName> must be between <valueRange> (current value = 
<currentValue>)."
         ]
       },
+      "WRONG_NUM_ARG_TYPES" : {
+        "message" : [
+          "The expression requires <expectedNum> argument types but the actual 
number is <actualNum>."
+        ]
+      },
       "WRONG_NUM_ENDPOINTS" : {
         "message" : [
           "The number of endpoints must be >= 2 to construct intervals but the 
actual number is <actualNumber>."
diff --git a/docs/sql-error-conditions-datatype-mismatch-error-class.md b/docs/sql-error-conditions-datatype-mismatch-error-class.md
index 6ccd63e6ee9..2178deca4f2 100644
--- a/docs/sql-error-conditions-datatype-mismatch-error-class.md
+++ b/docs/sql-error-conditions-datatype-mismatch-error-class.md
@@ -231,6 +231,10 @@ The input of `<functionName>` can't be `<dataType>` type data.
 
 The `<exprName>` must be between `<valueRange>` (current value = `<currentValue>`).
 
+## WRONG_NUM_ARG_TYPES
+
+The expression requires `<expectedNum>` argument types but the actual number is `<actualNum>`.
+
 ## WRONG_NUM_ENDPOINTS
 
 The number of endpoints must be >= 2 to construct intervals but the actual number is `<actualNumber>`.
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 223fdf12d6d..e717483ec94 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -288,8 +288,9 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
                 "srcType" -> c.child.dataType.catalogString,
                 "targetType" -> c.dataType.catalogString))
           case e: RuntimeReplaceable if !e.replacement.resolved =>
-            throw new IllegalStateException("Illegal RuntimeReplaceable: " + e +
-              "\nReplacement is unresolved: " + e.replacement)
+            throw SparkException.internalError(
+              s"Cannot resolve the runtime replaceable expression ${toSQLExpr(e)}. " +
+              s"The replacement is unresolved: ${toSQLExpr(e.replacement)}.")
 
           case g: Grouping =>
             g.failAnalysis(errorClass = "_LEGACY_ERROR_TEMP_2445", messageParameters = Map.empty)
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index 929beb660ad..7d708e30986 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -31,6 +31,7 @@ import org.apache.spark.{SparkConf, SparkEnv}
 import org.apache.spark.serializer._
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.{InternalRow, ScalaReflection}
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
@@ -47,6 +48,19 @@ import org.apache.spark.util.Utils
 trait InvokeLike extends Expression with NonSQLExpression with ImplicitCastInputTypes {
 
   def arguments: Seq[Expression]
+  protected def argumentTypes: Seq[AbstractDataType] = inputTypes
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    if (!argumentTypes.isEmpty && argumentTypes.length != arguments.length) {
+      TypeCheckResult.DataTypeMismatch(
+        errorSubClass = "WRONG_NUM_ARG_TYPES",
+        messageParameters = Map(
+          "expectedNum" -> arguments.length.toString,
+          "actualNum" -> argumentTypes.length.toString))
+    } else {
+      super.checkInputDataTypes()
+    }
+  }
 
   def propagateNull: Boolean
 
@@ -383,6 +397,7 @@ case class Invoke(
     } else {
       Nil
     }
+  override protected def argumentTypes: Seq[AbstractDataType] = methodInputTypes
 
   private lazy val encodedFunctionName = ScalaReflection.encodeFieldNameToIdentifier(functionName)
 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala
index b3ba5656d44..47b37a5edeb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala
@@ -57,7 +57,7 @@ case class UrlEncode(child: Expression)
       StringType,
       "encode",
       Seq(child, Literal("UTF-8")),
-      Seq(StringType))
+      Seq(StringType, StringType))
 
   override protected def withNewChildInternal(newChild: Expression): Expression = {
     copy(child = newChild)
@@ -94,7 +94,7 @@ case class UrlDecode(child: Expression)
       StringType,
       "decode",
       Seq(child, Literal("UTF-8")),
-      Seq(StringType))
+      Seq(StringType, StringType))
 
   override protected def withNewChildInternal(newChild: Expression): Expression = {
     copy(child = newChild)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
