This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 53383fcd2be  [SPARK-44391][SQL][3.4] Check the number of argument types in `InvokeLike`

53383fcd2be is described below

commit 53383fcd2be178f4f0d231334ee36f1c3d67f64d
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Fri Jul 14 08:37:29 2023 +0300

    [SPARK-44391][SQL][3.4] Check the number of argument types in `InvokeLike`

    ### What changes were proposed in this pull request?

    In the PR, I propose to check the number of argument types in `InvokeLike`
    expressions. If input types are provided, the number of types must exactly
    match the number of argument expressions.

    This is a backport of https://github.com/apache/spark/pull/41954.

    ### Why are the changes needed?

    1. This PR explicitly checks the contract described in this comment:
       https://github.com/apache/spark/blob/d9248e83bbb3af49333608bebe7149b1aaeca738/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala#L247
       which can prevent errors in expression implementations and improves
       code maintainability.
    2. It also fixes an issue in `UrlEncode` and `UrlDecode`.

    ### Does this PR introduce _any_ user-facing change?

    No.

    ### How was this patch tested?

    By running the related tests:
    ```
    $ build/sbt "test:testOnly *UrlFunctionsSuite"
    $ build/sbt "test:testOnly *DataSourceV2FunctionSuite"
    ```

    Authored-by: Max Gekk <max.gekk@gmail.com>
    (cherry picked from commit 3e82ac6ea3d9f87c8ac09e481235beefaa1bf758)

    Closes #41985 from MaxGekk/fix-url_decode-3.4.

    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 core/src/main/resources/error/error-classes.json           |  5 +++++
 .../sql-error-conditions-datatype-mismatch-error-class.md  |  4 ++++
 .../spark/sql/catalyst/analysis/CheckAnalysis.scala        |  5 +++--
 .../spark/sql/catalyst/expressions/objects/objects.scala   | 15 +++++++++++++++
 .../spark/sql/catalyst/expressions/urlExpressions.scala    |  4 ++--
 5 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index febed9283d8..90dec2ee45e 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -468,6 +468,11 @@
         "The <exprName> must be between <valueRange> (current value = <currentValue>)."
       ]
     },
+    "WRONG_NUM_ARG_TYPES" : {
+      "message" : [
+        "The expression requires <expectedNum> argument types but the actual number is <actualNum>."
+      ]
+    },
     "WRONG_NUM_ENDPOINTS" : {
       "message" : [
         "The number of endpoints must be >= 2 to construct intervals but the actual number is <actualNumber>."

diff --git a/docs/sql-error-conditions-datatype-mismatch-error-class.md b/docs/sql-error-conditions-datatype-mismatch-error-class.md
index 6ccd63e6ee9..2178deca4f2 100644
--- a/docs/sql-error-conditions-datatype-mismatch-error-class.md
+++ b/docs/sql-error-conditions-datatype-mismatch-error-class.md
@@ -231,6 +231,10 @@ The input of `<functionName>` can't be `<dataType>` type data.
 
 The `<exprName>` must be between `<valueRange>` (current value = `<currentValue>`).
 
+## WRONG_NUM_ARG_TYPES
+
+The expression requires `<expectedNum>` argument types but the actual number is `<actualNum>`.
+
 ## WRONG_NUM_ENDPOINTS
 
 The number of endpoints must be >= 2 to construct intervals but the actual number is `<actualNumber>`.
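To make the contract concrete, here is a minimal sketch (not part of the commit) of the mismatch the new check rejects. It assumes the `StaticInvoke` constructor shape visible in the `urlExpressions.scala` diff below (argument expressions and their declared input types as separate sequences); the target class and literals are illustrative only:

```scala
import org.apache.spark.sql.catalyst.expressions.Literal
import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
import org.apache.spark.sql.types.StringType

// Mirrors the pre-fix shape of UrlDecode's replacement: two argument
// expressions, but only one declared input type.
val mismatched = StaticInvoke(
  classOf[java.net.URLDecoder],             // illustrative target class
  StringType,
  "decode",
  Seq(Literal("a%20b"), Literal("UTF-8")),  // two arguments...
  Seq(StringType))                          // ...but only one type

// With this patch, mismatched.checkInputDataTypes() returns a
// DataTypeMismatch with errorSubClass = "WRONG_NUM_ARG_TYPES"
// instead of letting the inconsistency slip through analysis.
```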
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 223fdf12d6d..e717483ec94 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -288,8 +288,9 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB
             "srcType" -> c.child.dataType.catalogString,
             "targetType" -> c.dataType.catalogString))
       case e: RuntimeReplaceable if !e.replacement.resolved =>
-        throw new IllegalStateException("Illegal RuntimeReplaceable: " + e +
-          "\nReplacement is unresolved: " + e.replacement)
+        throw SparkException.internalError(
+          s"Cannot resolve the runtime replaceable expression ${toSQLExpr(e)}. " +
+          s"The replacement is unresolved: ${toSQLExpr(e.replacement)}.")
 
       case g: Grouping =>
         g.failAnalysis(errorClass = "_LEGACY_ERROR_TEMP_2445", messageParameters = Map.empty)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
index 929beb660ad..7d708e30986 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects/objects.scala
@@ -31,6 +31,7 @@ import org.apache.spark.{SparkConf, SparkEnv}
 import org.apache.spark.serializer._
 import org.apache.spark.sql.Row
 import org.apache.spark.sql.catalyst.{InternalRow, ScalaReflection}
+import org.apache.spark.sql.catalyst.analysis.TypeCheckResult
 import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.codegen._
 import org.apache.spark.sql.catalyst.expressions.codegen.Block._
@@ -47,6 +48,19 @@ import org.apache.spark.util.Utils
 trait InvokeLike extends Expression with NonSQLExpression with ImplicitCastInputTypes {
 
   def arguments: Seq[Expression]
 
+  protected def argumentTypes: Seq[AbstractDataType] = inputTypes
+
+  override def checkInputDataTypes(): TypeCheckResult = {
+    if (!argumentTypes.isEmpty && argumentTypes.length != arguments.length) {
+      TypeCheckResult.DataTypeMismatch(
+        errorSubClass = "WRONG_NUM_ARG_TYPES",
+        messageParameters = Map(
+          "expectedNum" -> arguments.length.toString,
+          "actualNum" -> argumentTypes.length.toString))
+    } else {
+      super.checkInputDataTypes()
+    }
+  }
 
   def propagateNull: Boolean
@@ -383,6 +397,7 @@ case class Invoke(
     } else {
       Nil
     }
+  override protected def argumentTypes: Seq[AbstractDataType] = methodInputTypes
 
   private lazy val encodedFunctionName = ScalaReflection.encodeFieldNameToIdentifier(functionName)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala
index b3ba5656d44..47b37a5edeb 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/urlExpressions.scala
@@ -57,7 +57,7 @@ case class UrlEncode(child: Expression)
       StringType,
       "encode",
       Seq(child, Literal("UTF-8")),
-      Seq(StringType))
+      Seq(StringType, StringType))
 
   override protected def withNewChildInternal(newChild: Expression): Expression = {
     copy(child = newChild)
@@ -94,7 +94,7 @@ case class UrlDecode(child: Expression)
       StringType,
       "decode",
       Seq(child, Literal("UTF-8")),
-      Seq(StringType))
+      Seq(StringType, StringType))
 
   override protected def withNewChildInternal(newChild: Expression): Expression = {
     copy(child = newChild)
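As a quick end-to-end sanity check (a sketch, assuming a Spark 3.4 build that includes this patch and the `spark` session provided by spark-shell), the two fixed expressions now analyze and run cleanly:

```scala
// url_encode/url_decode declare both argument types (value and charset),
// so the new InvokeLike check passes and the functions evaluate normally.
spark.sql("SELECT url_encode('a b c')").show()  // returns 'a+b+c'
spark.sql("SELECT url_decode('a+b+c')").show()  // returns 'a b c'
```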