This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 56043c24a55 [SPARK-42840][SQL] Change `_LEGACY_ERROR_TEMP_2004` error to internal error 56043c24a55 is described below commit 56043c24a55663e692c3671ba263e642aebc1a50 Author: Leibniz.Hu <leibni...@gmail.com> AuthorDate: Thu Apr 6 15:53:08 2023 +0300 [SPARK-42840][SQL] Change `_LEGACY_ERROR_TEMP_2004` error to internal error ### What changes were proposed in this pull request? In the PR, I propose to change the `_LEGACY_ERROR_TEMP_2004` error to an internal error. Also this PR improves the error message. ### Why are the changes needed? `_LEGACY_ERROR_TEMP_2004` cannot be triggered from user code (for instance, some SQL query); in more detail: 1. The `_LEGACY_ERROR_TEMP_2004` error is thrown in `Literal.default` for `CharType`, `VarcharType` and other user-implemented DataTypes. 2. `Literal.default` is called in the cases below: 1. in `org.apache.spark.sql.catalyst.expressions.aggregate.Average`, for getting the initial value; but in this case, the DataType would only be DecimalType / YearMonthIntervalType / DayTimeIntervalType / DoubleType; these types are supported by `Literal.default`. 2. in `org.apache.spark.sql.catalyst.expressions.aggregate.Sum`, the same as `Average`. 3. in `org.apache.spark.sql.catalyst.planning.ExtractEquiJoinKeys` and `org.apache.spark.sql.catalyst.plans.logical.AsOfJoin`; in real scenarios, they are used by the DataFrame API, but `CharType` and `VarcharType` cannot be a part of a DataFrame's schema (they would be converted to StringType); and user-defined DataTypes do not have a matching Encoder in `org.apache.spark.sql.catalyst.encoders.RowEncoder#encoderForDataType`. So, it should be an internal error, not a `SparkRuntimeException`. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? 
By running the modified test suites: ```bash build/sbt "sql/testOnly org.apache.spark.sql.catalyst.expressions.LiteralExpressionSuite" ``` Closes #40634 from Leibnizhu/SPARK-42840. Lead-authored-by: Leibniz.Hu <leibni...@gmail.com> Co-authored-by: Leibniz.Hu <leibni...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 5 ----- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala | 6 ++---- .../sql/catalyst/expressions/LiteralExpressionSuite.scala | 14 +++++++++++++- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 79c6f3e6d82..7014ce36b1d 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -3706,11 +3706,6 @@ "Unsuccessful try to zip maps with <size> unique keys due to exceeding the array size limit <maxRoundedArrayLength>." ] }, - "_LEGACY_ERROR_TEMP_2004" : { - "message" : [ - "no default for type <dataType>." - ] - }, "_LEGACY_ERROR_TEMP_2005" : { "message" : [ "Type <dataType> does not support ordered operations." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 7ae9d55f96c..7ec5f588754 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -339,10 +339,8 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { "type" -> toSQLType(dataType))) } - def noDefaultForDataTypeError(dataType: DataType): SparkRuntimeException = { - new SparkRuntimeException( - errorClass = "_LEGACY_ERROR_TEMP_2004", - messageParameters = Map("dataType" -> dataType.toString())) + def noDefaultForDataTypeError(dataType: DataType): SparkException = { + SparkException.internalError(s"No default value for type: ${toSQLType(dataType)}.") } def orderedOperationUnsupportedByDataTypeError( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala index 80e7a3206aa..90438d81661 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/LiteralExpressionSuite.scala @@ -24,12 +24,13 @@ import java.util.TimeZone import scala.reflect.runtime.universe.TypeTag -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.{CatalystTypeConverters, ScalaReflection} import org.apache.spark.sql.catalyst.encoders.ExamplePointUDT import org.apache.spark.sql.catalyst.util.DateTimeConstants._ import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLType import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.types._ import org.apache.spark.sql.types.DayTimeIntervalType._ @@ -88,6 +89,17 @@ class LiteralExpressionSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Literal.default(StructType(StructField("a", StringType) :: Nil)), Row("")) // ExamplePointUDT.sqlType is ArrayType(DoubleType, false). checkEvaluation(Literal.default(new ExamplePointUDT), Array()) + + // DataType without a default value + List(CharType(1), VarcharType(1)).foreach(errType => { + checkError( + exception = intercept[SparkException] { + Literal.default(errType) + }, + errorClass = "INTERNAL_ERROR", + parameters = Map("message" -> s"No default value for type: ${toSQLType(errType)}.") + ) + }) } test("boolean literals") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org