This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 8b967e191b7 [SPARK-43254][SQL] Assign a name to the error _LEGACY_ERROR_TEMP_2018
8b967e191b7 is described below

commit 8b967e191b755d7f2830c15d382c83ce7aeb69c1
Author: dengziming <dengziming1...@gmail.com>
AuthorDate: Thu Sep 21 10:22:37 2023 +0300

    [SPARK-43254][SQL] Assign a name to the error _LEGACY_ERROR_TEMP_2018

    ### What changes were proposed in this pull request?
    Assign the name `CLASS_UNSUPPORTED_BY_MAP_OBJECTS` to the legacy error class `_LEGACY_ERROR_TEMP_2018`.

    ### Why are the changes needed?
    To assign a proper name as part of the activity in SPARK-37935.

    ### Does this PR introduce _any_ user-facing change?
    Yes, the error message now includes the error class name.

    ### How was this patch tested?
    Added a unit test that produces the error from user code.

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #42939 from dengziming/SPARK-43254.

    Authored-by: dengziming <dengziming1...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
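
For context, a minimal sketch of how the renamed error surfaces from user code.
It is adapted from the new DatasetSuite test in this patch and assumes a
spark-shell session with `spark` in scope; like the test, it forces the
interpreted (NO_CODEGEN) path to reproduce the error:

    import scala.reflect.ClassTag
    import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoders, ExpressionEncoder}
    import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.BoxedIntEncoder

    // Force interpreted evaluation, matching the setup of the new test below.
    spark.conf.set("spark.sql.codegen.factoryMode", "NO_CODEGEN")

    // An encoder that asks MapObjects to materialize Array[Int] as the resulting
    // collection, which the interpreted MapObjects path does not support.
    implicit val enc: ExpressionEncoder[Array[Int]] = ExpressionEncoder(
      AgnosticEncoders.IterableEncoder(
        ClassTag(classOf[Array[Int]]), BoxedIntEncoder, false, false))

    spark.createDataset(Seq(Array(1))).collect()
    // Throws an expression decoding error whose cause now reads roughly:
    //   org.apache.spark.SparkRuntimeException: [CLASS_UNSUPPORTED_BY_MAP_OBJECTS]
    //   `MapObjects` does not support the class [I as resulting collection.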
---
 .../src/main/resources/error/error-classes.json    | 10 +++---
 docs/sql-error-conditions.md                       |  6 ++++
 .../sql/catalyst/encoders/ExpressionEncoder.scala  |  2 +-
 .../spark/sql/errors/QueryExecutionErrors.scala    |  2 +-
 .../expressions/ObjectExpressionsSuite.scala       | 11 +++---
 .../scala/org/apache/spark/sql/DatasetSuite.scala  | 40 ++++++++++++++++++++--
 6 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index d92ccfce5c5..8942d3755e9 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -344,6 +344,11 @@
     ],
     "sqlState" : "22003"
   },
+  "CLASS_UNSUPPORTED_BY_MAP_OBJECTS" : {
+    "message" : [
+      "`MapObjects` does not support the class <cls> as resulting collection."
+    ]
+  },
   "CODEC_NOT_AVAILABLE" : {
     "message" : [
       "The codec <codecName> is not available. Consider to set the config <configKey> to <configVal>."
@@ -4944,11 +4949,6 @@
       "not resolved."
     ]
   },
-  "_LEGACY_ERROR_TEMP_2018" : {
-    "message" : [
-      "class `<cls>` is not supported by `MapObjects` as resulting collection."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_2020" : {
     "message" : [
       "Couldn't find a valid constructor on <cls>."
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index 1df00f72bc9..f6f94efc2b0 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -297,6 +297,12 @@ The value `<value>` of the type `<sourceType>` cannot be cast to `<targetType>`
 
 Fail to assign a value of `<sourceType>` type to the `<targetType>` type column or variable `<columnName>` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead.
 
+### CLASS_UNSUPPORTED_BY_MAP_OBJECTS
+
+SQLSTATE: none assigned
+
+`MapObjects` does not support the class `<cls>` as resulting collection.
+
 ### CODEC_NOT_AVAILABLE
 
 SQLSTATE: none assigned
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
index ff72b5a0d96..74d7a5e7a67 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
@@ -170,7 +170,7 @@ object ExpressionEncoder {
    * Function that deserializes an [[InternalRow]] into an object of type `T`. This class is not
    * thread-safe.
    */
-  class Deserializer[T](private val expressions: Seq[Expression])
+  class Deserializer[T](val expressions: Seq[Expression])
     extends (InternalRow => T) with Serializable {
     @transient
     private[this] var constructProjection: Projection = _
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index e14fef1fad7..84472490128 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -422,7 +422,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
 
   def classUnsupportedByMapObjectsError(cls: Class[_]): SparkRuntimeException = {
     new SparkRuntimeException(
-      errorClass = "_LEGACY_ERROR_TEMP_2018",
+      errorClass = "CLASS_UNSUPPORTED_BY_MAP_OBJECTS",
       messageParameters = Map("cls" -> cls.getName))
   }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
index 3a662e68d58..de85d6fe0b7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
@@ -404,11 +404,12 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       customCollectionClasses.foreach(testMapObjects(collection, _, inputType))
 
       // Unsupported custom collection class
-      val errMsg = intercept[RuntimeException] {
-        testMapObjects(collection, classOf[scala.collection.Map[Int, Int]], inputType)
-      }.getMessage()
-      assert(errMsg.contains("`scala.collection.Map` is not supported by `MapObjects` " +
-        "as resulting collection."))
+      checkError(
+        exception = intercept[SparkRuntimeException] {
+          testMapObjects(collection, classOf[scala.collection.Map[Int, Int]], inputType)
+        },
+        errorClass = "CLASS_UNSUPPORTED_BY_MAP_OBJECTS",
+        parameters = Map("cls" -> "scala.collection.Map"))
     }
   }
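
Aside: `checkError` asserts on the structured error class and its message
parameters rather than substring-matching the rendered text, which is what keeps
a rename like this one testable. A standalone sketch of what the renamed error
carries (assuming only the public `SparkRuntimeException` constructor and the
`SparkThrowable` accessors; the rendered text comes from the JSON template above):

    import org.apache.spark.SparkRuntimeException

    // How classUnsupportedByMapObjectsError builds the exception: the <cls>
    // placeholder in the error-classes.json template is filled from this map.
    val e = new SparkRuntimeException(
      errorClass = "CLASS_UNSUPPORTED_BY_MAP_OBJECTS",
      messageParameters = Map("cls" -> "scala.collection.Map"))

    assert(e.getErrorClass == "CLASS_UNSUPPORTED_BY_MAP_OBJECTS")
    assert(e.getMessageParameters.get("cls") == "scala.collection.Map")
    // e.getMessage renders roughly:
    //   [CLASS_UNSUPPORTED_BY_MAP_OBJECTS] `MapObjects` does not support the
    //   class scala.collection.Map as resulting collection.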
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index e05b545f235..32469534978 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql
 import java.io.{Externalizable, ObjectInput, ObjectOutput}
 import java.sql.{Date, Timestamp}
 
+import scala.reflect.ClassTag
 import scala.util.Random
 
 import org.apache.hadoop.fs.{Path, PathFilter}
@@ -32,8 +33,9 @@ import org.apache.spark.TestUtils.withListener
 import org.apache.spark.internal.config.MAX_RESULT_SIZE
 import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
 import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, ScroogeLikeExample}
-import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, OuterScopes}
-import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
+import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoders, ExpressionEncoder, OuterScopes}
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.BoxedIntEncoder
+import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, GenericRowWithSchema}
 import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi}
 import org.apache.spark.sql.catalyst.util.sideBySide
 import org.apache.spark.sql.execution.{LogicalRDD, RDDScanExec, SQLExecution}
@@ -2561,6 +2563,40 @@ class DatasetSuite extends QueryTest
     checkDataset(ds.filter(f(col("_1"))), Tuple1(ValueClass(2)))
   }
 
+  test("CLASS_UNSUPPORTED_BY_MAP_OBJECTS when creating dataset") {
+    withSQLConf(
+      // Use NO_CODEGEN (interpreted mode) to reproduce CLASS_UNSUPPORTED_BY_MAP_OBJECTS
+      SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) {
+      // Create our own encoder to override the default encoder from spark.implicits._
+      implicit val im: ExpressionEncoder[Array[Int]] = ExpressionEncoder(
+        AgnosticEncoders.IterableEncoder(
+          ClassTag(classOf[Array[Int]]), BoxedIntEncoder, false, false))
+
+      val df = spark.createDataset(Seq(Array(1)))
+      val exception = intercept[org.apache.spark.SparkRuntimeException] {
+        df.collect()
+      }
+      val expressions = im.resolveAndBind(df.queryExecution.logical.output,
+        spark.sessionState.analyzer)
+        .createDeserializer().expressions
+
+      // Expression decoding error wrapping the one under test
+      checkError(
+        exception = exception,
+        errorClass = "_LEGACY_ERROR_TEMP_2151",
+        parameters = Map(
+          "e" -> exception.getCause.toString(),
+          "expressions" -> expressions.map(
+            _.simpleString(SQLConf.get.maxToStringFields)).mkString("\n"))
+      )
+      // Class unsupported by MapObjects
+      checkError(
+        exception = exception.getCause.asInstanceOf[org.apache.spark.SparkRuntimeException],
+        errorClass = "CLASS_UNSUPPORTED_BY_MAP_OBJECTS",
+        parameters = Map("cls" -> classOf[Array[Int]].getName))
+    }
+  }
 }
 
 class DatasetLargeResultCollectingSuite extends QueryTest

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org