This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 8b967e191b7 [SPARK-43254][SQL] Assign a name to the error _LEGACY_ERROR_TEMP_2018
8b967e191b7 is described below

commit 8b967e191b755d7f2830c15d382c83ce7aeb69c1
Author: dengziming <dengziming1...@gmail.com>
AuthorDate: Thu Sep 21 10:22:37 2023 +0300

    [SPARK-43254][SQL] Assign a name to the error _LEGACY_ERROR_TEMP_2018

    ### What changes were proposed in this pull request?
    Assign the name `CLASS_UNSUPPORTED_BY_MAP_OBJECTS` to the legacy error class `_LEGACY_ERROR_TEMP_2018`.

    ### Why are the changes needed?
    To assign a proper name as part of the activity in SPARK-37935.

    ### Does this PR introduce _any_ user-facing change?
    Yes, the error message now includes the error class name.

    ### How was this patch tested?
    Added a unit test that produces the error from user code.

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #42939 from dengziming/SPARK-43254.

    Authored-by: dengziming <dengziming1...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
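
For context, a minimal sketch of how the renamed error surfaces from user code.
It is adapted from the new DatasetSuite test in this patch and assumes a
spark-shell session with `spark` in scope; like the test, it forces the
interpreted (NO_CODEGEN) path to reproduce the error:

    import scala.reflect.ClassTag
    import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoders, ExpressionEncoder}
    import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.BoxedIntEncoder

    // Force interpreted evaluation, matching the setup of the new test below.
    spark.conf.set("spark.sql.codegen.factoryMode", "NO_CODEGEN")

    // An encoder that asks MapObjects to materialize Array[Int] as the resulting
    // collection, which the interpreted MapObjects path does not support.
    implicit val enc: ExpressionEncoder[Array[Int]] = ExpressionEncoder(
      AgnosticEncoders.IterableEncoder(
        ClassTag(classOf[Array[Int]]), BoxedIntEncoder, false, false))

    spark.createDataset(Seq(Array(1))).collect()
    // Throws an expression decoding error whose cause now reads roughly:
    //   org.apache.spark.SparkRuntimeException: [CLASS_UNSUPPORTED_BY_MAP_OBJECTS]
    //   `MapObjects` does not support the class [I as resulting collection.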
---
 .../src/main/resources/error/error-classes.json    | 10 +++---
 docs/sql-error-conditions.md                       |  6 ++++
 .../sql/catalyst/encoders/ExpressionEncoder.scala  |  2 +-
 .../spark/sql/errors/QueryExecutionErrors.scala    |  2 +-
 .../expressions/ObjectExpressionsSuite.scala       | 11 +++---
 .../scala/org/apache/spark/sql/DatasetSuite.scala  | 40 ++++++++++++++++++++--
 6 files changed, 57 insertions(+), 14 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index d92ccfce5c5..8942d3755e9 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -344,6 +344,11 @@
     ],
     "sqlState" : "22003"
   },
+  "CLASS_UNSUPPORTED_BY_MAP_OBJECTS" : {
+    "message" : [
+      "`MapObjects` does not support the class <cls> as resulting collection."
+    ]
+  },
   "CODEC_NOT_AVAILABLE" : {
     "message" : [
       "The codec <codecName> is not available. Consider to set the config <configKey> to <configVal>."
@@ -4944,11 +4949,6 @@
       "not resolved."
     ]
   },
-  "_LEGACY_ERROR_TEMP_2018" : {
-    "message" : [
-      "class `<cls>` is not supported by `MapObjects` as resulting collection."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_2020" : {
     "message" : [
       "Couldn't find a valid constructor on <cls>."
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index 1df00f72bc9..f6f94efc2b0 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -297,6 +297,12 @@ The value `<value>` of the type `<sourceType>` cannot be cast to `<targetType>`
 
 Fail to assign a value of `<sourceType>` type to the `<targetType>` type column or variable `<columnName>` due to an overflow. Use `try_cast` on the input value to tolerate overflow and return NULL instead.
 
+### CLASS_UNSUPPORTED_BY_MAP_OBJECTS
+
+SQLSTATE: none assigned
+
+`MapObjects` does not support the class `<cls>` as resulting collection.
+
 ### CODEC_NOT_AVAILABLE
 
 SQLSTATE: none assigned
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
index ff72b5a0d96..74d7a5e7a67 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
@@ -170,7 +170,7 @@ object ExpressionEncoder {
    * Function that deserializes an [[InternalRow]] into an object of type `T`. This class is not
    * thread-safe.
    */
-  class Deserializer[T](private val expressions: Seq[Expression])
+  class Deserializer[T](val expressions: Seq[Expression])
     extends (InternalRow => T) with Serializable {
     @transient
     private[this] var constructProjection: Projection = _
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index e14fef1fad7..84472490128 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -422,7 +422,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase with ExecutionE
 
   def classUnsupportedByMapObjectsError(cls: Class[_]): SparkRuntimeException = {
     new SparkRuntimeException(
-      errorClass = "_LEGACY_ERROR_TEMP_2018",
+      errorClass = "CLASS_UNSUPPORTED_BY_MAP_OBJECTS",
       messageParameters = Map("cls" -> cls.getName))
   }
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
index 3a662e68d58..de85d6fe0b7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ObjectExpressionsSuite.scala
@@ -404,11 +404,12 @@ class ObjectExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
       customCollectionClasses.foreach(testMapObjects(collection, _, inputType))
 
       // Unsupported custom collection class
-      val errMsg = intercept[RuntimeException] {
-        testMapObjects(collection, classOf[scala.collection.Map[Int, Int]], inputType)
-      }.getMessage()
-      assert(errMsg.contains("`scala.collection.Map` is not supported by `MapObjects` " +
-        "as resulting collection."))
+      checkError(
+        exception = intercept[SparkRuntimeException] {
+          testMapObjects(collection, classOf[scala.collection.Map[Int, Int]], inputType)
+        },
+        errorClass = "CLASS_UNSUPPORTED_BY_MAP_OBJECTS",
+        parameters = Map("cls" -> "scala.collection.Map"))
     }
   }
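
Aside: `checkError` asserts on the structured error class and its message
parameters rather than substring-matching the rendered text, which is what keeps
a rename like this one testable. A standalone sketch of what the renamed error
carries (assuming only the public `SparkRuntimeException` constructor and the
`SparkThrowable` accessors; the rendered text comes from the JSON template above):

    import org.apache.spark.SparkRuntimeException

    // How classUnsupportedByMapObjectsError builds the exception: the <cls>
    // placeholder in the error-classes.json template is filled from this map.
    val e = new SparkRuntimeException(
      errorClass = "CLASS_UNSUPPORTED_BY_MAP_OBJECTS",
      messageParameters = Map("cls" -> "scala.collection.Map"))

    assert(e.getErrorClass == "CLASS_UNSUPPORTED_BY_MAP_OBJECTS")
    assert(e.getMessageParameters.get("cls") == "scala.collection.Map")
    // e.getMessage renders roughly:
    //   [CLASS_UNSUPPORTED_BY_MAP_OBJECTS] `MapObjects` does not support the
    //   class scala.collection.Map as resulting collection.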
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index e05b545f235..32469534978 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -20,6 +20,7 @@ package org.apache.spark.sql
 import java.io.{Externalizable, ObjectInput, ObjectOutput}
 import java.sql.{Date, Timestamp}
 
+import scala.reflect.ClassTag
 import scala.util.Random
 
 import org.apache.hadoop.fs.{Path, PathFilter}
@@ -32,8 +33,9 @@ import org.apache.spark.TestUtils.withListener
 import org.apache.spark.internal.config.MAX_RESULT_SIZE
 import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart}
 import org.apache.spark.sql.catalyst.{FooClassWithEnum, FooEnum, ScroogeLikeExample}
-import org.apache.spark.sql.catalyst.encoders.{ExpressionEncoder, OuterScopes}
-import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
+import org.apache.spark.sql.catalyst.encoders.{AgnosticEncoders, ExpressionEncoder, OuterScopes}
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.BoxedIntEncoder
+import org.apache.spark.sql.catalyst.expressions.{CodegenObjectFactoryMode, GenericRowWithSchema}
 import org.apache.spark.sql.catalyst.plans.{LeftAnti, LeftSemi}
 import org.apache.spark.sql.catalyst.util.sideBySide
 import org.apache.spark.sql.execution.{LogicalRDD, RDDScanExec, SQLExecution}
@@ -2561,6 +2563,40 @@ class DatasetSuite extends QueryTest
     checkDataset(ds.filter(f(col("_1"))), Tuple1(ValueClass(2)))
   }
 
+  test("CLASS_UNSUPPORTED_BY_MAP_OBJECTS when creating dataset") {
+    withSQLConf(
+      // Use NO_CODEGEN (interpreted mode) to reproduce CLASS_UNSUPPORTED_BY_MAP_OBJECTS
+      SQLConf.CODEGEN_FACTORY_MODE.key -> CodegenObjectFactoryMode.NO_CODEGEN.toString) {
+      // Create our own encoder to override the default encoder from spark.implicits._
+      implicit val im: ExpressionEncoder[Array[Int]] = ExpressionEncoder(
+        AgnosticEncoders.IterableEncoder(
+          ClassTag(classOf[Array[Int]]), BoxedIntEncoder, false, false))
+
+      val df = spark.createDataset(Seq(Array(1)))
+      val exception = intercept[org.apache.spark.SparkRuntimeException] {
+        df.collect()
+      }
+      val expressions = im.resolveAndBind(df.queryExecution.logical.output,
+        spark.sessionState.analyzer)
+        .createDeserializer().expressions
+
+      // Expression decoding error wrapping the one under test
+      checkError(
+        exception = exception,
+        errorClass = "_LEGACY_ERROR_TEMP_2151",
+        parameters = Map(
+          "e" -> exception.getCause.toString(),
+          "expressions" -> expressions.map(
+            _.simpleString(SQLConf.get.maxToStringFields)).mkString("\n"))
+      )
+      // Class unsupported by MapObjects
+      checkError(
+        exception = exception.getCause.asInstanceOf[org.apache.spark.SparkRuntimeException],
+        errorClass = "CLASS_UNSUPPORTED_BY_MAP_OBJECTS",
+        parameters = Map("cls" -> classOf[Array[Int]].getName))
+    }
+  }
 }
 
 class DatasetLargeResultCollectingSuite extends QueryTest

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org