This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new b094b9863e1 [SPARK-43361][PROTOBUF] update documentation for errors 
related to enum serialization
b094b9863e1 is described below

commit b094b9863e1ed9816d912c0f2bca202242eb4728
Author: Parth Upadhyay <parth.upadh...@gmail.com>
AuthorDate: Fri May 19 15:26:00 2023 +0800

    [SPARK-43361][PROTOBUF] update documentation for errors related to enum 
serialization
    
    ### What changes were proposed in this pull request?
    Follows up on the comment here: 
https://github.com/apache/spark/pull/41075#discussion_r1194138082
    
    Namely:
    - updates `error-classes.json` and `sql-error-conditions.md` to have the 
updated error name.
    - adds an additional test to assert that enum serialization with invalid 
enum values throws the correct exception.
    
    ### Why are the changes needed?
    Improve documentation
    
    ### Does this PR introduce _any_ user-facing change?
    Yes, documentation.
    
    ### How was this patch tested?
    Existing unit tests
    
    Closes #41188 from justaparth/parth/update-documentation-enum-error-message.
    
    Authored-by: Parth Upadhyay <parth.upadh...@gmail.com>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 .../spark/sql/protobuf/ProtobufSerializer.scala    |  2 +-
 .../sql/protobuf/ProtobufFunctionsSuite.scala      | 42 ++++++++++++++++++++++
 core/src/main/resources/error/error-classes.json   |  8 ++---
 docs/sql-error-conditions.md                       |  8 ++---
 4 files changed, 51 insertions(+), 9 deletions(-)

diff --git 
a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufSerializer.scala
 
b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufSerializer.scala
index 143e51c1bb6..b11284d1f28 100644
--- 
a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufSerializer.scala
+++ 
b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufSerializer.scala
@@ -120,7 +120,7 @@ private[sql] class ProtobufSerializer(
               catalystPath,
               toFieldStr(protoPath),
               data.toString,
-              enumValues.mkString("", ",", ""))
+              enumValues.mkString(", "))
           }
           fieldDescriptor.getEnumType.findValueByNumber(data)
       case (StringType, STRING) =>
diff --git 
a/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala
 
b/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala
index 18e1372e631..7e6cf0a3c96 100644
--- 
a/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala
+++ 
b/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala
@@ -1531,6 +1531,48 @@ class ProtobufFunctionsSuite extends QueryTest with 
SharedSparkSession with Prot
     }
   }
 
+  test("raise enum serialization error") {
+    // Confirm that attempting to serialize an invalid enum value will raise 
the correct exception.
+    val df = spark.range(1).select(
+      struct(
+        lit("INVALID_VALUE").as("basic_enum")
+      ).as("proto")
+    )
+
+    val dfWithInt = spark.range(1).select(
+      struct(
+        lit(9999).as("basic_enum")
+      ).as("proto")
+    )
+
+    checkWithFileAndClassName("SimpleMessageEnum") { case (name, 
descFilePathOpt) =>
+      var parseError = intercept[AnalysisException] {
+        df.select(to_protobuf_wrapper($"proto", name, 
descFilePathOpt)).collect()
+      }
+      checkError(
+        exception = parseError,
+        errorClass = "CANNOT_CONVERT_SQL_VALUE_TO_PROTOBUF_ENUM_TYPE",
+        parameters = Map(
+          "sqlColumn" -> "`basic_enum`",
+          "protobufColumn" -> "field 'basic_enum'",
+          "data" -> "INVALID_VALUE",
+          "enumString" -> "\"NOTHING\", \"FIRST\", \"SECOND\""))
+
+      parseError = intercept[AnalysisException] {
+        dfWithInt.select(to_protobuf_wrapper($"proto", name, 
descFilePathOpt)).collect()
+      }
+      checkError(
+        exception = parseError,
+        errorClass = "CANNOT_CONVERT_SQL_VALUE_TO_PROTOBUF_ENUM_TYPE",
+        parameters = Map(
+          "sqlColumn" -> "`basic_enum`",
+          "protobufColumn" -> "field 'basic_enum'",
+          "data" -> "9999",
+          "enumString" -> "0, 1, 2"))
+    }
+  }
+
+
   def testFromProtobufWithOptions(
     df: DataFrame,
     expectedDf: DataFrame,
diff --git a/core/src/main/resources/error/error-classes.json 
b/core/src/main/resources/error/error-classes.json
index 24f972a5006..e7203c0292b 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -62,14 +62,14 @@
       "Unable to convert <protobufType> of Protobuf to SQL type <toType>."
     ]
   },
-  "CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_ENUM_TYPE" : {
+  "CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_FIELD_TYPE" : {
     "message" : [
-      "Cannot convert SQL <sqlColumn> to Protobuf <protobufColumn> because 
<data> cannot be written since it's not defined in ENUM <enumString>."
+      "Cannot convert SQL <sqlColumn> to Protobuf <protobufColumn> because 
schema is incompatible (protobufType = <protobufType>, sqlType = <sqlType>)."
     ]
   },
-  "CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_FIELD_TYPE" : {
+  "CANNOT_CONVERT_SQL_VALUE_TO_PROTOBUF_ENUM_TYPE" : {
     "message" : [
-      "Cannot convert SQL <sqlColumn> to Protobuf <protobufColumn> because 
schema is incompatible (protobufType = <protobufType>, sqlType = <sqlType>)."
+      "Cannot convert SQL <sqlColumn> to Protobuf <protobufColumn> because 
<data> is not in defined values for enum: <enumString>."
     ]
   },
   "CANNOT_DECODE_URL" : {
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index b52b1a51f62..b9e9a2ed997 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -77,17 +77,17 @@ SQLSTATE: none assigned
 
 Unable to convert `<protobufType>` of Protobuf to SQL type `<toType>`.
 
-### CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_ENUM_TYPE
+### CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_FIELD_TYPE
 
 SQLSTATE: none assigned
 
-Cannot convert SQL `<sqlColumn>` to Protobuf `<protobufColumn>` because 
`<data>` cannot be written since it's not defined in ENUM `<enumString>`.
+Cannot convert SQL `<sqlColumn>` to Protobuf `<protobufColumn>` because schema 
is incompatible (protobufType = `<protobufType>`, sqlType = `<sqlType>`).
 
-### CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_FIELD_TYPE
+### CANNOT_CONVERT_SQL_VALUE_TO_PROTOBUF_ENUM_TYPE
 
 SQLSTATE: none assigned
 
-Cannot convert SQL `<sqlColumn>` to Protobuf `<protobufColumn>` because schema 
is incompatible (protobufType = `<protobufType>`, sqlType = `<sqlType>`).
+Cannot convert SQL `<sqlColumn>` to Protobuf `<protobufColumn>` because 
`<data>` is not in defined values for enum: `<enumString>`.
 
 ### CANNOT_DECODE_URL
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to