This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new b094b9863e1 [SPARK-43361][PROTOBUF] update documentation for errors related to enum serialization b094b9863e1 is described below commit b094b9863e1ed9816d912c0f2bca202242eb4728 Author: Parth Upadhyay <parth.upadh...@gmail.com> AuthorDate: Fri May 19 15:26:00 2023 +0800 [SPARK-43361][PROTOBUF] update documentation for errors related to enum serialization ### What changes were proposed in this pull request? Follows up on the comment here: https://github.com/apache/spark/pull/41075#discussion_r1194138082 Namely: - updates `error-classes.json` and `sql-error-conditions.md` to have the updated error name. - adds an additional test to assert that enum serialization with invalid enum values throws the correct exception. ### Why are the changes needed? Improve documentation. ### Does this PR introduce _any_ user-facing change? Yes, documentation. ### How was this patch tested? Existing unit tests, plus the new `raise enum serialization error` test added to `ProtobufFunctionsSuite`. Closes #41188 from justaparth/parth/update-documentation-enum-error-message. 
Authored-by: Parth Upadhyay <parth.upadh...@gmail.com> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- .../spark/sql/protobuf/ProtobufSerializer.scala | 2 +- .../sql/protobuf/ProtobufFunctionsSuite.scala | 42 ++++++++++++++++++++++ core/src/main/resources/error/error-classes.json | 8 ++--- docs/sql-error-conditions.md | 8 ++--- 4 files changed, 51 insertions(+), 9 deletions(-) diff --git a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufSerializer.scala b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufSerializer.scala index 143e51c1bb6..b11284d1f28 100644 --- a/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufSerializer.scala +++ b/connector/protobuf/src/main/scala/org/apache/spark/sql/protobuf/ProtobufSerializer.scala @@ -120,7 +120,7 @@ private[sql] class ProtobufSerializer( catalystPath, toFieldStr(protoPath), data.toString, - enumValues.mkString("", ",", "")) + enumValues.mkString(", ")) } fieldDescriptor.getEnumType.findValueByNumber(data) case (StringType, STRING) => diff --git a/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala b/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala index 18e1372e631..7e6cf0a3c96 100644 --- a/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala +++ b/connector/protobuf/src/test/scala/org/apache/spark/sql/protobuf/ProtobufFunctionsSuite.scala @@ -1531,6 +1531,48 @@ class ProtobufFunctionsSuite extends QueryTest with SharedSparkSession with Prot } } + test("raise enum serialization error") { + // Confirm that attempting to serialize an invalid enum value will raise the correct exception. 
+ val df = spark.range(1).select( + struct( + lit("INVALID_VALUE").as("basic_enum") + ).as("proto") + ) + + val dfWithInt = spark.range(1).select( + struct( + lit(9999).as("basic_enum") + ).as("proto") + ) + + checkWithFileAndClassName("SimpleMessageEnum") { case (name, descFilePathOpt) => + var parseError = intercept[AnalysisException] { + df.select(to_protobuf_wrapper($"proto", name, descFilePathOpt)).collect() + } + checkError( + exception = parseError, + errorClass = "CANNOT_CONVERT_SQL_VALUE_TO_PROTOBUF_ENUM_TYPE", + parameters = Map( + "sqlColumn" -> "`basic_enum`", + "protobufColumn" -> "field 'basic_enum'", + "data" -> "INVALID_VALUE", + "enumString" -> "\"NOTHING\", \"FIRST\", \"SECOND\"")) + + parseError = intercept[AnalysisException] { + dfWithInt.select(to_protobuf_wrapper($"proto", name, descFilePathOpt)).collect() + } + checkError( + exception = parseError, + errorClass = "CANNOT_CONVERT_SQL_VALUE_TO_PROTOBUF_ENUM_TYPE", + parameters = Map( + "sqlColumn" -> "`basic_enum`", + "protobufColumn" -> "field 'basic_enum'", + "data" -> "9999", + "enumString" -> "0, 1, 2")) + } + } + + def testFromProtobufWithOptions( df: DataFrame, expectedDf: DataFrame, diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index 24f972a5006..e7203c0292b 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -62,14 +62,14 @@ "Unable to convert <protobufType> of Protobuf to SQL type <toType>." ] }, - "CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_ENUM_TYPE" : { + "CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_FIELD_TYPE" : { "message" : [ - "Cannot convert SQL <sqlColumn> to Protobuf <protobufColumn> because <data> cannot be written since it's not defined in ENUM <enumString>." + "Cannot convert SQL <sqlColumn> to Protobuf <protobufColumn> because schema is incompatible (protobufType = <protobufType>, sqlType = <sqlType>)." 
] }, - "CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_FIELD_TYPE" : { + "CANNOT_CONVERT_SQL_VALUE_TO_PROTOBUF_ENUM_TYPE" : { "message" : [ - "Cannot convert SQL <sqlColumn> to Protobuf <protobufColumn> because schema is incompatible (protobufType = <protobufType>, sqlType = <sqlType>)." + "Cannot convert SQL <sqlColumn> to Protobuf <protobufColumn> because <data> is not in defined values for enum: <enumString>." ] }, "CANNOT_DECODE_URL" : { diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index b52b1a51f62..b9e9a2ed997 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -77,17 +77,17 @@ SQLSTATE: none assigned Unable to convert `<protobufType>` of Protobuf to SQL type `<toType>`. -### CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_ENUM_TYPE +### CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_FIELD_TYPE SQLSTATE: none assigned -Cannot convert SQL `<sqlColumn>` to Protobuf `<protobufColumn>` because `<data>` cannot be written since it's not defined in ENUM `<enumString>`. +Cannot convert SQL `<sqlColumn>` to Protobuf `<protobufColumn>` because schema is incompatible (protobufType = `<protobufType>`, sqlType = `<sqlType>`). -### CANNOT_CONVERT_SQL_TYPE_TO_PROTOBUF_FIELD_TYPE +### CANNOT_CONVERT_SQL_VALUE_TO_PROTOBUF_ENUM_TYPE SQLSTATE: none assigned -Cannot convert SQL `<sqlColumn>` to Protobuf `<protobufColumn>` because schema is incompatible (protobufType = `<protobufType>`, sqlType = `<sqlType>`). +Cannot convert SQL `<sqlColumn>` to Protobuf `<protobufColumn>` because `<data>` is not in defined values for enum: `<enumString>`. ### CANNOT_DECODE_URL --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org