This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new c41be4ec0ad [SPARK-42298][SQL] Assign name to _LEGACY_ERROR_TEMP_2132 c41be4ec0ad is described below commit c41be4ec0ad97f587a0581d5583b2ca9975b2a0f Author: Hisoka <fanjiaemi...@qq.com> AuthorDate: Mon Jun 12 23:54:02 2023 +0300 [SPARK-42298][SQL] Assign name to _LEGACY_ERROR_TEMP_2132 ### What changes were proposed in this pull request? This PR proposes to assign a name to _LEGACY_ERROR_TEMP_2132, "CANNOT_PARSE_JSON_ARRAYS_AS_STRUCTS". ### Why are the changes needed? Assign a proper name to LEGACY_ERROR_TEMP ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? ./build/sbt "testOnly org.apache.spark.sql.errors.QueryExecutionErrorsSuite" Closes #40632 from Hisoka-X/_LEGACY_ERROR_TEMP_2132. Lead-authored-by: Hisoka <fanjiaemi...@qq.com> Co-authored-by: Jia Fan <fanjiaemi...@qq.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 20 ++++++++++++++------ .../spark/sql/catalyst/json/JacksonParser.scala | 2 +- .../spark/sql/catalyst/util/BadRecordException.scala | 5 +++++ .../spark/sql/catalyst/util/FailureSafeParser.scala | 10 ++++++++-- .../spark/sql/errors/QueryExecutionErrors.scala | 10 ++++++---- .../catalyst/expressions/JsonExpressionsSuite.scala | 2 +- .../org/apache/spark/sql/CsvFunctionsSuite.scala | 2 +- .../org/apache/spark/sql/JsonFunctionsSuite.scala | 12 ++++++------ .../spark/sql/errors/QueryExecutionErrorsSuite.scala | 15 +++++++++++++++ .../sql/execution/datasources/csv/CSVSuite.scala | 2 +- .../sql/execution/datasources/json/JsonSuite.scala | 4 ++-- .../spark/sql/hive/thriftserver/CliSuite.scala | 4 ++-- .../ThriftServerWithSparkContextSuite.scala | 4 ++-- 13 files changed, 64 insertions(+), 28 deletions(-) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index a12a8000870..183ea31a7cb 100644 --- 
a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1542,7 +1542,20 @@ "message" : [ "Malformed records are detected in record parsing: <badRecord>.", "Parse Mode: <failFastMode>. To process malformed records as null result, try setting the option 'mode' as 'PERMISSIVE'." - ] + ], + "subClass" : { + "CANNOT_PARSE_JSON_ARRAYS_AS_STRUCTS" : { + "message" : [ + "Parsing JSON arrays as structs is forbidden." + ] + }, + "WITHOUT_SUGGESTION" : { + "message" : [ + "" + ] + } + }, + "sqlState" : "22023" }, "MISSING_AGGREGATION" : { "message" : [ @@ -4692,11 +4705,6 @@ "Exception when registering StreamingQueryListener." ] }, - "_LEGACY_ERROR_TEMP_2132" : { - "message" : [ - "Parsing JSON arrays as structs is forbidden." - ] - }, "_LEGACY_ERROR_TEMP_2133" : { "message" : [ "Cannot parse field name <fieldName>, field value <fieldValue>, [<token>] as target spark data type [<dataType>]." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala index bf07d65caa0..48ee50938cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/json/JacksonParser.scala @@ -144,7 +144,7 @@ class JacksonParser( array.toArray[InternalRow](schema) } case START_ARRAY => - throw QueryExecutionErrors.cannotParseJsonArraysAsStructsError() + throw JsonArraysAsStructsException() } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala index 67defe78a6c..cfbe9da6ec5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/BadRecordException.scala @@ -41,3 +41,8 @@ case class 
BadRecordException( @transient record: () => UTF8String, @transient partialResult: () => Option[InternalRow], cause: Throwable) extends Exception(cause) + +/** + * Exception thrown when the underlying parser parses a JSON array as a struct. + */ +case class JsonArraysAsStructsException() extends RuntimeException() diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala index fcdcd21b6dc..84fad1bb477 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/FailureSafeParser.scala @@ -65,8 +65,14 @@ class FailureSafeParser[IN]( case DropMalformedMode => Iterator.empty case FailFastMode => - throw QueryExecutionErrors.malformedRecordsDetectedInRecordParsingError( - toResultRow(e.partialResult(), e.record).toString, e) + e.getCause match { + case _: JsonArraysAsStructsException => + // SPARK-42298 we recreate the exception here to make sure the error message + // have the record content. 
+ throw QueryExecutionErrors.cannotParseJsonArraysAsStructsError(e.record().toString) + case _ => throw QueryExecutionErrors.malformedRecordsDetectedInRecordParsingError( + toResultRow(e.partialResult(), e.record).toString, e) + } } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index 68243233216..498723c1491 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -1416,10 +1416,12 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { cause = null) } - def cannotParseJsonArraysAsStructsError(): SparkRuntimeException = { + def cannotParseJsonArraysAsStructsError(recordStr: String): SparkRuntimeException = { new SparkRuntimeException( - errorClass = "_LEGACY_ERROR_TEMP_2132", - messageParameters = Map.empty) + errorClass = "MALFORMED_RECORD_IN_PARSING.CANNOT_PARSE_JSON_ARRAYS_AS_STRUCTS", + messageParameters = Map( + "badRecord" -> recordStr, + "failFastMode" -> FailFastMode.name)) } def cannotParseStringAsDataTypeError(parser: JsonParser, token: JsonToken, dataType: DataType) @@ -1771,7 +1773,7 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { def malformedRecordsDetectedInRecordParsingError( badRecord: String, e: BadRecordException): Throwable = { new SparkException( - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", messageParameters = Map( "badRecord" -> badRecord, "failFastMode" -> FailFastMode.name), diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index a1db7e4c3ab..94e40b98065 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -448,7 +448,7 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper with }.getCause checkError( exception = exception.asInstanceOf[SparkException], - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", parameters = Map("badRecord" -> "[null]", "failFastMode" -> "FAILFAST") ) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala index 67ba5511263..77b9b380852 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala @@ -307,7 +307,7 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession { }.getCause checkError( exception = exception1.asInstanceOf[SparkException], - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", parameters = Map("badRecord" -> "[null,null,\"]", "failFastMode" -> "FAILFAST") ) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala index 34f9ea2c731..d2ffea07921 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala @@ -837,7 +837,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { }.getCause checkError( exception = exception1.asInstanceOf[SparkException], - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", parameters = Map( "badRecord" -> "[null,null,{\"a\" 1, \"b\": 11}]", "failFastMode" -> "FAILFAST") @@ -872,7 +872,7 
@@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { checkError( exception = exception.asInstanceOf[SparkException], - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", parameters = Map( "badRecord" -> "[null,11,{\"a\": \"1\", \"b\": 11}]", "failFastMode" -> "FAILFAST") @@ -1205,7 +1205,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { }.getCause checkError( exception = exception1.asInstanceOf[SparkException], - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", parameters = Map( "badRecord" -> "[null,null]", "failFastMode" -> "FAILFAST") @@ -1216,7 +1216,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { }.getCause checkError( exception = exception2.asInstanceOf[SparkException], - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", parameters = Map( "badRecord" -> "[null,null]", "failFastMode" -> "FAILFAST") @@ -1239,7 +1239,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { }.getCause checkError( exception = exception1.asInstanceOf[SparkException], - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", parameters = Map( "badRecord" -> "[null]", "failFastMode" -> "FAILFAST") @@ -1250,7 +1250,7 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession { }.getCause checkError( exception = exception2.asInstanceOf[SparkException], - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", parameters = Map( "badRecord" -> "[null]", "failFastMode" -> "FAILFAST") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala index 069fce237f2..73a3e088894 100644 --- 
a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryExecutionErrorsSuite.scala @@ -381,6 +381,21 @@ class QueryExecutionErrorsSuite sqlState = "22018") } + test("CANNOT_PARSE_JSON_ARRAYS_AS_STRUCTS: parse json arrays as structs") { + val jsonStr = """[{"a":1, "b":0.8}]""" + checkError( + exception = intercept[SparkRuntimeException] { + sql(s"SELECT from_json('$jsonStr', 'a INT, b DOUBLE', map('mode','FAILFAST') )") + .collect() + }, + errorClass = "MALFORMED_RECORD_IN_PARSING.CANNOT_PARSE_JSON_ARRAYS_AS_STRUCTS", + parameters = Map( + "badRecord" -> jsonStr, + "failFastMode" -> "FAILFAST" + ), + sqlState = "22023") + } + test("FAILED_EXECUTE_UDF: execute user defined function") { val luckyCharOfWord = udf { (word: String, index: Int) => { word.substring(index, index + 1) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala index 814805a7272..aba94d903eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/CSVSuite.scala @@ -3218,7 +3218,7 @@ class CSVv1Suite extends CSVSuite { checkError( exception = exception.getCause.asInstanceOf[SparkException], - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", parameters = Map( "badRecord" -> "[2015,Chevy,Volt,null,null]", "failFastMode" -> "FAILFAST") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 1f9a2da5dd7..cb7bab2ddea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -1076,7 +1076,7 @@ abstract class JsonSuite }.getCause checkError( exception = exceptionTwo.asInstanceOf[SparkException], - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", parameters = Map( "badRecord" -> "[null]", "failFastMode" -> "FAILFAST") @@ -1997,7 +1997,7 @@ abstract class JsonSuite }.getCause checkError( exception = exceptionTwo.asInstanceOf[SparkException], - errorClass = "MALFORMED_RECORD_IN_PARSING", + errorClass = "MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION", parameters = Map( "badRecord" -> "[null]", "failFastMode" -> "FAILFAST") diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala index 651c6b7aafb..90f371c7ec7 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/CliSuite.scala @@ -563,7 +563,7 @@ class CliSuite extends SparkFunSuite { extraArgs = Seq("--hiveconf", "hive.session.silent=false", "-e", "select from_json('a', 'a INT', map('mode', 'FAILFAST'));"), errorResponses = Seq("JsonParseException"))( - ("", "SparkException: [MALFORMED_RECORD_IN_PARSING]"), + ("", "SparkException: [MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION]"), ("", "JsonParseException: Unrecognized token 'a'")) // If it is in silent mode, will print the error message only runCliWithin( @@ -571,7 +571,7 @@ class CliSuite extends SparkFunSuite { extraArgs = Seq("--conf", "spark.hive.session.silent=true", "-e", "select from_json('a', 'a INT', map('mode', 'FAILFAST'));"), errorResponses = Seq("SparkException"))( - ("", "SparkException: [MALFORMED_RECORD_IN_PARSING]")) + ("", "SparkException: [MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION]")) } 
test("SPARK-30808: use Java 8 time API in Thrift SQL CLI by default") { diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala index aef9dc69656..e59fcb46dc9 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/ThriftServerWithSparkContextSuite.scala @@ -69,7 +69,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { } assert(e.getMessage.contains("JsonParseException: Unrecognized token 'a'")) assert(!e.getMessage.contains( - "SparkException: [MALFORMED_RECORD_IN_PARSING]")) + "SparkException: [MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION]")) } withJdbcStatement() { statement => @@ -78,7 +78,7 @@ trait ThriftServerWithSparkContextSuite extends SharedThriftServer { } assert(e.getMessage.contains("JsonParseException: Unrecognized token 'a'")) assert(e.getMessage.contains( - "SparkException: [MALFORMED_RECORD_IN_PARSING]")) + "SparkException: [MALFORMED_RECORD_IN_PARSING.WITHOUT_SUGGESTION]")) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org