This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new e3c70cdbc03 [SPARK-40663][SQL] Migrate execution errors onto error classes: _LEGACY_ERROR_TEMP_2126-2150
e3c70cdbc03 is described below

commit e3c70cdbc03efdffbd20126d83c63d48e1171815
Author: itholic <haejoon....@databricks.com>
AuthorDate: Wed Oct 12 17:50:37 2022 +0300

[SPARK-40663][SQL] Migrate execution errors onto error classes: _LEGACY_ERROR_TEMP_2126-2150

### What changes were proposed in this pull request?

This PR proposes to migrate 25 execution errors onto temporary error classes with the prefix `_LEGACY_ERROR_TEMP_2126` to `_LEGACY_ERROR_TEMP_2150`. The `_LEGACY_ERROR_TEMP_` prefix indicates that these are dev-facing error messages which won't be exposed to end users.
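For illustration, each migration replaces a hard-coded message string with an error-class lookup: the message text moves into error-classes.json, and the throw site supplies only the class name and its parameters. Here is the shape of the change for `exceedMapSizeLimitError`, excerpted from the diff below:

```scala
// Before: the message is assembled inline at the throw site.
def exceedMapSizeLimitError(size: Int): Throwable = {
  new RuntimeException(s"Unsuccessful attempt to build maps with $size elements " +
    s"due to exceeding the map size limit ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.")
}

// After: the text lives in error-classes.json under _LEGACY_ERROR_TEMP_2126;
// the exception carries a machine-readable class name plus named parameters.
def exceedMapSizeLimitError(size: Int): SparkRuntimeException = {
  new SparkRuntimeException(
    errorClass = "_LEGACY_ERROR_TEMP_2126",
    messageParameters = Map(
      "size" -> size.toString(),
      "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString()))
}
```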
### Why are the changes needed?

To speed up the error class migration. Migrating onto temporary error classes allows us to analyze the errors, so we can detect the most common error classes.
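As a sketch of why this helps (a minimal standalone illustration, not Spark's actual rendering code): once messages are templates keyed by a stable class name, every occurrence of a failure carries the same `errorClass` regardless of the concrete parameter values, so occurrences can be grouped and counted, and the `<name>` placeholders are filled from `messageParameters`:

```scala
object ErrorTemplateDemo {
  // One template copied from the error-classes.json entries added below.
  private val templates: Map[String, String] = Map(
    "_LEGACY_ERROR_TEMP_2126" ->
      ("Unsuccessful attempt to build maps with <size> elements due to " +
        "exceeding the map size limit <maxRoundedArrayLength>."))

  // Replace each <name> placeholder with its supplied parameter value.
  def render(errorClass: String, params: Map[String, String]): String =
    params.foldLeft(templates(errorClass)) { case (msg, (k, v)) =>
      msg.replace(s"<$k>", v)
    }

  def main(args: Array[String]): Unit = {
    println(render(
      "_LEGACY_ERROR_TEMP_2126",
      Map("size" -> "100000000", "maxRoundedArrayLength" -> "2147483632")))
  }
}
```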
### Does this PR introduce _any_ user-facing change?

No

### How was this patch tested?

```
$ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite"
$ build/sbt "test:testOnly *SQLQuerySuite"
$ build/sbt -Phive-thriftserver "hive-thriftserver/testOnly org.apache.spark.sql.hive.thriftserver.ThriftServerQueryTestSuite"
```

Closes #38127 from itholic/SPARK-40540-2126-2150.

Authored-by: itholic <haejoon....@databricks.com>
Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 core/src/main/resources/error/error-classes.json   | 127 ++++++++++++
 .../spark/sql/errors/QueryExecutionErrors.scala    | 216 +++++++++++++--------
 .../results/timestampNTZ/timestamp-ansi.sql.out    |  27 ++-
 .../results/timestampNTZ/timestamp.sql.out         |  27 ++-
 4 files changed, 309 insertions(+), 88 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 312f42fd955..dd95c0f83d1 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -3678,5 +3678,132 @@
     "message" : [
       "Failed to merge incompatible data types ${leftCatalogString} and ${rightCatalogString}"
     ]
+  },
+  "_LEGACY_ERROR_TEMP_2126" : {
+    "message" : [
+      "Unsuccessful attempt to build maps with <size> elements due to exceeding the map size limit <maxRoundedArrayLength>."
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2127" : {
+    "message" : [
+      "Duplicate map key <key> was found, please check the input data. If you want to remove the duplicated keys, you can set <mapKeyDedupPolicy> to <lastWin> so that the key inserted at last takes precedence."
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2128" : {
+    "message" : [
+      "The key array and value array of MapData must have the same length."
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2129" : {
+    "message" : [
+      "Conflict found: Field <field> <actual> differs from <field> <expected> derived from <candidate>"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2130" : {
+    "message" : [
+      "Fail to recognize '<pattern>' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2131" : {
+    "message" : [
+      "Exception when registering StreamingQueryListener"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2132" : {
+    "message" : [
+      "Parsing JSON arrays as structs is forbidden."
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2133" : {
+    "message" : [
+      "Cannot parse field name <fieldName>, field value <fieldValue>, [<token>] as target spark data type [<dataType>]."
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2134" : {
+    "message" : [
+      "Cannot parse field value <value> for pattern <pattern> as target spark data type [<dataType>]."
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2135" : {
+    "message" : [
+      "Failed to parse an empty string for data type <dataType>"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2136" : {
+    "message" : [
+      "Failed to parse field name <fieldName>, field value <fieldValue>, [<token>] to target spark data type [<dataType>]."
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2137" : {
+    "message" : [
+      "Root converter returned null"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2138" : {
+    "message" : [
+      "Cannot have circular references in bean class, but got the circular reference of class <clazz>"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2139" : {
+    "message" : [
+      "cannot have circular references in class, but got the circular reference of class <t>"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2140" : {
+    "message" : [
+      "`<fieldName>` is not a valid identifier of Java and cannot be used as field name",
+      "<walkedTypePath>"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2141" : {
+    "message" : [
+      "No Encoder found for <tpe>",
+      "<walkedTypePath>"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2142" : {
+    "message" : [
+      "Attributes for type <schema> is not supported"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2143" : {
+    "message" : [
+      "Schema for type <tpe> is not supported"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2144" : {
+    "message" : [
+      "Unable to find constructor for <tpe>. This could happen if <tpe> is an interface, or a trait without companion object constructor."
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2145" : {
+    "message" : [
+      "<paramName> cannot be more than one character"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2146" : {
+    "message" : [
+      "<paramName> should be an integer. Found <value>"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2147" : {
+    "message" : [
+      "<paramName> flag can be true or false"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2148" : {
+    "message" : [
+      "null value found but field <name> is not nullable."
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2149" : {
+    "message" : [
+      "Malformed CSV record"
+    ]
+  },
+  "_LEGACY_ERROR_TEMP_2150" : {
+    "message" : [
+      "Due to Scala's limited support of tuple, tuple with more than 22 elements are not supported."
+    ]
   }
 }
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
index 6e121414017..392b9bf6c72 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala
@@ -1332,26 +1332,43 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
       cause = null)
   }

-  def exceedMapSizeLimitError(size: Int): Throwable = {
-    new RuntimeException(s"Unsuccessful attempt to build maps with $size elements " +
-      s"due to exceeding the map size limit ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.")
+  def exceedMapSizeLimitError(size: Int): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2126",
+      messageParameters = Map(
+        "size" -> size.toString(),
+        "maxRoundedArrayLength" -> ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH.toString()))
   }

-  def duplicateMapKeyFoundError(key: Any): Throwable = {
-    new RuntimeException(s"Duplicate map key $key was found, please check the input " +
-      "data. If you want to remove the duplicated keys, you can set " +
-      s"${SQLConf.MAP_KEY_DEDUP_POLICY.key} to ${SQLConf.MapKeyDedupPolicy.LAST_WIN} so that " +
-      "the key inserted at last takes precedence.")
+  def duplicateMapKeyFoundError(key: Any): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2127",
+      messageParameters = Map(
+        "key" -> key.toString(),
+        "mapKeyDedupPolicy" -> toSQLConf(SQLConf.MAP_KEY_DEDUP_POLICY.key),
+        "lastWin" -> toSQLConf(SQLConf.MapKeyDedupPolicy.LAST_WIN.toString())))
   }

-  def mapDataKeyArrayLengthDiffersFromValueArrayLengthError(): Throwable = {
-    new RuntimeException("The key array and value array of MapData must have the same length.")
+  def mapDataKeyArrayLengthDiffersFromValueArrayLengthError(): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2128",
+      messageParameters = Map.empty)
   }

   def fieldDiffersFromDerivedLocalDateError(
-      field: ChronoField, actual: Int, expected: Int, candidate: LocalDate): Throwable = {
-    new DateTimeException(s"Conflict found: Field $field $actual differs from" +
-      s" $field $expected derived from $candidate")
+      field: ChronoField,
+      actual: Int,
+      expected: Int,
+      candidate: LocalDate): SparkDateTimeException = {
+    new SparkDateTimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2129",
+      messageParameters = Map(
+        "field" -> field.toString(),
+        "actual" -> actual.toString(),
+        "expected" -> expected.toString(),
+        "candidate" -> candidate.toString()),
+      context = Array.empty,
+      summary = "")
   }

   def failToParseDateTimeInNewParserError(s: String, e: Throwable): Throwable = {
@@ -1372,15 +1389,18 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
       e)
   }

-  def failToRecognizePatternError(pattern: String, e: Throwable): Throwable = {
-    new RuntimeException(s"Fail to recognize '$pattern' pattern in the" +
-      " DateTimeFormatter. You can form a valid datetime pattern" +
-      " with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html",
-      e)
+  def failToRecognizePatternError(pattern: String, e: Throwable): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2130",
+      messageParameters = Map("pattern" -> pattern),
+      cause = e)
   }

   def registeringStreamingQueryListenerError(e: Exception): Throwable = {
-    new SparkException("Exception when registering StreamingQueryListener", e)
+    new SparkException(
+      errorClass = "_LEGACY_ERROR_TEMP_2131",
+      messageParameters = Map.empty,
+      cause = e)
   }

   def concurrentQueryInstanceError(): Throwable = {
@@ -1389,105 +1409,149 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase {
       messageParameters = Map.empty[String, String])
   }

-  def cannotParseJsonArraysAsStructsError(): Throwable = {
-    new RuntimeException("Parsing JSON arrays as structs is forbidden.")
+  def cannotParseJsonArraysAsStructsError(): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2132",
+      messageParameters = Map.empty)
   }

   def cannotParseStringAsDataTypeError(parser: JsonParser, token: JsonToken, dataType: DataType)
-    : Throwable = {
-    new RuntimeException(
-      s"Cannot parse field name ${parser.getCurrentName}, " +
-      s"field value ${parser.getText}, " +
-      s"[$token] as target spark data type [$dataType].")
+    : SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2133",
+      messageParameters = Map(
+        "fieldName" -> parser.getCurrentName,
+        "fieldValue" -> parser.getText,
+        "token" -> token.toString(),
+        "dataType" -> dataType.toString()))
   }

   def cannotParseStringAsDataTypeError(pattern: String, value: String, dataType: DataType)
-    : Throwable = {
-    new RuntimeException(
-      s"Cannot parse field value ${toSQLValue(value, StringType)} " +
-      s"for pattern ${toSQLValue(pattern, StringType)} " +
-      s"as target spark data type [$dataType].")
+    : SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2134",
+      messageParameters = Map(
+        "value" -> toSQLValue(value, StringType),
+        "pattern" -> toSQLValue(pattern, StringType),
+        "dataType" -> dataType.toString()))
   }

-  def failToParseEmptyStringForDataTypeError(dataType: DataType): Throwable = {
-    new RuntimeException(
-      s"Failed to parse an empty string for data type ${dataType.catalogString}")
+  def failToParseEmptyStringForDataTypeError(dataType: DataType): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2135",
+      messageParameters = Map(
+        "dataType" -> dataType.catalogString))
   }

   def failToParseValueForDataTypeError(parser: JsonParser, token: JsonToken, dataType: DataType)
-    : Throwable = {
-    new RuntimeException(
-      s"Failed to parse field name ${parser.getCurrentName}, " +
-      s"field value ${parser.getText}, " +
-      s"[$token] to target spark data type [$dataType].")
+    : SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2136",
+      messageParameters = Map(
+        "fieldName" -> parser.getCurrentName.toString(),
+        "fieldValue" -> parser.getText.toString(),
+        "token" -> token.toString(),
+        "dataType" -> dataType.toString()))
   }

-  def rootConverterReturnNullError(): Throwable = {
-    new RuntimeException("Root converter returned null")
+  def rootConverterReturnNullError(): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2137",
+      messageParameters = Map.empty)
   }

-  def cannotHaveCircularReferencesInBeanClassError(clazz: Class[_]): Throwable = {
-    new UnsupportedOperationException(
-      "Cannot have circular references in bean class, but got the circular reference " +
-        s"of class $clazz")
+  def cannotHaveCircularReferencesInBeanClassError(
+      clazz: Class[_]): SparkUnsupportedOperationException = {
+    new SparkUnsupportedOperationException(
+      errorClass = "_LEGACY_ERROR_TEMP_2138",
+      messageParameters = Map("clazz" -> clazz.toString()))
   }

-  def cannotHaveCircularReferencesInClassError(t: String): Throwable = {
-    new UnsupportedOperationException(
-      s"cannot have circular references in class, but got the circular reference of class $t")
+  def cannotHaveCircularReferencesInClassError(t: String): SparkUnsupportedOperationException = {
+    new SparkUnsupportedOperationException(
+      errorClass = "_LEGACY_ERROR_TEMP_2139",
+      messageParameters = Map("t" -> t))
   }

   def cannotUseInvalidJavaIdentifierAsFieldNameError(
-      fieldName: String, walkedTypePath: WalkedTypePath): Throwable = {
-    new UnsupportedOperationException(s"`$fieldName` is not a valid identifier of " +
-      s"Java and cannot be used as field name\n$walkedTypePath")
+      fieldName: String, walkedTypePath: WalkedTypePath): SparkUnsupportedOperationException = {
+    new SparkUnsupportedOperationException(
+      errorClass = "_LEGACY_ERROR_TEMP_2140",
+      messageParameters = Map(
+        "fieldName" -> fieldName,
+        "walkedTypePath" -> walkedTypePath.toString()))
   }

   def cannotFindEncoderForTypeError(
-      tpe: String, walkedTypePath: WalkedTypePath): Throwable = {
-    new UnsupportedOperationException(s"No Encoder found for $tpe\n$walkedTypePath")
+      tpe: String, walkedTypePath: WalkedTypePath): SparkUnsupportedOperationException = {
+    new SparkUnsupportedOperationException(
+      errorClass = "_LEGACY_ERROR_TEMP_2141",
+      messageParameters = Map(
+        "tpe" -> tpe,
+        "walkedTypePath" -> walkedTypePath.toString()))
   }

-  def attributesForTypeUnsupportedError(schema: Schema): Throwable = {
-    new UnsupportedOperationException(s"Attributes for type $schema is not supported")
+  def attributesForTypeUnsupportedError(schema: Schema): SparkUnsupportedOperationException = {
+    new SparkUnsupportedOperationException(
+      errorClass = "_LEGACY_ERROR_TEMP_2142",
+      messageParameters = Map(
+        "schema" -> schema.toString()))
   }

-  def schemaForTypeUnsupportedError(tpe: String): Throwable = {
-    new UnsupportedOperationException(s"Schema for type $tpe is not supported")
+  def schemaForTypeUnsupportedError(tpe: String): SparkUnsupportedOperationException = {
+    new SparkUnsupportedOperationException(
+      errorClass = "_LEGACY_ERROR_TEMP_2143",
+      messageParameters = Map(
+        "tpe" -> tpe))
   }

-  def cannotFindConstructorForTypeError(tpe: String): Throwable = {
-    new UnsupportedOperationException(
-      s"""
-         |Unable to find constructor for $tpe.
-         |This could happen if $tpe is an interface, or a trait without companion object
-         |constructor.
-       """.stripMargin.replaceAll("\n", " "))
+  def cannotFindConstructorForTypeError(tpe: String): SparkUnsupportedOperationException = {
+    new SparkUnsupportedOperationException(
+      errorClass = "_LEGACY_ERROR_TEMP_2144",
+      messageParameters = Map(
+        "tpe" -> tpe))
   }

-  def paramExceedOneCharError(paramName: String): Throwable = {
-    new RuntimeException(s"$paramName cannot be more than one character")
+  def paramExceedOneCharError(paramName: String): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2145",
+      messageParameters = Map(
+        "paramName" -> paramName))
   }

-  def paramIsNotIntegerError(paramName: String, value: String): Throwable = {
-    new RuntimeException(s"$paramName should be an integer. Found ${toSQLValue(value, StringType)}")
+  def paramIsNotIntegerError(paramName: String, value: String): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2146",
+      messageParameters = Map(
+        "paramName" -> paramName,
+        "value" -> value))
   }

   def paramIsNotBooleanValueError(paramName: String): Throwable = {
-    new Exception(s"$paramName flag can be true or false")
+    new SparkException(
+      errorClass = "_LEGACY_ERROR_TEMP_2147",
+      messageParameters = Map(
+        "paramName" -> paramName),
+      cause = null)
   }

-  def foundNullValueForNotNullableFieldError(name: String): Throwable = {
-    new RuntimeException(s"null value found but field $name is not nullable.")
+  def foundNullValueForNotNullableFieldError(name: String): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2148",
+      messageParameters = Map(
+        "name" -> name))
   }

-  def malformedCSVRecordError(): Throwable = {
-    new RuntimeException("Malformed CSV record")
+  def malformedCSVRecordError(): SparkRuntimeException = {
+    new SparkRuntimeException(
+      errorClass = "_LEGACY_ERROR_TEMP_2149",
+      messageParameters = Map.empty)
   }

-  def elementsOfTupleExceedLimitError(): Throwable = {
-    new UnsupportedOperationException("Due to Scala's limited support of tuple, " +
-      "tuple with more than 22 elements are not supported.")
+  def elementsOfTupleExceedLimitError(): SparkUnsupportedOperationException = {
+    new SparkUnsupportedOperationException(
+      errorClass = "_LEGACY_ERROR_TEMP_2150",
+      messageParameters = Map.empty)
   }

   def expressionDecodingError(e: Exception, expressions: Seq[Expression]): Throwable = {
diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out
index a373c1f513a..5e04562a648 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp-ansi.sql.out
@@ -890,8 +890,13 @@ select to_timestamp('2019-10-06 A', 'yyyy-MM-dd GGGGG')
 -- !query schema
 struct<>
 -- !query output
-java.lang.RuntimeException
-Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+org.apache.spark.SparkRuntimeException
+{
+  "errorClass" : "_LEGACY_ERROR_TEMP_2130",
+  "messageParameters" : {
+    "pattern" : "yyyy-MM-dd GGGGG"
+  }
+}


 -- !query
@@ -899,8 +904,13 @@ select to_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEEE')
 -- !query schema
 struct<>
 -- !query output
-java.lang.RuntimeException
-Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+org.apache.spark.SparkRuntimeException
+{
+  "errorClass" : "_LEGACY_ERROR_TEMP_2130",
+  "messageParameters" : {
+    "pattern" : "dd MM yyyy EEEEEE"
+  }
+}


 -- !query
@@ -908,8 +918,13 @@ select to_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE')
 -- !query schema
 struct<>
 -- !query output
-java.lang.RuntimeException
-Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+org.apache.spark.SparkRuntimeException
+{
+  "errorClass" : "_LEGACY_ERROR_TEMP_2130",
+  "messageParameters" : {
+    "pattern" : "dd MM yyyy EEEEE"
+  }
+}


 -- !query
diff --git a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out
index c831b75c681..8d98209e625 100644
--- a/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/timestampNTZ/timestamp.sql.out
@@ -867,8 +867,13 @@ select to_timestamp('2019-10-06 A', 'yyyy-MM-dd GGGGG')
 -- !query schema
 struct<>
 -- !query output
-java.lang.RuntimeException
-Fail to recognize 'yyyy-MM-dd GGGGG' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+org.apache.spark.SparkRuntimeException
+{
+  "errorClass" : "_LEGACY_ERROR_TEMP_2130",
+  "messageParameters" : {
+    "pattern" : "yyyy-MM-dd GGGGG"
+  }
+}


 -- !query
@@ -876,8 +881,13 @@ select to_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEEE')
 -- !query schema
 struct<>
 -- !query output
-java.lang.RuntimeException
-Fail to recognize 'dd MM yyyy EEEEEE' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+org.apache.spark.SparkRuntimeException
+{
+  "errorClass" : "_LEGACY_ERROR_TEMP_2130",
+  "messageParameters" : {
+    "pattern" : "dd MM yyyy EEEEEE"
+  }
+}


 -- !query
@@ -885,8 +895,13 @@ select to_timestamp('22 05 2020 Friday', 'dd MM yyyy EEEEE')
 -- !query schema
 struct<>
 -- !query output
-java.lang.RuntimeException
-Fail to recognize 'dd MM yyyy EEEEE' pattern in the DateTimeFormatter. You can form a valid datetime pattern with the guide from https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
+org.apache.spark.SparkRuntimeException
+{
+  "errorClass" : "_LEGACY_ERROR_TEMP_2130",
+  "messageParameters" : {
+    "pattern" : "dd MM yyyy EEEEE"
+  }
+}


 -- !query

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org