This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 5d840eb4553 [SPARK-44299][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_227[4-6,8] 5d840eb4553 is described below commit 5d840eb455350ef3f6235a031a1689bf4a51007d Author: panbingkun <pbk1...@gmail.com> AuthorDate: Thu Jul 6 10:08:45 2023 +0300 [SPARK-44299][SQL] Assign names to the error class _LEGACY_ERROR_TEMP_227[4-6,8] ### What changes were proposed in this pull request? The PR aims to assign names to the error classes, including: - _LEGACY_ERROR_TEMP_2274 => UNSUPPORTED_FEATURE.REPLACE_NESTED_COLUMN - _LEGACY_ERROR_TEMP_2275 => CANNOT_INVOKE_IN_TRANSFORMATIONS - _LEGACY_ERROR_TEMP_2276 => UNSUPPORTED_FEATURE.HIVE_WITH_ANSI_INTERVALS - _LEGACY_ERROR_TEMP_2278 => INVALID_FORMAT.MISMATCH_INPUT ### Why are the changes needed? The changes improve the error framework. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Update & Add new UT. - Manually test. - Pass GA. Closes #41858 from panbingkun/SPARK-44299. 
Authored-by: panbingkun <pbk1...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../src/main/resources/error/error-classes.json | 40 +++++++++++----------- ...-error-conditions-invalid-format-error-class.md | 4 +++ ...r-conditions-unsupported-feature-error-class.md | 8 +++++ docs/sql-error-conditions.md | 6 ++++ .../spark/sql/catalyst/util/ToNumberParser.scala | 4 +-- .../spark/sql/errors/QueryExecutionErrors.scala | 20 +++++------ .../expressions/StringExpressionsSuite.scala | 9 +++-- .../apache/spark/sql/execution/command/ddl.scala | 2 +- .../sql-tests/results/postgreSQL/numeric.sql.out | 10 +++--- .../results/postgreSQL/numeric.sql.out.java21 | 10 +++--- .../apache/spark/sql/DataFrameFunctionsSuite.scala | 13 +++++++ .../spark/sql/DataFrameNaFunctionsSuite.scala | 12 ++++--- .../spark/sql/hive/execution/HiveDDLSuite.scala | 2 +- .../command/AlterTableAddColumnsSuite.scala | 13 ++++--- 14 files changed, 101 insertions(+), 52 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 8bdb02470ef..44bec5e8ced 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -128,6 +128,11 @@ ], "sqlState" : "22546" }, + "CANNOT_INVOKE_IN_TRANSFORMATIONS" : { + "message" : [ + "Dataset transformations and actions can only be invoked by the driver, not inside of other Dataset transformations; for example, dataset1.map(x => dataset2.values.count() * x) is invalid because the values transformation and count action cannot be performed inside of the dataset1.map transformation. For more information, see SPARK-28702." + ] + }, "CANNOT_LOAD_FUNCTION_CLASS" : { "message" : [ "Cannot load class <className> when registering the function <functionName>, please make sure it is on the classpath." @@ -1192,6 +1197,11 @@ "The escape character is not allowed to precede <char>." 
] }, + "MISMATCH_INPUT" : { + "message" : [ + "The input <inputType> '<input>' does not match the format." + ] + }, "THOUSANDS_SEPS_MUST_BEFORE_DEC" : { "message" : [ "Thousands separators (, or G) may not appear after the decimal point in the number format." @@ -2583,6 +2593,11 @@ "Drop the namespace <namespace>." ] }, + "HIVE_WITH_ANSI_INTERVALS" : { + "message" : [ + "Hive table <tableName> with ANSI intervals." + ] + }, "INSERT_PARTITION_SPEC_IF_NOT_EXISTS" : { "message" : [ "INSERT INTO <tableName> with IF NOT EXISTS in the PARTITION spec." @@ -2663,6 +2678,11 @@ "Remove a comment from the namespace <namespace>." ] }, + "REPLACE_NESTED_COLUMN" : { + "message" : [ + "The replace function does not support nested column <colName>." + ] + }, "SET_NAMESPACE_PROPERTY" : { "message" : [ "<property> is a reserved namespace property, <msg>." @@ -5627,31 +5647,11 @@ "<message>" ] }, - "_LEGACY_ERROR_TEMP_2274" : { - "message" : [ - "Nested field <colName> is not supported." - ] - }, - "_LEGACY_ERROR_TEMP_2275" : { - "message" : [ - "Dataset transformations and actions can only be invoked by the driver, not inside of other Dataset transformations; for example, dataset1.map(x => dataset2.values.count() * x) is invalid because the values transformation and count action cannot be performed inside of the dataset1.map transformation. For more information, see SPARK-28702." - ] - }, - "_LEGACY_ERROR_TEMP_2276" : { - "message" : [ - "Hive table <tableName> with ANSI intervals is not supported." - ] - }, "_LEGACY_ERROR_TEMP_2277" : { "message" : [ "Number of dynamic partitions created is <numWrittenParts>, which is more than <maxDynamicPartitions>. To solve this try to set <maxDynamicPartitionsKey> to at least <numWrittenParts>." ] }, - "_LEGACY_ERROR_TEMP_2278" : { - "message" : [ - "The input <valueType> '<input>' does not match the given number format: '<format>'." 
- ] - }, "_LEGACY_ERROR_TEMP_2320" : { "message" : [ "distinct aggregates are not allowed in observed metrics, but found: <sqlExpr>." diff --git a/docs/sql-error-conditions-invalid-format-error-class.md b/docs/sql-error-conditions-invalid-format-error-class.md index 365296ba29f..6470883435c 100644 --- a/docs/sql-error-conditions-invalid-format-error-class.md +++ b/docs/sql-error-conditions-invalid-format-error-class.md @@ -49,6 +49,10 @@ The escape character is not allowed to end with. The escape character is not allowed to precede `<char>`. +## MISMATCH_INPUT + +The input <inputType> '<input>' does not match the format. + ## THOUSANDS_SEPS_MUST_BEFORE_DEC Thousands separators (, or G) may not appear after the decimal point in the number format. diff --git a/docs/sql-error-conditions-unsupported-feature-error-class.md b/docs/sql-error-conditions-unsupported-feature-error-class.md index 232338bb12b..25f09118f74 100644 --- a/docs/sql-error-conditions-unsupported-feature-error-class.md +++ b/docs/sql-error-conditions-unsupported-feature-error-class.md @@ -69,6 +69,10 @@ DESC TABLE COLUMN for a specific partition. Drop the namespace `<namespace>`. +## HIVE_WITH_ANSI_INTERVALS + +Hive table <tableName> with ANSI intervals. + ## INSERT_PARTITION_SPEC_IF_NOT_EXISTS INSERT INTO `<tableName>` with IF NOT EXISTS in the PARTITION spec. @@ -117,6 +121,10 @@ Parameter markers are not allowed in `<statement>`. Invalid partitioning: `<cols>` is missing or is in a map or array. +## REPLACE_NESTED_COLUMN + +The replace function does not support nested column <colName>. + ## PIVOT_AFTER_GROUP_BY PIVOT clause following a GROUP BY clause. Consider pushing the GROUP BY into a subquery. 
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index b069f694ff3..6396f7a08d3 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -153,6 +153,12 @@ Cannot convert SQL `<sqlColumn>` to Protobuf `<protobufColumn>` because `<data>` Cannot decode url : `<url>`. +### CANNOT_INVOKE_IN_TRANSFORMATIONS + +SQLSTATE: none assigned + +Dataset transformations and actions can only be invoked by the driver, not inside of other Dataset transformations; for example, dataset1.map(x => dataset2.values.count() * x) is invalid because the values transformation and count action cannot be performed inside of the dataset1.map transformation. For more information, see SPARK-28702. + ### CANNOT_LOAD_FUNCTION_CLASS SQLSTATE: none assigned diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala index 5099b3fdb4b..d56bca30a05 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ToNumberParser.scala @@ -594,14 +594,14 @@ class ToNumberParser(numberFormat: String, errorOnFail: Boolean) extends Seriali private def formatMatchFailure(input: UTF8String, originNumberFormat: String): Decimal = { if (errorOnFail) { throw QueryExecutionErrors.invalidNumberFormatError( - "string", input.toString, originNumberFormat) + StringType, input.toString, originNumberFormat) } null } private def formatMatchFailure(input: Decimal, originNumberFormat: String): UTF8String = { if (errorOnFail) { throw QueryExecutionErrors.invalidNumberFormatError( - "Decimal value", input.toString, originNumberFormat) + DecimalType.fromDecimal(input), input.toString, originNumberFormat) } null } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index eded5e6534f..157bb1b6192 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala @@ -2604,14 +2604,13 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { def nestedFieldUnsupportedError(colName: String): SparkUnsupportedOperationException = { new SparkUnsupportedOperationException( - errorClass = "_LEGACY_ERROR_TEMP_2274", - messageParameters = Map( - "colName" -> colName)) + errorClass = "UNSUPPORTED_FEATURE.REPLACE_NESTED_COLUMN", + messageParameters = Map("colName" -> toSQLId(colName))) } def transformationsAndActionsNotInvokedByDriverError(): Throwable = { new SparkException( - errorClass = "_LEGACY_ERROR_TEMP_2275", + errorClass = "CANNOT_INVOKE_IN_TRANSFORMATIONS", messageParameters = Map.empty, cause = null) } @@ -2683,10 +2682,11 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { "functionName" -> toSQLId("aes_encrypt"))) } - def hiveTableWithAnsiIntervalsError(tableName: String): SparkUnsupportedOperationException = { + def hiveTableWithAnsiIntervalsError( + table: TableIdentifier): SparkUnsupportedOperationException = { new SparkUnsupportedOperationException( - errorClass = "_LEGACY_ERROR_TEMP_2276", - messageParameters = Map("tableName" -> tableName)) + errorClass = "UNSUPPORTED_FEATURE.HIVE_WITH_ANSI_INTERVALS", + messageParameters = Map("tableName" -> toSQLId(table.nameParts))) } def cannotConvertOrcTimestampToTimestampNTZError(): Throwable = { @@ -2720,11 +2720,11 @@ private[sql] object QueryExecutionErrors extends QueryErrorsBase { } def invalidNumberFormatError( - valueType: String, input: String, format: String): SparkIllegalArgumentException = { + dataType: DataType, input: String, format: String): SparkIllegalArgumentException = { new SparkIllegalArgumentException( - errorClass = 
"_LEGACY_ERROR_TEMP_2278", + errorClass = "INVALID_FORMAT.MISMATCH_INPUT", messageParameters = Map( - "valueType" -> valueType, + "inputType" -> toSQLType(dataType), "input" -> input, "format" -> format)) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index f320012d131..006c4a78056 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -1474,8 +1474,13 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { val toNumberExpr = ToNumber(Literal(str), Literal(format)) assert(toNumberExpr.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) - checkExceptionInExpression[SparkIllegalArgumentException]( - toNumberExpr, "does not match the given number format") + checkErrorInExpression[SparkIllegalArgumentException]( + toNumberExpr, + errorClass = "INVALID_FORMAT.MISMATCH_INPUT", + parameters = Map( + "inputType" -> "\"STRING\"", + "input" -> str, + "format" -> format)) val tryToNumberExpr = TryToNumber(Literal(str), Literal(format)) assert(tryToNumberExpr.checkInputDataTypes() == TypeCheckResult.TypeCheckSuccess) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala index 8acf52b1250..bbe0d3c0c83 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/ddl.scala @@ -994,7 +994,7 @@ object DDLUtils extends Logging { case HIVE_PROVIDER => val serde = table.storage.serde if (schema.exists(_.dataType.isInstanceOf[AnsiIntervalType])) { - throw hiveTableWithAnsiIntervalsError(table.identifier.toString) + throw 
hiveTableWithAnsiIntervalsError(table.identifier) } else if (serde == HiveSerDe.sourceToSerDe("orc").get.serde) { checkDataColNames("orc", schema) } else if (serde == HiveSerDe.sourceToSerDe("parquet").get.serde || diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out index 61b7a07631c..98159c947a2 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out @@ -4697,11 +4697,12 @@ struct<> -- !query output org.apache.spark.SparkIllegalArgumentException { - "errorClass" : "_LEGACY_ERROR_TEMP_2278", + "errorClass" : "INVALID_FORMAT.MISMATCH_INPUT", + "sqlState" : "42601", "messageParameters" : { "format" : "99G999G999", "input" : "-34,338,492", - "valueType" : "string" + "inputType" : "\"STRING\"" } } @@ -4797,11 +4798,12 @@ struct<> -- !query output org.apache.spark.SparkIllegalArgumentException { - "errorClass" : "_LEGACY_ERROR_TEMP_2278", + "errorClass" : "INVALID_FORMAT.MISMATCH_INPUT", + "sqlState" : "42601", "messageParameters" : { "format" : "999G999", "input" : "123456", - "valueType" : "string" + "inputType" : "\"STRING\"" } } diff --git a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out.java21 b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out.java21 index a1d9dbbc830..ad27a63837e 100644 --- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out.java21 +++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/numeric.sql.out.java21 @@ -4697,11 +4697,12 @@ struct<> -- !query output org.apache.spark.SparkIllegalArgumentException { - "errorClass" : "_LEGACY_ERROR_TEMP_2278", + "errorClass" : "INVALID_FORMAT.MISMATCH_INPUT", + "sqlState" : "42601", "messageParameters" : { "format" : "99G999G999", "input" : "-34,338,492", - "valueType" : "string" + "inputType" : "\"STRING\"" } } @@ 
-4797,11 +4798,12 @@ struct<> -- !query output org.apache.spark.SparkIllegalArgumentException { - "errorClass" : "_LEGACY_ERROR_TEMP_2278", + "errorClass" : "INVALID_FORMAT.MISMATCH_INPUT", + "sqlState" : "42601", "messageParameters" : { "format" : "999G999", "input" : "123456", - "valueType" : "string" + "inputType" : "\"STRING\"" } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala index e7277451e04..c28ee3d8483 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala @@ -5901,6 +5901,19 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession { checkAnswer(df2, Seq(Row(Row(1, 2)))) assert(df2.schema === expectedSchema) } + + test("CANNOT_INVOKE_IN_TRANSFORMATIONS - Dataset transformations and actions " + + "can only be invoked by the driver, not inside of other Dataset transformations") { + val df1 = Seq((1)).toDF("a") + val df2 = Seq((4, 5)).toDF("e", "f") + checkError( + exception = intercept[SparkException] { + df1.map(r => df2.count() * r.getInt(0)).collect() + }.getCause.asInstanceOf[SparkException], + errorClass = "CANNOT_INVOKE_IN_TRANSFORMATIONS", + parameters = Map.empty + ) + } } object DataFrameFunctionsSuite { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala index 14baa75d81b..fb4bad5b9fa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameNaFunctionsSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql import scala.collection.JavaConverters._ +import org.apache.spark.SparkUnsupportedOperationException import org.apache.spark.sql.internal.SQLConf import 
org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{StringType, StructType} @@ -546,9 +547,12 @@ class DataFrameNaFunctionsSuite extends QueryTest with SharedSparkSession { test("SPARK-34649: replace value of a nested column") { val df = createDFWithNestedColumns - val exception = intercept[UnsupportedOperationException] { - df.na.replace("c1.c1-1", Map("b1" ->"a1")) - } - assert(exception.getMessage.equals("Nested field c1.c1-1 is not supported.")) + checkError( + exception = intercept[SparkUnsupportedOperationException] { + df.na.replace("c1.c1-1", Map("b1" ->"a1")) + }, + errorClass = "UNSUPPORTED_FEATURE.REPLACE_NESTED_COLUMN", + parameters = Map("colName" -> "`c1`.`c1-1`") + ) } } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 8e5f0b8b507..692c2215fde 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala @@ -3311,7 +3311,7 @@ class HiveDDLSuite exception = intercept[SparkUnsupportedOperationException] { sql(sqlCmd) }, - errorClass = "_LEGACY_ERROR_TEMP_2276", + errorClass = "UNSUPPORTED_FEATURE.HIVE_WITH_ANSI_INTERVALS", parameters = Map("tableName" -> s"`$SESSION_CATALOG_NAME`.`default`.`$tbl`") ) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddColumnsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddColumnsSuite.scala index 2b28890eb45..3ae2ff562d1 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddColumnsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/command/AlterTableAddColumnsSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.hive.execution.command +import 
org.apache.spark.SparkUnsupportedOperationException +import org.apache.spark.sql.catalyst.util.TypeUtils.toSQLId import org.apache.spark.sql.execution.command.v1 /** @@ -29,10 +31,13 @@ class AlterTableAddColumnsSuite test("SPARK-36949: Disallow tables with ANSI intervals when the provider is Hive") { def check(tbl: String): Unit = { - val errMsg = intercept[UnsupportedOperationException] { - sql(s"ALTER TABLE $tbl ADD COLUMNS (ym INTERVAL YEAR)") - }.getMessage - assert(errMsg.contains("ANSI intervals is not supported")) + checkError( + exception = intercept[SparkUnsupportedOperationException] { + sql(s"ALTER TABLE $tbl ADD COLUMNS (ym INTERVAL YEAR)") + }, + errorClass = "UNSUPPORTED_FEATURE.HIVE_WITH_ANSI_INTERVALS", + parameters = Map("tableName" -> toSQLId(tbl)) + ) } withNamespaceAndTable("ns", "tbl") { tbl => sql(s"CREATE TABLE $tbl (id INT) $defaultUsing") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org