This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 6bb68b5f75a  [SPARK-41295][SPARK-41296][SQL] Rename the error classes
6bb68b5f75a is described below

commit 6bb68b5f75ac883423a68583750258656f381c33
Author: narek_karapetian <narek.karapetia...@yandex.ru>
AuthorDate: Mon Feb 6 00:04:40 2023 +0300

    [SPARK-41295][SPARK-41296][SQL] Rename the error classes

    ### What changes were proposed in this pull request?
    In the PR, I propose to assign proper names to the legacy error classes:
    - `_LEGACY_ERROR_TEMP_1105` -> `INVALID_EXTRACT_FIELD_TYPE`
    - `_LEGACY_ERROR_TEMP_1106` -> `INVALID_EXTRACT_BASE_FIELD_TYPE`
    - `_LEGACY_ERROR_TEMP_1209` -> `AMBIGUOUS_REFERENCE_TO_FIELDS`

    Changed error messages for:
    - `_LEGACY_ERROR_TEMP_1106`: from `Can't extract value from <child>: need struct type but got <other>.` to `Can't extract a value from <base>. Need a complex type [STRUCT, ARRAY, MAP] but got <other>.`
    - `_LEGACY_ERROR_TEMP_1209`: from `Ambiguous reference to fields <fields>.` to `Ambiguous reference to the field <field>. It appears <count> times in the schema.`
    - `_LEGACY_ERROR_TEMP_1105`: from `Field name should be String Literal, but it's <extraction>.` to `Field name should be a non-null string literal, but it's <extraction>.`

    The test suites were also modified to use checkError(), which checks the error class name, the query context, etc.

    **Also, this PR contains an additional change (`AMBIGUOUS_REFERENCE_TO_FIELDS`) which is not tracked in JIRA.**

    ### Why are the changes needed?
    Proper error class names improve the user experience with Spark SQL.

    ### Does this PR introduce _any_ user-facing change?
    Yes, the PR changes user-facing error messages.

    ### How was this patch tested?
    Added additional test cases in `org.apache.spark.sql.errors.QueryCompilationErrorsSuite`.

    Closes #39501 from NarekDW/SPARK-41295.

    Authored-by: narek_karapetian <narek.karapetia...@yandex.ru>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 core/src/main/resources/error/error-classes.json   | 33 +++++-----
 .../expressions/complexTypeExtractors.scala        |  6 +-
 .../spark/sql/errors/QueryCompilationErrors.scala  | 16 ++---
 .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 20 ++++--
 .../catalyst/expressions/ComplexTypeSuite.scala    | 21 ------
 .../apache/spark/sql/ColumnExpressionSuite.scala   | 76 ++++++++++++++--------
 .../sql/errors/QueryCompilationErrorsSuite.scala   | 69 +++++++++++++++++++-
 .../sql/hive/execution/HiveResolutionSuite.scala   | 12 ++--
 8 files changed, 167 insertions(+), 86 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 7ecd924ea8d..afabc56a431 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -17,6 +17,12 @@
     ],
     "sqlState" : "42704"
   },
+  "AMBIGUOUS_REFERENCE_TO_FIELDS" : {
+    "message" : [
+      "Ambiguous reference to the field <field>. It appears <count> times in the schema."
+    ],
+    "sqlState" : "42000"
+  },
   "ARITHMETIC_OVERFLOW" : {
     "message" : [
       "<message>.<alternative> If necessary set <config> to \"false\" to bypass this error."
@@ -750,12 +756,24 @@
     ],
     "sqlState" : "42K05"
   },
+  "INVALID_EXTRACT_BASE_FIELD_TYPE" : {
+    "message" : [
+      "Can't extract a value from <base>. Need a complex type [STRUCT, ARRAY, MAP] but got <other>."
+    ],
+    "sqlState" : "42000"
+  },
   "INVALID_EXTRACT_FIELD" : {
     "message" : [
       "Cannot extract <field> from <expr>."
], "sqlState" : "42601" }, + "INVALID_EXTRACT_FIELD_TYPE" : { + "message" : [ + "Field name should be a non-null string literal, but it's <extraction>." + ], + "sqlState" : "42000" + }, "INVALID_FIELD_NAME" : { "message" : [ "Field name <fieldName> is invalid: <path> is not a struct." @@ -2491,16 +2509,6 @@ "The second argument should be a double literal." ] }, - "_LEGACY_ERROR_TEMP_1105" : { - "message" : [ - "Field name should be String Literal, but it's <extraction>." - ] - }, - "_LEGACY_ERROR_TEMP_1106" : { - "message" : [ - "Can't extract value from <child>: need struct type but got <other>." - ] - }, "_LEGACY_ERROR_TEMP_1107" : { "message" : [ "Table <table> declares <batchWrite> capability but <v2WriteClassName> is not an instance of <v1WriteClassName>." @@ -2972,11 +2980,6 @@ "The duration and time inputs to window must be an integer, long or string literal." ] }, - "_LEGACY_ERROR_TEMP_1209" : { - "message" : [ - "Ambiguous reference to fields <fields>." - ] - }, "_LEGACY_ERROR_TEMP_1210" : { "message" : [ "The second argument in <funcName> should be a boolean literal." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala index d0ef5365bc9..e22af21daaa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeExtractors.scala @@ -64,7 +64,7 @@ object ExtractValue { case (_: ArrayType, _) => GetArrayItem(child, extraction) - case (MapType(kt, _, _), _) => GetMapValue(child, extraction) + case (MapType(_, _, _), _) => GetMapValue(child, extraction) case (otherType, _) => throw QueryCompilationErrors.dataTypeUnsupportedByExtractValueError( @@ -82,8 +82,8 @@ object ExtractValue { if (ordinal == -1) { throw QueryCompilationErrors.noSuchStructFieldInGivenFieldsError(fieldName, fields) } else if (fields.indexWhere(checkField, ordinal + 1) != -1) { - throw QueryCompilationErrors.ambiguousReferenceToFieldsError( - fields.filter(checkField).mkString(", ")) + val numberOfAppearance = fields.count(checkField) + throw QueryCompilationErrors.ambiguousReferenceToFieldsError(fieldName, numberOfAppearance) } else { ordinal } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 0bc3b2fc4f6..de0a0124767 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1117,14 +1117,14 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { dataType match { case StructType(_) => new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1105", - messageParameters = Map("extraction" -> extraction.toString)) + errorClass = "INVALID_EXTRACT_FIELD_TYPE", + messageParameters = Map("extraction" -> toSQLExpr(extraction))) case other => new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1106", + errorClass = "INVALID_EXTRACT_BASE_FIELD_TYPE", messageParameters = Map( - "child" -> child.toString, - "other" -> other.catalogString)) + "base" -> toSQLExpr(child), + "other" -> toSQLType(other))) } } @@ -2099,10 +2099,10 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { "fields" -> fields.map(f => 
toSQLId(f.name)).mkString(", "))) } - def ambiguousReferenceToFieldsError(fields: String): Throwable = { + def ambiguousReferenceToFieldsError(field: String, numberOfAppearance: Int): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1209", - messageParameters = Map("fields" -> fields)) + errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS", + messageParameters = Map("field" -> toSQLId(field), "count" -> numberOfAppearance.toString)) } def secondArgumentInFunctionIsNotBooleanLiteralError(funcName: String): Throwable = { diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala index 71d3deb36c2..cbd6749807f 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisErrorSuite.scala @@ -370,17 +370,25 @@ class AnalysisErrorSuite extends AnalysisTest { "expressionAnyValue" -> "\"any_value(b)\"") ) - errorTest( + errorClassTest( "ambiguous field", nestedRelation.select($"top.duplicateField"), - "Ambiguous reference to fields" :: "duplicateField" :: Nil, - caseSensitive = false) + errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS", + messageParameters = Map( + "field" -> "`duplicateField`", + "count" -> "2"), + caseSensitive = false + ) - errorTest( + errorClassTest( "ambiguous field due to case insensitivity", nestedRelation.select($"top.differentCase"), - "Ambiguous reference to fields" :: "differentCase" :: "differentcase" :: Nil, - caseSensitive = false) + errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS", + messageParameters = Map( + "field" -> "`differentCase`", + "count" -> "2"), + caseSensitive = false + ) errorClassTest( "missing field", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala index ecff40c0b3b..8dbca473cfb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ComplexTypeSuite.scala @@ -462,27 +462,6 @@ class ComplexTypeSuite extends SparkFunSuite with ExpressionEvalHelper { 1, create_row(create_row(1))) } - test("error message of ExtractValue") { - val structType = StructType(StructField("a", StringType, true) :: Nil) - val otherType = StringType - - def checkErrorMessage( - childDataType: DataType, - fieldDataType: DataType, - errorMessage: String): Unit = { - val e = intercept[org.apache.spark.sql.AnalysisException] { - ExtractValue( - Literal.create(null, childDataType), - Literal.create(null, fieldDataType), - _ == _) - } - assert(e.getMessage().contains(errorMessage)) - } - - checkErrorMessage(structType, IntegerType, "Field name should be String Literal") - checkErrorMessage(otherType, StringType, "Can't extract value from") - } - test("ensure to preserve metadata") { val metadata = new MetadataBuilder() .putString("key", "value") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala index 718405bd8ac..b93b643cbb8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ColumnExpressionSuite.scala @@ -1087,17 +1087,22 @@ class 
ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("withField should throw an exception if intermediate field reference is ambiguous") { - intercept[AnalysisException] { - val structLevel2: DataFrame = spark.createDataFrame( - sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil), - StructType(Seq( - StructField("a", StructType(Seq( - StructField("a", structType, nullable = false), - StructField("a", structType, nullable = false))), - nullable = false)))) + checkError( + exception = intercept[AnalysisException] { + val structLevel2: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil), + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", structType, nullable = false), + StructField("a", structType, nullable = false))), + nullable = false)))) - structLevel2.withColumn("a", $"a".withField("a.b", lit(2))) - }.getMessage should include("Ambiguous reference to fields") + structLevel2.withColumn("a", $"a".withField("a.b", lit(2))) + }, + errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS", + sqlState = "42000", + parameters = Map("field" -> "`a`", "count" -> "2") + ) } test("withField should add field with no name") { @@ -1647,10 +1652,15 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { .select($"struct_col".withField("a.c", lit(3))), Row(Row(Row(1, 2, 3)))) - intercept[AnalysisException] { - sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col") - .select($"struct_col".withField("a.c", lit(3))) - }.getMessage should include("Ambiguous reference to fields") + checkError( + exception = intercept[AnalysisException] { + sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col") + .select($"struct_col".withField("a.c", lit(3))) + }, + errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS", + sqlState = "42000", + parameters = Map("field" -> "`a`", "count" -> "2") + ) checkAnswer( sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col") @@ -1862,17 +1872,22 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { } test("dropFields should throw an exception if intermediate field reference is ambiguous") { - intercept[AnalysisException] { - val structLevel2: DataFrame = spark.createDataFrame( - sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil), - StructType(Seq( - StructField("a", StructType(Seq( - StructField("a", structType, nullable = false), - StructField("a", structType, nullable = false))), - nullable = false)))) + checkError( + exception = intercept[AnalysisException] { + val structLevel2: DataFrame = spark.createDataFrame( + sparkContext.parallelize(Row(Row(Row(1, null, 3), 4)) :: Nil), + StructType(Seq( + StructField("a", StructType(Seq( + StructField("a", structType, nullable = false), + StructField("a", structType, nullable = false))), + nullable = false)))) - structLevel2.withColumn("a", $"a".dropFields("a.b")) - }.getMessage should include("Ambiguous reference to fields") + structLevel2.withColumn("a", $"a".dropFields("a.b")) + }, + errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS", + sqlState = "42000", + parameters = Map("field" -> "`a`", "count" -> "2") + ) } test("dropFields should drop field in struct") { @@ -2208,10 +2223,15 @@ class ColumnExpressionSuite extends QueryTest with SharedSparkSession { .select($"struct_col".dropFields("a.b")), Row(Row(Row(1)))) - intercept[AnalysisException] { - sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) 
struct_col") - .select($"struct_col".dropFields("a.c")) - }.getMessage should include("Ambiguous reference to fields") + checkError( + exception = intercept[AnalysisException] { + sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col") + .select($"struct_col".dropFields("a.c")) + }, + errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS", + sqlState = "42000", + parameters = Map("field" -> "`a`", "count" -> "2") + ) checkAnswer( sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2, 'c', 3)) struct_col") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index 55964afba74..47c4a86ccc5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -21,7 +21,7 @@ import org.apache.spark.sql.{AnalysisException, ClassData, IntegratedUDFTestUtil import org.apache.spark.sql.api.java.{UDF1, UDF2, UDF23Test} import org.apache.spark.sql.catalyst.parser.ParseException import org.apache.spark.sql.expressions.SparkUserDefinedFunction -import org.apache.spark.sql.functions.{from_json, grouping, grouping_id, lit, struct, sum, udf} +import org.apache.spark.sql.functions.{array, from_json, grouping, grouping_id, lit, struct, sum, udf} import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSparkSession import org.apache.spark.sql.types.{IntegerType, MapType, StringType, StructField, StructType} @@ -715,6 +715,73 @@ class QueryCompilationErrorsSuite ) } } + + test("AMBIGUOUS_REFERENCE_TO_FIELDS: select ambiguous field from struct") { + val data = Seq( + Row(Row("test1", "test1")), + Row(Row("test2", "test2"))) + + val schema = new StructType() + .add("name", + new StructType() + .add("firstname", StringType) + .add("firstname", StringType)) + + val df = spark.createDataFrame(spark.sparkContext.parallelize(data), schema) + + checkError( + exception = intercept[AnalysisException] { + df.select($"name.firstname") + }, + errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS", + sqlState = "42000", + parameters = Map("field" -> "`firstname`", "count" -> "2") + ) + } + + test("INVALID_EXTRACT_BASE_FIELD_TYPE: select ambiguous field from struct") { + val df = Seq("test").toDF("firstname") + + checkError( + exception = intercept[AnalysisException] { + df.select($"firstname.test_field") + }, + errorClass = "INVALID_EXTRACT_BASE_FIELD_TYPE", + sqlState = "42000", + parameters = Map("base" -> "\"firstname\"", "other" -> "\"STRING\"")) + } + + test("INVALID_EXTRACT_FIELD_TYPE: extract not string literal field") { + val structureData = Seq( + Row(Row(Row("test1", "test1"))), + Row(Row(Row("test2", "test2")))) + + val structureSchema = new StructType() + .add("name", + new StructType() + .add("inner", + new StructType() + .add("firstname", StringType) + .add("lastname", StringType))) + + val df = spark.createDataFrame(spark.sparkContext.parallelize(structureData), structureSchema) + + checkError( + exception = intercept[AnalysisException] { + df.select(struct($"name"(struct("test")))) + }, + errorClass = "INVALID_EXTRACT_FIELD_TYPE", + sqlState = "42000", + parameters = Map("extraction" -> "\"struct(test)\"")) + + checkError( + exception = intercept[AnalysisException] { + df.select($"name"(array("test"))) + }, + errorClass = "INVALID_EXTRACT_FIELD_TYPE", + sqlState = "42000", + parameters = Map("extraction" -> 
"\"array(test)\"")) + } } class MyCastToString extends SparkUserDefinedFunction( diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala index 72cfabb9246..8e2a5a66172 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveResolutionSuite.scala @@ -44,10 +44,14 @@ class HiveResolutionSuite extends HiveComparisonTest { .createOrReplaceTempView("nested") // there are 2 filed matching field name "b", we should report Ambiguous reference error - val exception = intercept[AnalysisException] { - sql("SELECT a[0].b from nested").queryExecution.analyzed - } - assert(exception.getMessage.contains("Ambiguous reference to fields")) + checkError( + exception = intercept[AnalysisException] { + sql("SELECT a[0].b from nested").queryExecution.analyzed + }, + errorClass = "AMBIGUOUS_REFERENCE_TO_FIELDS", + sqlState = "42000", + parameters = Map("field" -> "`b`", "count" -> "2") + ) } createQueryTest("table.attr", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org