This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 0ef7afe0dc3 [SPARK-41931][SQL] Better error message for incomplete complex type definition 0ef7afe0dc3 is described below commit 0ef7afe0dc3723b97b750c071a908f363e514a26 Author: Runyao Chen <runyao.c...@databricks.com> AuthorDate: Fri Jan 27 18:06:32 2023 +0300 [SPARK-41931][SQL] Better error message for incomplete complex type definition ### What changes were proposed in this pull request? This PR improves error messages for `ARRAY` / `MAP` / `STRUCT` types without element type specification. A new error class `INCOMPLETE_TYPE_DEFINITION` with subclasses (`ARRAY`, `MAP`, and `STRUCT`) is introduced. **Details** When we `CAST AS` or `CREATE` a complex type without specifying its element type, e.g. ``` CREATE TABLE t (col ARRAY) ``` an `[UNSUPPORTED_DATATYPE] Unsupported data type "ARRAY"` error is thrown, even though we do support the `ARRAY` type and just require its element type to be specified. This PR proposes a better error message like ``` The definition of `ARRAY` type is incomplete. You must provide an element type. For example: `ARRAY<elementType>`. ``` ### Why are the changes needed? The previous error message for incomplete complex types is confusing. An `UNSUPPORTED_DATATYPE` error is thrown even though we do support complex types. We just require complex types to have their element types specified. We need a clear error message with an example in this case. ### Does this PR introduce _any_ user-facing change? Yes, this PR changes the error message, which is user-facing. Error message before this PR: ``` spark-sql> SELECT CAST(array(1, 2, 3) AS ARRAY); [UNSUPPORTED_DATATYPE] Unsupported data type "ARRAY"(line 1, pos 30) ``` Error message after this PR: ``` [INCOMPLETE_TYPE_DEFINITION.ARRAY] Incomplete complex type: The definition of `ARRAY` type is incomplete. You must provide an element type. For example: `ARRAY<elementType>`. ``` Similarly for MAP and STRUCT types. 
### How was this patch tested? Added unit tests covering CAST and CREATE with ARRAY / STRUCT / MAP types and their nested combinations. Closes #39711 from RunyaoChen/better_error_msg_nested_type. Lead-authored-by: Runyao Chen <runyao.c...@databricks.com> Co-authored-by: RunyaoChen <runyao.c...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- core/src/main/resources/error/error-classes.json | 23 +++++++ .../spark/sql/catalyst/parser/AstBuilder.scala | 2 + .../spark/sql/errors/QueryParsingErrors.scala | 21 +++++++ .../spark/sql/errors/QueryParsingErrorsSuite.scala | 72 ++++++++++++++++++++++ 4 files changed, 118 insertions(+) diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index e6876751a22..ae766de3e20 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -592,6 +592,29 @@ "Detected an incompatible DataSourceRegister. Please remove the incompatible library from classpath or upgrade it. Error: <message>" ] }, + "INCOMPLETE_TYPE_DEFINITION" : { + "message" : [ + "Incomplete complex type:" + ], + "subClass" : { + "ARRAY" : { + "message" : [ + "The definition of \"ARRAY\" type is incomplete. You must provide an element type. For example: \"ARRAY<elementType>\"." + ] + }, + "MAP" : { + "message" : [ + "The definition of \"MAP\" type is incomplete. You must provide a key type and a value type. For example: \"MAP<TIMESTAMP, INT>\"." + ] + }, + "STRUCT" : { + "message" : [ + "The definition of \"STRUCT\" type is incomplete. You must provide at least one field type. For example: \"STRUCT<Field1: INT>\"." 
+ ] + } + }, + "sqlState" : "42K01" + }, "INCONSISTENT_BEHAVIOR_CROSS_VERSION" : { "message" : [ "You may get a different result due to the upgrading to" diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index c6e50f3f514..d2a1cb1eb16 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -2889,6 +2889,8 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit case ("interval", Nil) => CalendarIntervalType case (dt @ ("character" | "char" | "varchar"), Nil) => throw QueryParsingErrors.charTypeMissingLengthError(dt, ctx) + case (dt @ ("array" | "struct" | "map"), Nil) => + throw QueryParsingErrors.nestedTypeMissingElementTypeError(dt, ctx) case (dt, params) => val dtStr = if (params.nonEmpty) s"$dt(${params.mkString(",")})" else dt throw QueryParsingErrors.dataTypeUnsupportedError(dtStr, ctx) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala index 29766251abd..e54bbb9c9d1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryParsingErrors.scala @@ -292,6 +292,27 @@ private[sql] object QueryParsingErrors extends QueryErrorsBase { ctx) } + def nestedTypeMissingElementTypeError( + dataType: String, ctx: PrimitiveDataTypeContext): Throwable = { + dataType match { + case "array" => + new ParseException( + errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY", + messageParameters = Map("elementType" -> "<INT>"), + ctx) + case "struct" => + new ParseException( + errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT", + messageParameters = Map.empty, + ctx) + case "map" => + 
new ParseException( + errorClass = "INCOMPLETE_TYPE_DEFINITION.MAP", + messageParameters = Map.empty, + ctx) + } + } + def partitionTransformNotExpectedError( name: String, describe: String, ctx: ApplyTransformContext): Throwable = { new ParseException( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala index 71483534d40..b30998b6aa0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryParsingErrorsSuite.scala @@ -546,4 +546,76 @@ class QueryParsingErrorsSuite extends QueryTest with SharedSparkSession { start = 0, stop = 124)) } + + test("INCOMPLETE_TYPE_DEFINITION: array type definition is incomplete") { + // Cast simple array without specifying element type + checkError( + exception = parseException("SELECT CAST(array(1,2,3) AS ARRAY)"), + errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY", + sqlState = "42K01", + parameters = Map("elementType" -> "<INT>"), + context = ExpectedContext(fragment = "ARRAY", start = 28, stop = 32)) + // Cast array of array without specifying element type for inner array + checkError( + exception = parseException("SELECT CAST(array(array(3)) AS ARRAY<ARRAY>)"), + errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY", + sqlState = "42K01", + parameters = Map("elementType" -> "<INT>"), + context = ExpectedContext(fragment = "ARRAY", start = 37, stop = 41)) + // Create column of array type without specifying element type + checkError( + exception = parseException("CREATE TABLE tbl_120691 (col1 ARRAY)"), + errorClass = "INCOMPLETE_TYPE_DEFINITION.ARRAY", + sqlState = "42K01", + parameters = Map("elementType" -> "<INT>"), + context = ExpectedContext(fragment = "ARRAY", start = 30, stop = 34)) + } + + test("INCOMPLETE_TYPE_DEFINITION: struct type definition is incomplete") { + // Cast simple struct without specifying 
field type + checkError( + exception = parseException("SELECT CAST(struct(1,2,3) AS STRUCT)"), + errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT", + sqlState = "42K01", + context = ExpectedContext(fragment = "STRUCT", start = 29, stop = 34)) + // Cast array of struct without specifying field type in struct + checkError( + exception = parseException("SELECT CAST(array(struct(1,2)) AS ARRAY<STRUCT>)"), + errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT", + sqlState = "42K01", + context = ExpectedContext(fragment = "STRUCT", start = 40, stop = 45)) + // Create column of struct type without specifying field type + checkError( + exception = parseException("CREATE TABLE tbl_120691 (col1 STRUCT)"), + errorClass = "INCOMPLETE_TYPE_DEFINITION.STRUCT", + sqlState = "42K01", + context = ExpectedContext(fragment = "STRUCT", start = 30, stop = 35)) + // Invalid syntax `STRUCT<INT>` without field name + checkError( + exception = parseException("SELECT CAST(struct(1,2,3) AS STRUCT<INT>)"), + errorClass = "PARSE_SYNTAX_ERROR", + sqlState = "42601", + parameters = Map("error" -> "'>'", "hint" -> "")) + } + + test("INCOMPLETE_TYPE_DEFINITION: map type definition is incomplete") { + // Cast simple map without specifying element type + checkError( + exception = parseException("SELECT CAST(map(1,'2') AS MAP)"), + errorClass = "INCOMPLETE_TYPE_DEFINITION.MAP", + sqlState = "42K01", + context = ExpectedContext(fragment = "MAP", start = 26, stop = 28)) + // Create column of map type without specifying key/value types + checkError( + exception = parseException("CREATE TABLE tbl_120691 (col1 MAP)"), + errorClass = "INCOMPLETE_TYPE_DEFINITION.MAP", + sqlState = "42K01", + context = ExpectedContext(fragment = "MAP", start = 30, stop = 32)) + // Invalid syntax `MAP<String>` with only key type + checkError( + exception = parseException("SELECT CAST(map('1',2) AS MAP<STRING>)"), + errorClass = "PARSE_SYNTAX_ERROR", + sqlState = "42601", + parameters = Map("error" -> "'>'", "hint" -> "")) + } } 
--------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org