This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new e00f14ff521 [SPARK-41314][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_1094`
e00f14ff521 is described below

commit e00f14ff5216e194fe39ef38d2c9414a22ef696a
Author: yangjie01 <yangji...@baidu.com>
AuthorDate: Thu Dec 1 11:49:42 2022 +0300

    [SPARK-41314][SQL] Assign a name to the error class `_LEGACY_ERROR_TEMP_1094`
    
    ### What changes were proposed in this pull request?
    This pr aims to rename error class `_LEGACY_ERROR_TEMP_1094` to `INVALID_SCHEMA.NON_STRUCT_TYPE`.
    
    ### Why are the changes needed?
    Proper names of error classes to improve user experience with Spark SQL.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Add new tests to check `INVALID_SCHEMA.NON_STRUCT_TYPE`
    
    Closes #38856 from LuciferYang/SPARK-41314.
    
    Authored-by: yangjie01 <yangji...@baidu.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 core/src/main/resources/error/error-classes.json   | 10 ++++----
 .../spark/sql/catalyst/expressions/ExprUtils.scala |  2 +-
 .../spark/sql/errors/QueryCompilationErrors.scala  |  9 ++++---
 .../resources/sql-tests/inputs/csv-functions.sql   |  1 +
 .../sql-tests/results/csv-functions.sql.out        | 22 ++++++++++++++++
 .../org/apache/spark/sql/CsvFunctionsSuite.scala   | 29 ++++++++++++++++++++++
 6 files changed, 64 insertions(+), 9 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json 
b/core/src/main/resources/error/error-classes.json
index 65b6dc68d12..347b9a14862 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -782,6 +782,11 @@
           "The input expression must be string literal and not null."
         ]
       },
+      "NON_STRUCT_TYPE" : {
+        "message" : [
+          "The input expression should be evaluated to struct type, but got <dataType>."
+        ]
+      },
       "PARSE_ERROR" : {
         "message" : [
           "Cannot parse the schema:",
@@ -2211,11 +2216,6 @@
       "Cannot read table property '<key>' as it's corrupted.<details>."
     ]
   },
-  "_LEGACY_ERROR_TEMP_1094" : {
-    "message" : [
-      "Schema should be struct type but got <dataType>."
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1097" : {
     "message" : [
       "The field for corrupt records must be string type and nullable."
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala
index fbe3d5eb458..2fa970bac0c 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala
@@ -46,7 +46,7 @@ object ExprUtils extends QueryErrorsBase {
   def evalSchemaExpr(exp: Expression): StructType = {
     val dataType = evalTypeExpr(exp)
     if (!dataType.isInstanceOf[StructType]) {
-      throw QueryCompilationErrors.schemaIsNotStructTypeError(dataType)
+      throw QueryCompilationErrors.schemaIsNotStructTypeError(exp, dataType)
     }
     dataType.asInstanceOf[StructType]
   }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index fc9a08104b4..2e20d7aec8d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1010,10 +1010,13 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
       messageParameters = Map("inputSchema" -> toSQLExpr(exp)))
   }
 
-  def schemaIsNotStructTypeError(dataType: DataType): Throwable = {
+  def schemaIsNotStructTypeError(exp: Expression, dataType: DataType): 
Throwable = {
     new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1094",
-      messageParameters = Map("dataType" -> dataType.toString))
+      errorClass = "INVALID_SCHEMA.NON_STRUCT_TYPE",
+      messageParameters = Map(
+        "inputSchema" -> toSQLExpr(exp),
+        "dataType" -> toSQLType(dataType)
+      ))
   }
 
   def keyValueInMapNotStringError(m: CreateMap): Throwable = {
diff --git a/sql/core/src/test/resources/sql-tests/inputs/csv-functions.sql 
b/sql/core/src/test/resources/sql-tests/inputs/csv-functions.sql
index a1a4bc9de3f..01d436534a1 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/csv-functions.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/csv-functions.sql
@@ -4,6 +4,7 @@ select from_csv('26/08/2015', 'time Timestamp', map('timestampFormat', 'dd/MM/yy
 -- Check if errors handled
 select from_csv('1', 1);
 select from_csv('1', 'a InvalidType');
+select from_csv('1', 'Array<int>');
 select from_csv('1', 'a INT', named_struct('mode', 'PERMISSIVE'));
 select from_csv('1', 'a INT', map('mode', 1));
 select from_csv();
diff --git 
a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out 
b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out
index e5e70714a1b..58c0a7b9f3c 100644
--- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out
@@ -59,6 +59,28 @@ org.apache.spark.sql.AnalysisException
 }
 
 
+-- !query
+select from_csv('1', 'Array<int>')
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+  "errorClass" : "INVALID_SCHEMA.NON_STRUCT_TYPE",
+  "messageParameters" : {
+    "dataType" : "\"ARRAY<INT>\"",
+    "inputSchema" : "\"Array<int>\""
+  },
+  "queryContext" : [ {
+    "objectType" : "",
+    "objectName" : "",
+    "startIndex" : 8,
+    "stopIndex" : 34,
+    "fragment" : "from_csv('1', 'Array<int>')"
+  } ]
+}
+
+
 -- !query
 select from_csv('1', 'a INT', named_struct('mode', 'PERMISSIVE'))
 -- !query schema
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
index ab4c148da04..fcdc40404e7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
@@ -43,6 +43,35 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession {
       Row(Row(1)) :: Nil)
   }
 
+  test("from_csv with non struct schema") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        Seq("1").toDS().select(from_csv($"value", lit("ARRAY<int>"), Map[String, String]().asJava))
+      },
+      errorClass = "INVALID_SCHEMA.NON_STRUCT_TYPE",
+      parameters = Map(
+        "inputSchema" -> "\"ARRAY<int>\"",
+        "dataType" -> "\"ARRAY<INT>\""
+      )
+    )
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        Seq("1").toDF("csv").selectExpr(s"from_csv(csv, 'ARRAY<int>')")
+      },
+      errorClass = "INVALID_SCHEMA.NON_STRUCT_TYPE",
+      parameters = Map(
+        "inputSchema" -> "\"ARRAY<int>\"",
+        "dataType" -> "\"ARRAY<INT>\""
+      ),
+      context = ExpectedContext(
+        fragment = "from_csv(csv, 'ARRAY<int>')",
+        start = 0,
+        stop = 26
+      )
+    )
+  }
+
   test("from_csv with option (timestampFormat)") {
     val df = Seq("26/08/2015 18:00").toDS()
     val schema = new StructType().add("time", TimestampType)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to