This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new bdb4d5e4da5 [SPARK-41180][SQL] Reuse `INVALID_SCHEMA` instead of `_LEGACY_ERROR_TEMP_1227`
bdb4d5e4da5 is described below

commit bdb4d5e4da558775df2be712dd8760d5f5f14747
Author: yangjie01 <yangji...@baidu.com>
AuthorDate: Mon Nov 28 20:26:27 2022 +0300

    [SPARK-41180][SQL] Reuse `INVALID_SCHEMA` instead of `_LEGACY_ERROR_TEMP_1227`
    
    ### What changes were proposed in this pull request?
    This PR aims to reuse the error class `INVALID_SCHEMA` instead of `_LEGACY_ERROR_TEMP_1227`, introducing the sub-classes `NON_STRING_LITERAL` and `PARSE_ERROR`.
    
    ### Why are the changes needed?
    Proper names of error classes improve the user experience with Spark SQL.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Pass GitHub Actions
    
    Closes #38754 from LuciferYang/SPARK-41180.
    
    Lead-authored-by: yangjie01 <yangji...@baidu.com>
    Co-authored-by: YangJie <yangji...@baidu.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 core/src/main/resources/error/error-classes.json   | 23 ++++---
 project/MimaExcludes.scala                         |  5 +-
 .../spark/sql/catalyst/expressions/ExprUtils.scala |  2 +-
 .../spark/sql/errors/QueryCompilationErrors.scala  | 23 ++++---
 .../apache/spark/sql/errors/QueryErrorsBase.scala  |  4 ++
 .../org/apache/spark/sql/types/DataType.scala      | 13 ++--
 .../scala/org/apache/spark/sql/functions.scala     |  1 -
 .../sql-tests/results/csv-functions.sql.out        | 12 ++--
 .../sql-tests/results/json-functions.sql.out       | 12 ++--
 .../org/apache/spark/sql/CsvFunctionsSuite.scala   |  4 +-
 .../apache/spark/sql/DataFrameFunctionsSuite.scala |  4 +-
 .../org/apache/spark/sql/JsonFunctionsSuite.scala  | 73 ++++++++++++++++------
 12 files changed, 115 insertions(+), 61 deletions(-)

diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json
index 9f4337d0618..89728777201 100644
--- a/core/src/main/resources/error/error-classes.json
+++ b/core/src/main/resources/error/error-classes.json
@@ -780,8 +780,21 @@
   },
   "INVALID_SCHEMA" : {
     "message" : [
-      "The expression <expr> is not a valid schema string."
-    ]
+      "The input schema <inputSchema> is not a valid schema string."
+    ],
+    "subClass" : {
+      "NON_STRING_LITERAL" : {
+        "message" : [
+          "The input expression must be string literal and not null."
+        ]
+      },
+      "PARSE_ERROR" : {
+        "message" : [
+          "Cannot parse the schema:",
+          "<reason>"
+        ]
+      }
+    }
   },
   "INVALID_SQL_SYNTAX" : {
     "message" : [
@@ -2844,12 +2857,6 @@
       "The SQL config '<configName>' was removed in the version <version>. 
<comment>"
     ]
   },
-  "_LEGACY_ERROR_TEMP_1227" : {
-    "message" : [
-      "<msg><e1>",
-      "Failed fallback parsing: <e2>"
-    ]
-  },
   "_LEGACY_ERROR_TEMP_1228" : {
     "message" : [
       "Decimal scale (<scale>) cannot be greater than precision (<precision>)."
diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala
index d8f87a504fa..eed79d1f204 100644
--- a/project/MimaExcludes.scala
+++ b/project/MimaExcludes.scala
@@ -120,7 +120,10 @@ object MimaExcludes {
    ProblemFilters.exclude[IncompatibleMethTypeProblem]("org.apache.spark.storage.ShuffleBlockFetcherIterator#FetchRequest.apply"),
 
    // [SPARK-41072][SS] Add the error class STREAM_FAILED to StreamingQueryException
-    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryException.this")
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.streaming.StreamingQueryException.this"),
+
+    // [SPARK-41180][SQL] Reuse INVALID_SCHEMA instead of _LEGACY_ERROR_TEMP_1227
+    ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.sql.types.DataType.parseTypeWithFallback")
   )
 
  // Default exclude rules
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala
index 3e10b820aa6..e9084442b22 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ExprUtils.scala
@@ -35,7 +35,7 @@ object ExprUtils extends QueryErrorsBase {
         case s: UTF8String if s != null =>
           val dataType = DataType.fromDDL(s.toString)
           CharVarcharUtils.failIfHasCharVarchar(dataType)
-        case _ => throw QueryCompilationErrors.invalidSchemaStringError(exp)
+        case _ => throw QueryCompilationErrors.unexpectedSchemaTypeError(exp)
 
       }
     } else {
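
A companion sketch for the renamed helper above (again assuming a build with
this change): a schema argument that is not a string literal now raises
INVALID_SCHEMA.NON_STRING_LITERAL, mirroring the DataFrameFunctionsSuite
expectation further down:

    try {
      // Reusing `spark` from the previous sketch; the schema argument here
      // is an integer literal, not a string literal.
      spark.sql("""select from_json('{"a":1}', 1)""").collect()
    } catch {
      case e: org.apache.spark.sql.AnalysisException =>
        println(e.getErrorClass) // INVALID_SCHEMA.NON_STRING_LITERAL
    }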
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 486bd21b844..ce99bf4aa47 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -993,10 +993,20 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
       messageParameters = Map("key" -> key, "details" -> details))
   }
 
-  def invalidSchemaStringError(exp: Expression): Throwable = {
+  def schemaFailToParseError(schema: String, e: Throwable): Throwable = {
     new AnalysisException(
-      errorClass = "INVALID_SCHEMA",
-      messageParameters = Map("expr" -> toSQLExpr(exp)))
+      errorClass = "INVALID_SCHEMA.PARSE_ERROR",
+      messageParameters = Map(
+        "inputSchema" -> toSQLSchema(schema),
+        "reason" -> e.getMessage
+      ),
+      cause = Some(e))
+  }
+
+  def unexpectedSchemaTypeError(exp: Expression): Throwable = {
+    new AnalysisException(
+      errorClass = "INVALID_SCHEMA.NON_STRING_LITERAL",
+      messageParameters = Map("inputSchema" -> toSQLExpr(exp)))
   }
 
   def schemaNotFoldableError(exp: Expression): Throwable = {
@@ -2229,13 +2239,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase {
         "comment" -> comment))
   }
 
-  def failedFallbackParsingError(msg: String, e1: Throwable, e2: Throwable): Throwable = {
-    new AnalysisException(
-      errorClass = "_LEGACY_ERROR_TEMP_1227",
-      messageParameters = Map("msg" -> msg, "e1" -> e1.getMessage, "e2" -> e2.getMessage),
-      cause = Some(e1.getCause))
-  }
-
  def decimalCannotGreaterThanPrecisionError(scale: Int, precision: Int): Throwable = {
     new AnalysisException(
       errorClass = "_LEGACY_ERROR_TEMP_1228",
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala
index 560c7d9beb9..5460de77a14 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryErrorsBase.scala
@@ -104,6 +104,10 @@ private[sql] trait QueryErrorsBase {
     quoteByDefault(toPrettySQL(e))
   }
 
+  def toSQLSchema(schema: String): String = {
+    quoteByDefault(schema)
+  }
+
   def getSummary(sqlContext: SQLQueryContext): String = {
     if (sqlContext == null) "" else sqlContext.summary
   }
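
The `quoteByDefault` helper is not shown in this diff; judging by the updated
test expectations (e.g. `"inputSchema" -> "\"MAP<INT, cow>\""`), it wraps the
raw schema text in double quotes. A standalone sketch of that behavior:

    // Hypothetical stand-in for QueryErrorsBase.toSQLSchema, for illustration
    // only; the real implementation delegates to quoteByDefault.
    def toSQLSchemaSketch(schema: String): String = "\"" + schema + "\""

    assert(toSQLSchemaSketch("MAP<INT, cow>") == "\"MAP<INT, cow>\"")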
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
index ef7f1553be9..ecd1cc77515 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala
@@ -27,6 +27,7 @@ import org.json4s.JsonAST.JValue
 import org.json4s.JsonDSL._
 import org.json4s.jackson.JsonMethods._
 
+import org.apache.spark.SparkThrowable
 import org.apache.spark.annotation.Stable
 import org.apache.spark.sql.catalyst.analysis.Resolver
 import org.apache.spark.sql.catalyst.expressions.{Cast, Expression}
@@ -133,7 +134,6 @@ object DataType {
     parseTypeWithFallback(
       ddl,
       CatalystSqlParser.parseDataType,
-      "Cannot parse the data type: ",
       fallbackParser = str => CatalystSqlParser.parseTableSchema(str))
   }
 
@@ -144,24 +144,25 @@ object DataType {
    *
   * @param schema The schema string to be parsed by `parser` or `fallbackParser`.
   * @param parser The function that should be invoked first.
-   * @param errorMsg The error message for `parser`.
   * @param fallbackParser The function that is called when `parser` fails.
   * @return The data type parsed from the `schema` string.
    */
   def parseTypeWithFallback(
       schema: String,
       parser: String => DataType,
-      errorMsg: String,
       fallbackParser: String => DataType): DataType = {
     try {
       parser(schema)
     } catch {
-      case NonFatal(e1) =>
+      case NonFatal(e) =>
         try {
           fallbackParser(schema)
         } catch {
-          case NonFatal(e2) =>
-            throw QueryCompilationErrors.failedFallbackParsingError(errorMsg, e1, e2)
+          case NonFatal(_) =>
+            if (e.isInstanceOf[SparkThrowable]) {
+              throw e
+            }
+            throw QueryCompilationErrors.schemaFailToParseError(schema, e)
         }
     }
   }
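
A usage sketch of the revised fallback semantics (assuming a build with this
change): the primary parser runs first, the fallback second, and when both
fail the primary error is rethrown as-is if it is already a SparkThrowable,
otherwise it is wrapped in INVALID_SCHEMA.PARSE_ERROR:

    import org.apache.spark.sql.types.DataType

    DataType.fromDDL("INT")             // parsed by parseDataType directly
    DataType.fromDDL("a INT, b STRING") // parseDataType fails; the fallback
                                        // parseTableSchema succeeds
    // "a InvalidType" fails both parsers; the primary error is a
    // PARSE_SYNTAX_ERROR (a SparkThrowable), so it is rethrown unchanged,
    // which is what the updated .sql.out files below expect.
    // DataType.fromDDL("a InvalidType")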
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index f38f24920fa..dc47e358341 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -4563,7 +4563,6 @@ object functions {
     val dataType = parseTypeWithFallback(
       schema,
       DataType.fromJson,
-      "Cannot parse the schema in JSON format: ",
       fallbackParser = DataType.fromDDL)
     from_json(e, dataType, options)
   }
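
A brief usage note (sketch, assuming a build with this change): the
String-schema from_json overload still tries the schema as a JSON-format type
first and falls back to DDL, so both spellings below remain valid:

    // DDL schema string: DataType.fromJson fails, DataType.fromDDL succeeds.
    df.select(from_json($"json", "a INT", Map.empty[String, String]))

    // JSON-format schema string: parsed directly by DataType.fromJson.
    val jsonSchema =
      """{"type":"struct","fields":[{"name":"a","type":"integer","nullable":true,"metadata":{}}]}"""
    df.select(from_json($"json", jsonSchema, Map.empty[String, String]))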
diff --git a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out
index 200ddd837e1..bddd8ccc37c 100644
--- a/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/csv-functions.sql.out
@@ -22,9 +22,9 @@ struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "INVALID_SCHEMA",
+  "errorClass" : "INVALID_SCHEMA.NON_STRING_LITERAL",
   "messageParameters" : {
-    "expr" : "\"1\""
+    "inputSchema" : "\"1\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -43,11 +43,11 @@ struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "_LEGACY_ERROR_TEMP_1227",
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42000",
   "messageParameters" : {
-    "e1" : "\n[PARSE_SYNTAX_ERROR] Syntax error at or near 'InvalidType': 
extra input 'InvalidType'(line 1, pos 2)\n\n== SQL ==\na InvalidType\n--^^^\n",
-    "e2" : "\nDataType invalidtype is not supported.(line 1, pos 2)\n\n== SQL 
==\na InvalidType\n--^^^\n",
-    "msg" : "Cannot parse the data type: "
+    "error" : "'InvalidType'",
+    "hint" : ": extra input 'InvalidType'"
   },
   "queryContext" : [ {
     "objectType" : "",
diff --git a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
index a9c4dd0b9fd..c8c8bee3925 100644
--- a/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/json-functions.sql.out
@@ -148,9 +148,9 @@ struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "INVALID_SCHEMA",
+  "errorClass" : "INVALID_SCHEMA.NON_STRING_LITERAL",
   "messageParameters" : {
-    "expr" : "\"1\""
+    "inputSchema" : "\"1\""
   },
   "queryContext" : [ {
     "objectType" : "",
@@ -169,11 +169,11 @@ struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
 {
-  "errorClass" : "_LEGACY_ERROR_TEMP_1227",
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42000",
   "messageParameters" : {
-    "e1" : "\n[PARSE_SYNTAX_ERROR] Syntax error at or near 'InvalidType': 
extra input 'InvalidType'(line 1, pos 2)\n\n== SQL ==\na InvalidType\n--^^^\n",
-    "e2" : "\nDataType invalidtype is not supported.(line 1, pos 2)\n\n== SQL 
==\na InvalidType\n--^^^\n",
-    "msg" : "Cannot parse the data type: "
+    "error" : "'InvalidType'",
+    "hint" : ": extra input 'InvalidType'"
   },
   "queryContext" : [ {
     "objectType" : "",
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
index 2a3058d9395..940eaaed6ac 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CsvFunctionsSuite.scala
@@ -367,8 +367,8 @@ class CsvFunctionsSuite extends QueryTest with SharedSparkSession {
       exception = intercept[AnalysisException] {
         Seq("1").toDF("csv").select(from_csv($"csv", lit(1), 
options)).collect()
       },
-      errorClass = "INVALID_SCHEMA",
-      parameters = Map("expr" -> "\"1\"")
+      errorClass = "INVALID_SCHEMA.NON_STRING_LITERAL",
+      parameters = Map("inputSchema" -> "\"1\"")
     )
   }
 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
index e9455ea51c1..2b747735f18 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala
@@ -5226,9 +5226,9 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSparkSession {
       exception = intercept[AnalysisException] {
         sql("select from_json('{\"a\":1}', 1)")
       },
-      errorClass = "INVALID_SCHEMA",
+      errorClass = "INVALID_SCHEMA.NON_STRING_LITERAL",
       parameters = Map(
-        "expr" -> "\"1\""
+        "inputSchema" -> "\"1\""
       ),
       context = ExpectedContext(
         fragment = "from_json('{\"a\":1}', 1)",
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
index 56bdefc98ba..800a602425b 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/JsonFunctionsSuite.scala
@@ -443,8 +443,8 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
       exception = intercept[AnalysisException] {
         df3.selectExpr("from_json(value, 1)")
       },
-      errorClass = "INVALID_SCHEMA",
-      parameters = Map("expr" -> "\"1\""),
+      errorClass = "INVALID_SCHEMA.NON_STRING_LITERAL",
+      parameters = Map("inputSchema" -> "\"1\""),
       context = ExpectedContext(
         fragment = "from_json(value, 1)",
         start = 0,
@@ -452,10 +452,22 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
       )
     )
 
-    val errMsg2 = intercept[AnalysisException] {
-      df3.selectExpr("""from_json(value, 'time InvalidType')""")
-    }
-    assert(errMsg2.getMessage.contains("DataType invalidtype is not supported"))
+    checkError(
+      exception = intercept[AnalysisException] {
+        df3.selectExpr("""from_json(value, 'time InvalidType')""")
+      },
+      errorClass = "PARSE_SYNTAX_ERROR",
+      sqlState = "42000",
+      parameters = Map(
+        "error" -> "'InvalidType'",
+        "hint" -> ": extra input 'InvalidType'"
+      ),
+      context = ExpectedContext(
+        fragment = "from_json(value, 'time InvalidType')",
+        start = 0,
+        stop = 35
+      )
+    )
     checkError(
       exception = intercept[AnalysisException] {
         df3.selectExpr("from_json(value, 'time Timestamp', named_struct('a', 
1))")
@@ -991,22 +1003,47 @@ class JsonFunctionsSuite extends QueryTest with SharedSparkSession {
   test("SPARK-33286: from_json - combined error messages") {
     val df = Seq("""{"a":1}""").toDF("json")
     val invalidJsonSchema = """{"fields": [{"a":123}], "type": "struct"}"""
-    val errMsg1 = intercept[AnalysisException] {
-      df.select(from_json($"json", invalidJsonSchema, Map.empty[String, 
String])).collect()
-    }.getMessage
-    assert(errMsg1.contains("""Failed to convert the JSON string '{"a":123}' to a field"""))
+    val invalidJsonSchemaReason = "Failed to convert the JSON string '{\"a\":123}' to a field."
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.select(from_json($"json", invalidJsonSchema, Map.empty[String, 
String])).collect()
+      },
+      errorClass = "INVALID_SCHEMA.PARSE_ERROR",
+      parameters = Map(
+        "inputSchema" -> "\"{\"fields\": [{\"a\":123}], \"type\": 
\"struct\"}\"",
+        "reason" -> invalidJsonSchemaReason
+      )
+    )
 
     val invalidDataType = "MAP<INT, cow>"
-    val errMsg2 = intercept[AnalysisException] {
-      df.select(from_json($"json", invalidDataType, Map.empty[String, 
String])).collect()
-    }.getMessage
-    assert(errMsg2.contains("DataType cow is not supported"))
+    val invalidDataTypeReason = "Unrecognized token 'MAP': " +
+      "was expecting (JSON String, Number, Array, Object or token 'null', 
'true' or 'false')\n " +
+      "at [Source: (String)\"MAP<INT, cow>\"; line: 1, column: 4]"
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.select(from_json($"json", invalidDataType, Map.empty[String, 
String])).collect()
+      },
+      errorClass = "INVALID_SCHEMA.PARSE_ERROR",
+      parameters = Map(
+        "inputSchema" -> "\"MAP<INT, cow>\"",
+        "reason" -> invalidDataTypeReason
+      )
+    )
 
     val invalidTableSchema = "x INT, a cow"
-    val errMsg3 = intercept[AnalysisException] {
-      df.select(from_json($"json", invalidTableSchema, Map.empty[String, 
String])).collect()
-    }.getMessage
-    assert(errMsg3.contains("DataType cow is not supported"))
+    val invalidTableSchemaReason = "Unrecognized token 'x': " +
+      "was expecting (JSON String, Number, Array, Object or token 'null', 
'true' or 'false')\n" +
+      " at [Source: (String)\"x INT, a cow\"; line: 1, column: 2]"
+    checkError(
+      exception = intercept[AnalysisException] {
+        df.select(from_json($"json", invalidTableSchema, Map.empty[String, 
String])).collect()
+      },
+      errorClass = "INVALID_SCHEMA.PARSE_ERROR",
+      parameters = Map(
+        "inputSchema" -> "\"x INT, a cow\"",
+        "reason" -> invalidTableSchemaReason
+      )
+    )
   }
 
   test("SPARK-33907: bad json input with json pruning optimization: 
GetStructField") {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
