This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new 159c7ab0d96 [SPARK-42301][SQL] Assign name to _LEGACY_ERROR_TEMP_1129 159c7ab0d96 is described below commit 159c7ab0d964ec94f8ebffd315b77295478fec29 Author: itholic <haejoon....@databricks.com> AuthorDate: Wed Feb 8 06:36:02 2023 +0500 [SPARK-42301][SQL] Assign name to _LEGACY_ERROR_TEMP_1129 ### What changes were proposed in this pull request? This PR proposes to assign the name "UNABLE_TO_INFER_SCHEMA" to the error class _LEGACY_ERROR_TEMP_1129. ### Why are the changes needed? We should assign proper names to the _LEGACY_ERROR_TEMP_* error classes. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? `./build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite*"` Closes #39871 from itholic/LEGACY_1129. Authored-by: itholic <haejoon....@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> (cherry picked from commit 05ea27e0b0ad43103f2bf2b1a9b499211492a0fe) Signed-off-by: Max Gekk <max.g...@gmail.com> --- R/pkg/tests/fulltests/test_sparkSQL.R | 3 +-- core/src/main/resources/error/error-classes.json | 11 ++++++----- .../apache/spark/sql/errors/QueryCompilationErrors.scala | 2 +- .../org/apache/spark/sql/execution/command/DDLSuite.scala | 7 +++++-- .../spark/sql/execution/datasources/orc/OrcQuerySuite.scala | 11 +++++++---- .../apache/spark/sql/streaming/FileStreamSourceSuite.scala | 13 ++++++++----- .../apache/spark/sql/test/DataFrameReaderWriterSuite.scala | 11 +++++++---- .../org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala | 12 +++++++----- 8 files changed, 42 insertions(+), 28 deletions(-) diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R index e5408840e72..bec184750e9 100644 --- a/R/pkg/tests/fulltests/test_sparkSQL.R +++ b/R/pkg/tests/fulltests/test_sparkSQL.R @@ -4014,8 +4014,7 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume # It makes sure that we can omit path argument in read.df API and then 
it calls # DataFrameWriter.load() without path. expect_error(read.df(source = "json"), - paste("Error in load : analysis error - Unable to infer schema for JSON.", - "It must be specified manually")) + "Error in load : analysis error - \\[UNABLE_TO_INFER_SCHEMA\\].*") expect_error(read.df("arbitrary_path"), "Error in load : analysis error - \\[PATH_NOT_FOUND\\].*") expect_error(read.json("arbitrary_path"), diff --git a/core/src/main/resources/error/error-classes.json b/core/src/main/resources/error/error-classes.json index ea2f28a7fc4..8a4afd3133b 100644 --- a/core/src/main/resources/error/error-classes.json +++ b/core/src/main/resources/error/error-classes.json @@ -1388,6 +1388,12 @@ "Unable to convert SQL type <toType> to Protobuf type <protobufType>." ] }, + "UNABLE_TO_INFER_SCHEMA" : { + "message" : [ + "Unable to infer schema for <format>. It must be specified manually." + ], + "sqlState" : "42KD9" + }, "UNBOUND_SQL_PARAMETER" : { "message" : [ "Found the unbound parameter: <name>. Please, fix `args` and provide a mapping of the parameter to a SQL literal." @@ -2620,11 +2626,6 @@ "Failed to resolve the schema for <format> for the partition column: <partitionColumn>. It must be specified manually." ] }, - "_LEGACY_ERROR_TEMP_1129" : { - "message" : [ - "Unable to infer schema for <format>. It must be specified manually." - ] - }, "_LEGACY_ERROR_TEMP_1131" : { "message" : [ "Data source <className> does not support <outputMode> output mode." 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 85444060d3b..9da7c7bccd7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1394,7 +1394,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { def dataSchemaNotSpecifiedError(format: String): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1129", + errorClass = "UNABLE_TO_INFER_SCHEMA", messageParameters = Map("format" -> format)) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index b1398f17e9e..3d88d4f7ab9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1174,8 +1174,11 @@ abstract class DDLSuite extends QueryTest with DDLSuiteBase { withTable("tab1", "tab2") { (("a", "b") :: Nil).toDF().write.json(tempDir.getCanonicalPath) - val e = intercept[AnalysisException] { sql("CREATE TABLE tab1 USING json") }.getMessage - assert(e.contains("Unable to infer schema for JSON. 
It must be specified manually")) + checkError( + exception = intercept[AnalysisException] { sql("CREATE TABLE tab1 USING json") }, + errorClass = "UNABLE_TO_INFER_SCHEMA", + parameters = Map("format" -> "JSON") + ) sql(s"CREATE TABLE tab2 using json location '${tempDir.toURI}'") checkAnswer(spark.table("tab2"), Row("a", "b")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala index 99cf6496512..6757747a81a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/orc/OrcQuerySuite.scala @@ -590,10 +590,13 @@ abstract class OrcQueryTest extends OrcTest { withSQLConf(SQLConf.IGNORE_CORRUPT_FILES.key -> "true") { testIgnoreCorruptFiles() testIgnoreCorruptFilesWithoutSchemaInfer() - val m1 = intercept[AnalysisException] { - testAllCorruptFiles() - }.getMessage - assert(m1.contains("Unable to infer schema for ORC")) + checkError( + exception = intercept[AnalysisException] { + testAllCorruptFiles() + }, + errorClass = "UNABLE_TO_INFER_SCHEMA", + parameters = Map("format" -> "ORC") + ) testAllCorruptFilesWithoutSchemaInfer() } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala index a8a4df2ad04..e5229c5f253 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/streaming/FileStreamSourceSuite.scala @@ -411,11 +411,14 @@ class FileStreamSourceSuite extends FileStreamSourceTest { withTempDir { src => withSQLConf(SQLConf.STREAMING_SCHEMA_INFERENCE.key -> "true") { - val e = intercept[AnalysisException] { - createFileStreamSourceAndGetSchema( - format = Some("json"), path = 
Some(src.getCanonicalPath), schema = None) - } - assert("Unable to infer schema for JSON. It must be specified manually." === e.getMessage) + checkError( + exception = intercept[AnalysisException] { + createFileStreamSourceAndGetSchema( + format = Some("json"), path = Some(src.getCanonicalPath), schema = None) + }, + errorClass = "UNABLE_TO_INFER_SCHEMA", + parameters = Map("format" -> "JSON") + ) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala index 17a003dfe8f..a9836d281f0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/DataFrameReaderWriterSuite.scala @@ -629,10 +629,13 @@ class DataFrameReaderWriterSuite extends QueryTest with SharedSparkSession with val schema = df.schema // Reader, without user specified schema - val message = intercept[AnalysisException] { - testRead(spark.read.csv(), Seq.empty, schema) - }.getMessage - assert(message.contains("Unable to infer schema for CSV. 
It must be specified manually.")) + checkError( + exception = intercept[AnalysisException] { + testRead(spark.read.csv(), Seq.empty, schema) + }, + errorClass = "UNABLE_TO_INFER_SCHEMA", + parameters = Map("format" -> "CSV") + ) testRead(spark.read.csv(dir), data, schema) testRead(spark.read.csv(dir, dir), data ++ data, schema) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala index 2bd5c21ee8a..e52d9b639dc 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala @@ -54,11 +54,13 @@ class HiveOrcQuerySuite extends OrcQueryTest with TestHiveSingleton { val zeroPath = new Path(path, "zero.orc") zeroPath.getFileSystem(spark.sessionState.newHadoopConf()).create(zeroPath) - val errorMessage = intercept[AnalysisException] { - spark.read.orc(path) - }.getMessage - - assert(errorMessage.contains("Unable to infer schema for ORC")) + checkError( + exception = intercept[AnalysisException] { + spark.read.orc(path) + }, + errorClass = "UNABLE_TO_INFER_SCHEMA", + parameters = Map("format" -> "ORC") + ) val singleRowDF = Seq((0, "foo")).toDF("key", "value").coalesce(1) singleRowDF.createOrReplaceTempView("single") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org