This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 78f7c30e140f [SPARK-42328][SQL] Remove _LEGACY_ERROR_TEMP_1175 from error classes 78f7c30e140f is described below commit 78f7c30e140fd8cf4a80b783dd7e9ee4d1b4d7e2 Author: Nikola Mandic <nikola.man...@databricks.com> AuthorDate: Thu Feb 22 12:09:02 2024 +0300 [SPARK-42328][SQL] Remove _LEGACY_ERROR_TEMP_1175 from error classes ### What changes were proposed in this pull request? The only occurrence of `_LEGACY_ERROR_TEMP_1175` appears under conversion from Spark data types to Parquet. All supported documented [Spark data types](https://spark.apache.org/docs/latest/sql-ref-datatypes.html) are covered in the [conversion function](https://github.com/apache/spark/blob/3e0808c33f185c13808ce2d547ce9ba0057d31a6/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala#L517-L745) (`VarcharType` and `CharType` are not present [...] Convert the error class to `INTERNAL_ERROR`. ### Why are the changes needed? Remove legacy error classes as part of activity in [SPARK-37935](https://issues.apache.org/jira/browse/SPARK-37935). ### Does this PR introduce _any_ user-facing change? If Spark works correctly, users shouldn't be able to run into `INTERNAL_ERROR` by using the public API. ### How was this patch tested? Added a test to `QueryCompilationErrorsSuite` and tested with sbt: ``` project sql testOnly *QueryCompilationErrorsSuite ``` ### Was this patch authored or co-authored using generative AI tooling? No. Closes #45183 from nikolamand-db/nikolamand-db/SPARK-42328. 
Authored-by: Nikola Mandic <nikola.man...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../utils/src/main/resources/error/error-classes.json | 5 ----- .../spark/sql/errors/QueryCompilationErrors.scala | 5 +++-- .../sql/errors/QueryCompilationErrorsSuite.scala | 19 +++++++++++++++++++ 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index c6149ce35a43..d4bb4920db88 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -5118,11 +5118,6 @@ "Unrecognized Parquet type: <field>." ] }, - "_LEGACY_ERROR_TEMP_1175" : { - "message" : [ - "Unsupported data type <dataType>." - ] - }, "_LEGACY_ERROR_TEMP_1181" : { "message" : [ "Stream-stream join without equality predicate is not supported." diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 53338f38ed6d..e96474862b1d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1908,8 +1908,9 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat def cannotConvertDataTypeToParquetTypeError(field: StructField): Throwable = { new AnalysisException( - errorClass = "_LEGACY_ERROR_TEMP_1175", - messageParameters = Map("dataType" -> field.dataType.catalogString)) + errorClass = "INTERNAL_ERROR", + messageParameters = Map("message" -> + s"Cannot convert Spark data type ${toSQLType(field.dataType)} to any Parquet type.")) } def incompatibleViewSchemaChangeError( diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala index e22399c326f6..d4e4a41155ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala @@ -24,6 +24,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.api.java.{UDF1, UDF2, UDF23Test} import org.apache.spark.sql.catalyst.expressions.{Coalesce, Literal, UnsafeRow} import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.execution.datasources.parquet.SparkToParquetSchemaConverter import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.expressions.SparkUserDefinedFunction import org.apache.spark.sql.functions._ @@ -962,6 +963,24 @@ class QueryCompilationErrorsSuite "methodName" -> "update", "className" -> "org.apache.spark.sql.catalyst.expressions.UnsafeRow")) } + + test("INTERNAL_ERROR: Convert unsupported data type from Spark to Parquet") { + val converter = new SparkToParquetSchemaConverter + val dummyDataType = new DataType { + override def defaultSize: Int = 0 + + override def simpleString: String = "Dummy" + + override private[spark] def asNullable = NullType + } + checkError( + exception = intercept[AnalysisException] { + converter.convertField(StructField("test", dummyDataType)) + }, + errorClass = "INTERNAL_ERROR", + parameters = Map("message" -> "Cannot convert Spark data type \"DUMMY\" to any Parquet type.") + ) + } } class MyCastToString extends SparkUserDefinedFunction( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org