This is an automated email from the ASF dual-hosted git repository. yao pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 2f7a9a1e26f [SPARK-44520][SQL] Replace the term UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY with UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY and disclosure root AE 2f7a9a1e26f is described below commit 2f7a9a1e26f46484a6bda4cf65f5119fdea3ba4c Author: Kent Yao <y...@apache.org> AuthorDate: Tue Jul 25 09:39:19 2023 +0800 [SPARK-44520][SQL] Replace the term UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY with UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY and disclosure root AE ### What changes were proposed in this pull request? 1. UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY is duplicated with UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY. This PR uses the shorter one. 2. Bugfix: the root AnalysisException (AE) was previously hidden when UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY was raised; it is now disclosed. ### Why are the changes needed? UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY is duplicated. The root AnalysisException was hidden behind UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY; this is a bugfix to disclose it. For example, ```scala select id from parquet.`abc`; [UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY] Unsupported data source type for direct query on files: parquet; line 1 pos 15 org.apache.spark.sql.AnalysisException: [UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY] Unsupported data source type for direct query on files: parquet; line 1 pos 15 ``` ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? existing tests Closes #42124 from yaooqinn/SPARK-44520. 
Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Kent Yao <y...@apache.org> --- common/utils/src/main/resources/error/error-classes.json | 5 ----- docs/sql-error-conditions.md | 6 ------ .../apache/spark/sql/errors/QueryCompilationErrors.scala | 6 ------ .../org/apache/spark/sql/execution/datasources/rules.scala | 14 ++++++++------ .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 10 ++++------ .../apache/spark/sql/hive/execution/SQLQuerySuite.scala | 11 +++++++++++ 6 files changed, 23 insertions(+), 29 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 73b1cff7c4e..75300404194 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -2765,11 +2765,6 @@ ], "sqlState" : "0A000" }, - "UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY" : { - "message" : [ - "The direct query on files does not support the data source type: <className>. Please try a different data source type or consider using a different query method." - ] - }, "UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE" : { "message" : [ "The <format> datasource doesn't support the column <columnName> of the type <columnType>." diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md index 12238b6724b..06485193b9a 100644 --- a/docs/sql-error-conditions.md +++ b/docs/sql-error-conditions.md @@ -1900,12 +1900,6 @@ Unsupported data source type for direct query on files: `<dataSourceType>` Unsupported data type `<typeName>`. -### UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY - -SQLSTATE: none assigned - -The direct query on files does not support the data source type: `<className>`. Please try a different data source type or consider using a different query method. 
- ### UNSUPPORTED_DATA_TYPE_FOR_DATASOURCE SQLSTATE: none assigned diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 8120eb9b57e..03b0a72bba5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -1688,12 +1688,6 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { "tableSchema" -> tableSchema.toString)) } - def unsupportedDataSourceTypeForDirectQueryOnFilesError(className: String): Throwable = { - new AnalysisException( - errorClass = "UNSUPPORTED_DATA_SOURCE_FOR_DIRECT_QUERY", - messageParameters = Map("className" -> className)) - } - def saveDataIntoViewNotAllowedError(): Throwable = { new AnalysisException( errorClass = "_LEGACY_ERROR_TEMP_1158", diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala index 3f235e10c81..4cbd54e6d20 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/rules.scala @@ -45,15 +45,17 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] { conf.runSQLonFile && u.multipartIdentifier.size == 2 } - private def resolveDataSource(ident: Seq[String]): DataSource = { + private def resolveDataSource(unresolved: UnresolvedRelation): DataSource = { + val ident = unresolved.multipartIdentifier val dataSource = DataSource(sparkSession, paths = Seq(ident.last), className = ident.head) // `dataSource.providingClass` may throw ClassNotFoundException, the caller side will try-catch // it and return the original plan, so that the analyzer can report table not found later. 
val isFileFormat = classOf[FileFormat].isAssignableFrom(dataSource.providingClass) if (!isFileFormat || dataSource.className.toLowerCase(Locale.ROOT) == DDLUtils.HIVE_PROVIDER) { - throw QueryCompilationErrors.unsupportedDataSourceTypeForDirectQueryOnFilesError( - dataSource.className) + unresolved.failAnalysis( + errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY", + messageParameters = Map("dataSourceType" -> ident.head)) } dataSource } @@ -65,7 +67,7 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] { // fail to time travel. Otherwise, this is some other catalog table that isn't resolved yet, // so we should leave it be for now. try { - resolveDataSource(u.multipartIdentifier) + resolveDataSource(u) throw QueryCompilationErrors.timeTravelUnsupportedError(toSQLId(u.multipartIdentifier)) } catch { case _: ClassNotFoundException => r @@ -73,11 +75,11 @@ class ResolveSQLOnFile(sparkSession: SparkSession) extends Rule[LogicalPlan] { case u: UnresolvedRelation if maybeSQLFile(u) => try { - val ds = resolveDataSource(u.multipartIdentifier) + val ds = resolveDataSource(u) LogicalRelation(ds.resolveRelation()) } catch { case _: ClassNotFoundException => u - case e: Exception => + case e: Exception if !e.isInstanceOf[AnalysisException] => // the provider is valid, but failed to create a logical plan u.failAnalysis( errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY", diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 7ad27f05a58..0e08fed12ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1647,18 +1647,16 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark exception = intercept[AnalysisException] { sql("select * from json.invalid_file") }, - errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY", - 
parameters = Map("dataSourceType" -> "json"), - context = ExpectedContext("json.invalid_file", 14, 30) + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> "file:/.*invalid_file"), + matchPVals = true ) checkError( exception = intercept[AnalysisException] { sql(s"select id from `org.apache.spark.sql.hive.orc`.`file_path`") }, - errorClass = "UNSUPPORTED_DATASOURCE_FOR_DIRECT_QUERY", - parameters = Map("dataSourceType" -> "org.apache.spark.sql.hive.orc"), - context = ExpectedContext("`org.apache.spark.sql.hive.orc`.`file_path`", 15, 57) + errorClass = "_LEGACY_ERROR_TEMP_1138" ) e = intercept[AnalysisException] { diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala index e93576761c8..9308d1eda14 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLQuerySuite.scala @@ -1354,6 +1354,17 @@ abstract class SQLQuerySuiteBase extends QueryTest with SQLTestUtils with TestHi }) } + test("SPARK-44520: invalid path for support direct query shall throw correct exception") { + checkError( + exception = intercept[AnalysisException] { + sql(s"select id from parquet.`invalid_path`") + }, + errorClass = "PATH_NOT_FOUND", + parameters = Map("path" -> "file.*invalid_path"), + matchPVals = true + ) + } + test("run sql directly on files - orc") { val df = spark.range(100).toDF() withTempPath(f => { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org