This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new c162f6df1b3d [SPARK-46144][SQL] Fail INSERT INTO ... REPLACE statement if the condition contains subquery c162f6df1b3d is described below commit c162f6df1b3d6ccc2944b6fb6db033482c9f01ee Author: Gengliang Wang <gengli...@apache.org> AuthorDate: Wed Nov 29 21:19:58 2023 -0800 [SPARK-46144][SQL] Fail INSERT INTO ... REPLACE statement if the condition contains subquery ### What changes were proposed in this pull request? For the following query: ``` INSERT INTO tbl REPLACE WHERE id = (select c2 from values(1) as t(c2)) SELECT * FROM source ``` There will be an analysis error: ``` [UNRESOLVED_COLUMN.WITHOUT_SUGGESTION] A column, variable, or function parameter with name `c2` cannot be resolved. SQLSTATE: 42703; line 1 pos 51; 'OverwriteByExpression RelationV2[id#27L, data#28] testcat.tbl testcat.tbl, (id#27L = scalar-subquery#26 []), false ``` The error message is confusing. The actual reason is the OverwriteByExpression plan doesn't support subqueries. While supporting the feature is non-trivial, this PR is to improve the error message as ``` [UNSUPPORTED_FEATURE.OVERWRITE_BY_SUBQUERY] The feature is not supported: INSERT OVERWRITE with a subquery condition. SQLSTATE: 0A000; line 1 pos 43; ``` ### Why are the changes needed? Error message improvement ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New UT ### Was this patch authored or co-authored using generative AI tooling? No Closes #44060 from gengliangwang/replace. Authored-by: Gengliang Wang <gengli...@apache.org> Signed-off-by: Gengliang Wang <gengli...@apache.org> --- .../src/main/resources/error/error-classes.json | 5 +++++ ...r-conditions-unsupported-feature-error-class.md | 4 ++++ .../sql/catalyst/analysis/CheckAnalysis.scala | 5 +++++ .../spark/sql/connector/DataSourceV2SQLSuite.scala | 26 ++++++++++++++++++++++ 4 files changed, 40 insertions(+) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 5b70edf249d1..9e0019b34728 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -3529,6 +3529,11 @@ "Unable to convert <orcType> of Orc to data type <toType>." ] }, + "OVERWRITE_BY_SUBQUERY" : { + "message" : [ + "INSERT OVERWRITE with a subquery condition." + ] + }, "PANDAS_UDAF_IN_PIVOT" : { "message" : [ "Pandas user defined aggregate function in the PIVOT clause." diff --git a/docs/sql-error-conditions-unsupported-feature-error-class.md b/docs/sql-error-conditions-unsupported-feature-error-class.md index 0541b9d0589e..1143aff634c2 100644 --- a/docs/sql-error-conditions-unsupported-feature-error-class.md +++ b/docs/sql-error-conditions-unsupported-feature-error-class.md @@ -121,6 +121,10 @@ The target JDBC server hosting table `<tableName>` does not support ALTER TABLE Unable to convert `<orcType>` of Orc to data type `<toType>`. +## OVERWRITE_BY_SUBQUERY + +INSERT OVERWRITE with a subquery condition. + ## PANDAS_UDAF_IN_PIVOT Pandas user defined aggregate function in the PIVOT clause. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index 3843901a2e01..ea1af1d3c8cd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -271,6 +271,11 @@ trait CheckAnalysis extends PredicateHelper with LookupCatalog with QueryErrorsB case _ => } + case o: OverwriteByExpression if o.deleteExpr.exists(_.isInstanceOf[SubqueryExpression]) => + o.deleteExpr.failAnalysis ( + errorClass = "UNSUPPORTED_FEATURE.OVERWRITE_BY_SUBQUERY", + messageParameters = Map.empty) + case operator: LogicalPlan => operator transformExpressionsDown { // Check argument data types of higher-order functions downwards first. diff --git a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala index c2e759efe402..b92b512aa1d3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/connector/DataSourceV2SQLSuite.scala @@ -3226,6 +3226,32 @@ class DataSourceV2SQLSuiteV1Filter } } + test("SPARK-46144: Fail overwrite statement if the condition contains subquery") { + val df = spark.createDataFrame(Seq((1L, "a"), (2L, "b"), (3L, "c"))).toDF("id", "data") + df.createOrReplaceTempView("source") + val t = "testcat.tbl" + withTable(t) { + spark.sql( + s"CREATE TABLE $t (id bigint, data string) USING foo PARTITIONED BY (id)") + spark.sql(s"INSERT INTO TABLE $t SELECT * FROM source") + val invalidQuery = s"INSERT INTO $t REPLACE WHERE id = (select c2 from values(1) as t(c2))" + + s" SELECT * FROM source" + val exception = intercept[AnalysisException] { + spark.sql(invalidQuery) + } + checkError( + exception, + errorClass = "UNSUPPORTED_FEATURE.OVERWRITE_BY_SUBQUERY", + sqlState = "0A000", + parameters = Map.empty, + context = ExpectedContext( + fragment = "id = (select c2 from values(1) as t(c2))", + start = 38, + stop = 77 + )) + } + } + test("SPARK-41154: Incorrect relation caching for queries with time travel spec") { sql("use testcat") val t1 = "testcat.t1" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org