This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 83c5107ba7f [SPARK-39643][SQL] Prohibit subquery expressions in DEFAULT values 83c5107ba7f is described below commit 83c5107ba7fc3345f8faf1e2a6e0c20c7da013d6 Author: Daniel Tenedorio <daniel.tenedo...@databricks.com> AuthorDate: Thu Jun 30 21:23:06 2022 -0700 [SPARK-39643][SQL] Prohibit subquery expressions in DEFAULT values ### What changes were proposed in this pull request? Prohibit subquery expressions in DEFAULT values. ### Why are the changes needed? This functionality is not part of the original feature idea, but we have not tested it so far. Here we explicitly check and prohibit this syntax. ### Does this PR introduce _any_ user-facing change? Yes. ### How was this patch tested? This PR adds new unit tests. Closes #37035 from dtenedor/no-subquery-exprs-in-defaults. Authored-by: Daniel Tenedorio <daniel.tenedo...@databricks.com> Signed-off-by: Gengliang Wang <gengli...@apache.org> --- .../catalyst/util/ResolveDefaultColumnsUtil.scala | 5 ++++ .../spark/sql/errors/QueryCompilationErrors.scala | 5 ++++ .../sql/catalyst/catalog/SessionCatalogSuite.scala | 2 +- .../org/apache/spark/sql/sources/InsertSuite.scala | 35 ++++++++++++++-------- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala index 2885f986236..2c3b1f35fb4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala @@ -26,6 +26,7 @@ import org.apache.spark.sql.catalyst.expressions.{Literal => ExprLiteral} import org.apache.spark.sql.catalyst.optimizer.ConstantFolding import org.apache.spark.sql.catalyst.parser.{CatalystSqlParser, ParseException} import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.trees.TreePattern.PLAN_EXPRESSION import org.apache.spark.sql.connector.catalog.{CatalogManager, FunctionCatalog, Identifier} import org.apache.spark.sql.connector.catalog.functions.UnboundFunction import org.apache.spark.sql.errors.QueryCompilationErrors @@ -141,6 +142,10 @@ object ResolveDefaultColumns { s"${field.name} has a DEFAULT value of $colText which fails to parse as a valid " + s"expression: ${ex.getMessage}") } + // Check invariants before moving on to analysis. + if (parsed.containsPattern(PLAN_EXPRESSION)) { + throw QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions() + } // Analyze the parse result. val plan = try { val analyzer: Analyzer = DefaultColumnAnalyzer diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 7ed5c785771..a909b362f68 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -2476,4 +2476,9 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase { s"Failed to execute command because DEFAULT values are not supported for target data " + "source with table provider: \"" + dataSource + "\"") } + + def defaultValuesMayNotContainSubQueryExpressions(): Throwable = { + new AnalysisException( + "Failed to execute command because subquery expressions are not allowed in DEFAULT values") + } } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala index da5e07d33c6..1a3566e0c62 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala @@ -164,7 +164,7 @@ abstract class SessionCatalogSuite extends AnalysisTest with Eventually { }.getMessage.contains("fails to parse as a valid expression")) assert(intercept[AnalysisException] { ResolveDefaultColumns.analyze(columnD, statementType) - }.getMessage.contains("fails to resolve as a valid expression")) + }.getMessage.contains("subquery expressions are not allowed in DEFAULT values")) assert(intercept[AnalysisException] { ResolveDefaultColumns.analyze(columnE, statementType) }.getMessage.contains("statement provided a value of incompatible type")) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 7370cf4f28b..576611cade5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -1025,7 +1025,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { object Errors { val COMMON_SUBSTRING = " has a DEFAULT value" val COLUMN_DEFAULT_NOT_FOUND = "`default` cannot be resolved." - val BAD_SUBQUERY = "cannot evaluate expression scalarsubquery() in inline table definition" + val BAD_SUBQUERY = "subquery expressions are not allowed in DEFAULT values" } // The default value fails to analyze. withTable("t") { @@ -1038,21 +1038,20 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { assert(intercept[AnalysisException] { sql("create table t(i boolean, s bigint default (select min(x) from badtable)) " + "using parquet") - }.getMessage.contains(Errors.COMMON_SUBSTRING)) + }.getMessage.contains(Errors.BAD_SUBQUERY)) } // The default value parses but refers to a table from the catalog. withTable("t", "other") { sql("create table other(x string) using parquet") assert(intercept[AnalysisException] { sql("create table t(i boolean, s bigint default (select min(x) from other)) using parquet") - }.getMessage.contains(Errors.COMMON_SUBSTRING)) + }.getMessage.contains(Errors.BAD_SUBQUERY)) } // The default value has an explicit alias. It fails to evaluate when inlined into the VALUES // list at the INSERT INTO time. withTable("t") { - sql("create table t(i boolean default (select false as alias), s bigint) using parquet") assert(intercept[AnalysisException] { - sql("insert into t values (default, default)") + sql("create table t(i boolean default (select false as alias), s bigint) using parquet") }.getMessage.contains(Errors.BAD_SUBQUERY)) } // Explicit default values may not participate in complex expressions in the VALUES list. @@ -1397,6 +1396,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { test("SPARK-38811 INSERT INTO on columns added with ALTER TABLE ADD COLUMNS: Negative tests") { object Errors { val COMMON_SUBSTRING = " has a DEFAULT value" + val BAD_SUBQUERY = "subquery expressions are not allowed in DEFAULT values" } // The default value fails to analyze. withTable("t") { @@ -1410,7 +1410,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { sql("create table t(i boolean) using parquet") assert(intercept[AnalysisException] { sql("alter table t add column s bigint default (select min(x) from badtable)") - }.getMessage.contains(Errors.COMMON_SUBSTRING)) + }.getMessage.contains(Errors.BAD_SUBQUERY)) } // The default value parses but refers to a table from the catalog. withTable("t", "other") { @@ -1418,7 +1418,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { sql("create table t(i boolean) using parquet") assert(intercept[AnalysisException] { sql("alter table t add column s bigint default (select min(x) from other)") - }.getMessage.contains(Errors.COMMON_SUBSTRING)) + }.getMessage.contains(Errors.BAD_SUBQUERY)) } // The default value parses but the type is not coercible. withTable("t") { @@ -1474,8 +1474,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { test("SPARK-38838 INSERT INTO with defaults set by ALTER TABLE ALTER COLUMN: negative tests") { object Errors { val COMMON_SUBSTRING = " has a DEFAULT value" - val BAD_SUBQUERY = - "cannot evaluate expression CAST(scalarsubquery() AS BIGINT) in inline table definition" + val BAD_SUBQUERY = "subquery expressions are not allowed in DEFAULT values" } val createTable = "create table t(i boolean, s bigint) using parquet" val insertDefaults = "insert into t values (default, default)" @@ -1488,12 +1487,11 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { // The default value analyzes to a table not in the catalog. assert(intercept[AnalysisException] { sql("alter table t alter column s set default (select min(x) from badtable)") - }.getMessage.contains(Errors.COMMON_SUBSTRING)) + }.getMessage.contains(Errors.BAD_SUBQUERY)) // The default value has an explicit alias. It fails to evaluate when inlined into the VALUES // list at the INSERT INTO time. - sql("alter table t alter column s set default (select 42 as alias)") assert(intercept[AnalysisException] { - sql(insertDefaults) + sql("alter table t alter column s set default (select 42 as alias)") }.getMessage.contains(Errors.BAD_SUBQUERY)) // The default value parses but the type is not coercible. assert(intercept[AnalysisException] { @@ -1673,6 +1671,19 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { } } + test("SPARK-39643 Prohibit subquery expressions in DEFAULT values") { + Seq( + "create table t(a string default (select 'abc')) using parquet", + "create table t(a string default exists(select 42 where true)) using parquet", + "create table t(a string default 1 in (select 1 union all select 2)) using parquet" + ).foreach { query => + assert(intercept[AnalysisException] { + sql(query) + }.getMessage.contains( + QueryCompilationErrors.defaultValuesMayNotContainSubQueryExpressions().getMessage)) + } + } + test("Stop task set if FileAlreadyExistsException was thrown") { Seq(true, false).foreach { fastFail => withSQLConf("fs.file.impl" -> classOf[FileExistingTestFileSystem].getName, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org