This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 1e13243ca394 [SPARK-46849][SQL][FOLLOWUP] Column default value cannot reference session variables 1e13243ca394 is described below commit 1e13243ca394b04e0b1d2972d7c8eab2c63414e5 Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Mon Feb 5 13:31:58 2024 -0800 [SPARK-46849][SQL][FOLLOWUP] Column default value cannot reference session variables ### What changes were proposed in this pull request? One more followup of https://github.com/apache/spark/pull/44876 . Previously, by using a fake analyzer, session variables couldn't be resolved and thus couldn't appear in the default value expression. Now that we use the actual analyzer and optimizer, session variables can be properly resolved and replaced with literals at the end. This is not expected, as a default value shouldn't reference temporary things. This PR fixes this by explicitly failing the check if the default value references session variables. ### Why are the changes needed? Fixes an unintended behavior change. ### Does this PR introduce _any_ user-facing change? No, the behavior change is not released. ### How was this patch tested? New test. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #45032 from cloud-fan/default-value. 
Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Gengliang Wang <gengli...@apache.org> --- .../catalyst/util/ResolveDefaultColumnsUtil.scala | 5 ++- .../org/apache/spark/sql/sources/InsertSuite.scala | 36 +++++++++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala index da03de73557f..a2bfc6e08da8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala @@ -468,10 +468,13 @@ object ResolveDefaultColumns extends QueryErrorsBase } // Our analysis check passes here. We do not further inspect whether the // expression is `foldable` here, as the plan is not optimized yet. - } else if (default.references.nonEmpty) { + } + + if (default.references.nonEmpty || default.exists(_.isInstanceOf[VariableReference])) { // Ideally we should let the rest of `CheckAnalysis` report errors about why the default // expression is unresolved. But we should report a better error here if the default // expression references columns, which means it's not a constant for sure. + // Note that, session variable should be considered as non-constant as well. 
throw QueryCompilationErrors.defaultValueNotConstantError( statement, colName, default.originalSQL) } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala index 704df9d78ffa..2cc434318aa2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala @@ -28,6 +28,7 @@ import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} import org.apache.spark.sql.catalyst.parser.ParseException +import org.apache.spark.sql.connector.FakeV2Provider import org.apache.spark.sql.execution.datasources.DataSourceUtils import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode @@ -1150,7 +1151,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { } test("SPARK-38336 INSERT INTO statements with tables with default columns: negative tests") { - // The default value fails to analyze. + // The default value references columns. withTable("t") { checkError( exception = intercept[AnalysisException] { @@ -1162,6 +1163,39 @@ class InsertSuite extends DataSourceTest with SharedSparkSession { "colName" -> "`s`", "defaultValue" -> "badvalue")) } + try { + // The default value references session variables. + sql("DECLARE test_var INT") + withTable("t") { + checkError( + exception = intercept[AnalysisException] { + sql("create table t(i boolean, s int default test_var) using parquet") + }, + // V1 command still use the fake Analyzer which can't resolve session variables and we + // can only report UNRESOLVED_EXPRESSION error. 
+ errorClass = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION", + parameters = Map( + "statement" -> "CREATE TABLE", + "colName" -> "`s`", + "defaultValue" -> "test_var") + ) + val v2Source = classOf[FakeV2Provider].getName + checkError( + exception = intercept[AnalysisException] { + sql(s"create table t(i int, j int default test_var) using $v2Source") + }, + // V2 command uses the actual analyzer and can resolve session variables. We can report + // a more accurate NOT_CONSTANT error. + errorClass = "INVALID_DEFAULT_VALUE.NOT_CONSTANT", + parameters = Map( + "statement" -> "CREATE TABLE", + "colName" -> "`j`", + "defaultValue" -> "test_var") + ) + } + } finally { + sql("DROP TEMPORARY VARIABLE test_var") + } // The default value analyzes to a table not in the catalog. withTable("t") { checkError( --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org