This is an automated email from the ASF dual-hosted git repository.

gengliang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 1e13243ca394 [SPARK-46849][SQL][FOLLOWUP] Column default value cannot reference session variables
1e13243ca394 is described below

commit 1e13243ca394b04e0b1d2972d7c8eab2c63414e5
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Mon Feb 5 13:31:58 2024 -0800

    [SPARK-46849][SQL][FOLLOWUP] Column default value cannot reference session variables
    
    ### What changes were proposed in this pull request?
    
    One more follow-up of https://github.com/apache/spark/pull/44876.
    
    Previously, because a fake analyzer was used, session variables could not be
    resolved and thus could not appear in default value expressions. Now that we
    use the actual analyzer and optimizer, session variables are properly
    resolved and replaced with literals at the end. This is not expected, as
    default values should not reference temporary objects.
    
    This PR fixes the issue by explicitly failing the check if the default value
    references session variables.
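
    For illustration, here is a hypothetical session that mirrors the new test
    below (the table must use a V2 data source to reach the real analyzer; the
    data source name below is a placeholder):

        spark.sql("DECLARE test_var INT")
        // With this fix, the CREATE TABLE below fails with
        // INVALID_DEFAULT_VALUE.NOT_CONSTANT instead of silently folding
        // test_var into a literal default:
        spark.sql("CREATE TABLE t(i INT, j INT DEFAULT test_var) USING <v2 source>")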
    
    ### Why are the changes needed?
    
    Fix an unintended behavior change.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No. The behavior change has not been released yet.
    
    ### How was this patch tested?
    
    A new test case in `InsertSuite`.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    no
    
    Closes #45032 from cloud-fan/default-value.
    
    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Gengliang Wang <gengli...@apache.org>
---
 .../catalyst/util/ResolveDefaultColumnsUtil.scala  |  5 ++-
 .../org/apache/spark/sql/sources/InsertSuite.scala | 36 +++++++++++++++++++++-
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
index da03de73557f..a2bfc6e08da8 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ResolveDefaultColumnsUtil.scala
@@ -468,10 +468,13 @@ object ResolveDefaultColumns extends QueryErrorsBase
       }
       // Our analysis check passes here. We do not further inspect whether the
       // expression is `foldable` here, as the plan is not optimized yet.
-    } else if (default.references.nonEmpty) {
+    }
+
+    if (default.references.nonEmpty || default.exists(_.isInstanceOf[VariableReference])) {
      // Ideally we should let the rest of `CheckAnalysis` report errors about why the default
      // expression is unresolved. But we should report a better error here if the default
      // expression references columns, which means it's not a constant for sure.
+      // Note that session variables should be considered non-constant as well.
       throw QueryCompilationErrors.defaultValueNotConstantError(
         statement, colName, default.originalSQL)
     }
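
The new condition relies on Catalyst's TreeNode.exists, which tests a predicate
against every node of the expression tree, so the patch's check
`default.exists(_.isInstanceOf[VariableReference])` catches a session variable
even when it is nested below other operators. A minimal standalone sketch of
that traversal behavior (not part of the patch; it uses only stock Catalyst
expressions):

    import org.apache.spark.sql.catalyst.expressions.{Add, Cast, Literal}
    import org.apache.spark.sql.types.LongType

    // CAST(1 AS BIGINT) + 2L: the Cast sits below the top-level Add, so a
    // root-only type check would miss it, but exists(...) visits every node.
    val expr = Add(Cast(Literal(1), LongType), Literal(2L))
    assert(expr.exists(_.isInstanceOf[Cast]))  // true: Cast found below the root
    assert(expr.exists(_.isInstanceOf[Add]))   // true: the root itself matches
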
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
index 704df9d78ffa..2cc434318aa2 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/sources/InsertSuite.scala
@@ -28,6 +28,7 @@ import org.apache.spark.sql._
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType}
 import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.connector.FakeV2Provider
 import org.apache.spark.sql.execution.datasources.DataSourceUtils
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.internal.SQLConf.PartitionOverwriteMode
@@ -1150,7 +1151,7 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
   }
 
   test("SPARK-38336 INSERT INTO statements with tables with default columns: 
negative tests") {
-    // The default value fails to analyze.
+    // The default value references columns.
     withTable("t") {
       checkError(
         exception = intercept[AnalysisException] {
@@ -1162,6 +1163,39 @@ class InsertSuite extends DataSourceTest with SharedSparkSession {
           "colName" -> "`s`",
           "defaultValue" -> "badvalue"))
     }
+    try {
+      // The default value references session variables.
+      sql("DECLARE test_var INT")
+      withTable("t") {
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql("create table t(i boolean, s int default test_var) using 
parquet")
+          },
+          // V1 commands still use the fake analyzer, which can't resolve session variables,
+          // so we can only report an UNRESOLVED_EXPRESSION error.
+          errorClass = "INVALID_DEFAULT_VALUE.UNRESOLVED_EXPRESSION",
+          parameters = Map(
+            "statement" -> "CREATE TABLE",
+            "colName" -> "`s`",
+            "defaultValue" -> "test_var")
+        )
+        val v2Source = classOf[FakeV2Provider].getName
+        checkError(
+          exception = intercept[AnalysisException] {
+            sql(s"create table t(i int, j int default test_var) using 
$v2Source")
+          },
+          // V2 commands use the actual analyzer and can resolve session variables. We can report
+          // a more accurate NOT_CONSTANT error.
+          errorClass = "INVALID_DEFAULT_VALUE.NOT_CONSTANT",
+          parameters = Map(
+            "statement" -> "CREATE TABLE",
+            "colName" -> "`j`",
+            "defaultValue" -> "test_var")
+        )
+      }
+    } finally {
+      sql("DROP TEMPORARY VARIABLE test_var")
+    }
     // The default value analyzes to a table not in the catalog.
     withTable("t") {
       checkError(

