This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 4b1f8c3d779b [SPARK-47399][SQL] Disable generated columns on 
expressions with collations
4b1f8c3d779b is described below

commit 4b1f8c3d779b1391b414d6d6791bed5800b600bd
Author: Stefan Kandic <stefan.kan...@databricks.com>
AuthorDate: Fri Mar 15 16:12:40 2024 +0500

    [SPARK-47399][SQL] Disable generated columns on expressions with collations
    
    ### What changes were proposed in this pull request?
    Disable the ability to use collations in expressions for generated columns.
    
    ### Why are the changes needed?
    Changing the collation of a column, or even just changing the ICU version, 
could lead to differences in the resulting expression, so it would be best if 
we simply disable it for now.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    With new unit tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #45520 from stefankandic/disableGeneratedColumnsCollation.
    
    Authored-by: Stefan Kandic <stefan.kan...@databricks.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../spark/sql/catalyst/util/GeneratedColumn.scala  |  5 ++
 .../org/apache/spark/sql/CollationSuite.scala      | 53 ++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala
index 28ddc16cf6b0..747a0e225a2f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala
@@ -29,6 +29,7 @@ import 
org.apache.spark.sql.connector.catalog.{CatalogManager, Identifier, Table
 import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{DataType, StructField, StructType}
+import org.apache.spark.sql.util.SchemaUtils
 
 /**
  * This object contains utility methods and values for Generated Columns
@@ -162,6 +163,10 @@ object GeneratedColumn {
         s"generation expression data type ${analyzed.dataType.simpleString} " +
         s"is incompatible with column data type ${dataType.simpleString}")
     }
+    if (analyzed.exists(e => 
SchemaUtils.hasNonDefaultCollatedString(e.dataType))) {
+      throw unsupportedExpressionError(
+        "generation expression cannot contain non-default collated string 
type")
+    }
   }
 
   /**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
index 72e72a53c4f6..bef7417be36c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala
@@ -622,4 +622,57 @@ class CollationSuite extends DatasourceV2SQLBase with 
AdaptiveSparkPlanHelper {
       case _: SortMergeJoinExec => ()
     }.nonEmpty)
   }
+
+  test("Generated column expressions using collations - errors out") {
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql(
+          s"""
+             |CREATE TABLE testcat.test_table(
+             |  c1 STRING COLLATE UNICODE,
+             |  c2 STRING COLLATE UNICODE GENERATED ALWAYS AS (SUBSTRING(c1, 
0, 1))
+             |)
+             |USING $v2Source
+             |""".stripMargin)
+      },
+      errorClass = "UNSUPPORTED_EXPRESSION_GENERATED_COLUMN",
+      parameters = Map(
+        "fieldName" -> "c2",
+        "expressionStr" -> "SUBSTRING(c1, 0, 1)",
+        "reason" -> "generation expression cannot contain non-default collated 
string type"))
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql(
+          s"""
+             |CREATE TABLE testcat.test_table(
+             |  c1 STRING COLLATE UNICODE,
+             |  c2 STRING COLLATE UNICODE GENERATED ALWAYS AS (c1 || 'a' 
COLLATE UNICODE)
+             |)
+             |USING $v2Source
+             |""".stripMargin)
+      },
+      errorClass = "UNSUPPORTED_EXPRESSION_GENERATED_COLUMN",
+      parameters = Map(
+        "fieldName" -> "c2",
+        "expressionStr" -> "c1 || 'a' COLLATE UNICODE",
+        "reason" -> "generation expression cannot contain non-default collated 
string type"))
+
+    checkError(
+      exception = intercept[AnalysisException] {
+        sql(
+          s"""
+             |CREATE TABLE testcat.test_table(
+             |  struct1 STRUCT<a: STRING COLLATE UNICODE>,
+             |  c2 STRING COLLATE UNICODE GENERATED ALWAYS AS 
(SUBSTRING(struct1.a, 0, 1))
+             |)
+             |USING $v2Source
+             |""".stripMargin)
+      },
+      errorClass = "UNSUPPORTED_EXPRESSION_GENERATED_COLUMN",
+      parameters = Map(
+        "fieldName" -> "c2",
+        "expressionStr" -> "SUBSTRING(struct1.a, 0, 1)",
+        "reason" -> "generation expression cannot contain non-default collated 
string type"))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to