This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 4b1f8c3d779b [SPARK-47399][SQL] Disable generated columns on expressions with collations 4b1f8c3d779b is described below commit 4b1f8c3d779b1391b414d6d6791bed5800b600bd Author: Stefan Kandic <stefan.kan...@databricks.com> AuthorDate: Fri Mar 15 16:12:40 2024 +0500 [SPARK-47399][SQL] Disable generated columns on expressions with collations ### What changes were proposed in this pull request? Disable the ability to use collations in expressions for generated columns. ### Why are the changes needed? Changing the collation of a column or even just changing the ICU version could lead to differences in the resulting expression, so it would be best if we simply disable it for now. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? With new unit tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #45520 from stefankandic/disableGeneratedColumnsCollation. 
Authored-by: Stefan Kandic <stefan.kan...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../spark/sql/catalyst/util/GeneratedColumn.scala | 5 ++ .../org/apache/spark/sql/CollationSuite.scala | 53 ++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala index 28ddc16cf6b0..747a0e225a2f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GeneratedColumn.scala @@ -29,6 +29,7 @@ import org.apache.spark.sql.connector.catalog.{CatalogManager, Identifier, Table import org.apache.spark.sql.errors.QueryCompilationErrors import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.{DataType, StructField, StructType} +import org.apache.spark.sql.util.SchemaUtils /** * This object contains utility methods and values for Generated Columns @@ -162,6 +163,10 @@ object GeneratedColumn { s"generation expression data type ${analyzed.dataType.simpleString} " + s"is incompatible with column data type ${dataType.simpleString}") } + if (analyzed.exists(e => SchemaUtils.hasNonDefaultCollatedString(e.dataType))) { + throw unsupportedExpressionError( + "generation expression cannot contain non-default collated string type") + } } /** diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index 72e72a53c4f6..bef7417be36c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -622,4 +622,57 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { case _: SortMergeJoinExec => () }.nonEmpty) } + + test("Generated column expressions using collations - 
errors out") { + checkError( + exception = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE testcat.test_table( + | c1 STRING COLLATE UNICODE, + | c2 STRING COLLATE UNICODE GENERATED ALWAYS AS (SUBSTRING(c1, 0, 1)) + |) + |USING $v2Source + |""".stripMargin) + }, + errorClass = "UNSUPPORTED_EXPRESSION_GENERATED_COLUMN", + parameters = Map( + "fieldName" -> "c2", + "expressionStr" -> "SUBSTRING(c1, 0, 1)", + "reason" -> "generation expression cannot contain non-default collated string type")) + + checkError( + exception = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE testcat.test_table( + | c1 STRING COLLATE UNICODE, + | c2 STRING COLLATE UNICODE GENERATED ALWAYS AS (c1 || 'a' COLLATE UNICODE) + |) + |USING $v2Source + |""".stripMargin) + }, + errorClass = "UNSUPPORTED_EXPRESSION_GENERATED_COLUMN", + parameters = Map( + "fieldName" -> "c2", + "expressionStr" -> "c1 || 'a' COLLATE UNICODE", + "reason" -> "generation expression cannot contain non-default collated string type")) + + checkError( + exception = intercept[AnalysisException] { + sql( + s""" + |CREATE TABLE testcat.test_table( + | struct1 STRUCT<a: STRING COLLATE UNICODE>, + | c2 STRING COLLATE UNICODE GENERATED ALWAYS AS (SUBSTRING(struct1.a, 0, 1)) + |) + |USING $v2Source + |""".stripMargin) + }, + errorClass = "UNSUPPORTED_EXPRESSION_GENERATED_COLUMN", + parameters = Map( + "fieldName" -> "c2", + "expressionStr" -> "SUBSTRING(struct1.a, 0, 1)", + "reason" -> "generation expression cannot contain non-default collated string type")) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org