This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new e04d3d7c430a [SPARK-48215][SQL] Extending support for collated strings on date_format expression e04d3d7c430a is described below commit e04d3d7c430a1fa446f0379680f619b8b14b5eb5 Author: Nebojsa Savic <nebojsa.sa...@databricks.com> AuthorDate: Wed May 22 04:28:06 2024 -0700 [SPARK-48215][SQL] Extending support for collated strings on date_format expression ### What changes were proposed in this pull request? We are extending support for collated strings on the date_format function, since it currently throws a DATATYPE_MISMATCH exception when a collated string is passed as the "format" parameter. https://docs.databricks.com/en/sql/language-manual/functions/date_format.html ### Why are the changes needed? An exception is thrown on invocation when collated strings are passed as arguments to date_format. ### Does this PR introduce _any_ user-facing change? No user-facing changes; this only extends support. ### How was this patch tested? Tests are added with this PR. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #46561 from nebojsa-db/SPARK-48215. 
Authored-by: Nebojsa Savic <nebojsa.sa...@databricks.com> Signed-off-by: Wenchen Fan <wenc...@databricks.com> --- .../catalyst/expressions/datetimeExpressions.scala | 5 ++-- .../spark/sql/CollationSQLExpressionsSuite.scala | 32 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala index 081a42f5608e..8caf8c5d48c2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala @@ -36,6 +36,7 @@ import org.apache.spark.sql.catalyst.util.DateTimeUtils._ import org.apache.spark.sql.catalyst.util.LegacyDateFormats.SIMPLE_DATE_FORMAT import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors} import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.internal.types.StringTypeAnyCollation import org.apache.spark.sql.types._ import org.apache.spark.sql.types.DayTimeIntervalType.DAY import org.apache.spark.unsafe.types.{CalendarInterval, UTF8String} @@ -951,9 +952,9 @@ case class DateFormatClass(left: Expression, right: Expression, timeZoneId: Opti def this(left: Expression, right: Expression) = this(left, right, None) - override def dataType: DataType = StringType + override def dataType: DataType = SQLConf.get.defaultStringType - override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringType) + override def inputTypes: Seq[AbstractDataType] = Seq(TimestampType, StringTypeAnyCollation) override def withTimeZone(timeZoneId: String): TimeZoneAwareExpression = copy(timeZoneId = Option(timeZoneId)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala index 0d48f9f0a88d..828245bb3fdd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSQLExpressionsSuite.scala @@ -1600,6 +1600,38 @@ class CollationSQLExpressionsSuite }) } + test("DateFormat expression with collation") { + case class DateFormatTestCase[R](date: String, format: String, collation: String, result: R) + val testCases = Seq( + DateFormatTestCase("2021-01-01", "yyyy-MM-dd", "UTF8_BINARY", "2021-01-01"), + DateFormatTestCase("2021-01-01", "yyyy-dd", "UTF8_BINARY_LCASE", "2021-01"), + DateFormatTestCase("2021-01-01", "yyyy-MM-dd", "UNICODE", "2021-01-01"), + DateFormatTestCase("2021-01-01", "yyyy", "UNICODE_CI", "2021") + ) + + for { + collateDate <- Seq(true, false) + collateFormat <- Seq(true, false) + } { + testCases.foreach(t => { + val dateArg = if (collateDate) s"collate('${t.date}', '${t.collation}')" else s"'${t.date}'" + val formatArg = + if (collateFormat) { + s"collate('${t.format}', '${t.collation}')" + } else { + s"'${t.format}'" + } + + withSQLConf(SqlApiConf.DEFAULT_COLLATION -> t.collation) { + val query = s"SELECT date_format(${dateArg}, ${formatArg})" + // Result & data type + checkAnswer(sql(query), Row(t.result)) + assert(sql(query).schema.fields.head.dataType.sameType(StringType(t.collation))) + } + }) + } + } + // TODO: Add more tests for other SQL expressions } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org