This is an automated email from the ASF dual-hosted git repository. zhangzc pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push: new 233850c62 [GLUTEN-5651][CH] Fix error 'Illegal type of argument of function parseDateTimeInJodaSyntaxOrNull, expected String, got Date32' when executing to_date/to_timestamp (#5652) 233850c62 is described below commit 233850c625dd2274620a363214c2a694a1c09a10 Author: Zhichao Zhang <zhan...@apache.org> AuthorDate: Wed May 8 20:41:23 2024 +0800 [GLUTEN-5651][CH] Fix error 'Illegal type of argument of function parseDateTimeInJodaSyntaxOrNull, expected String, got Date32' when executing to_date/to_timestamp (#5652) Fix error 'Illegal type of argument of function parseDateTimeInJodaSyntaxOrNull, expected String, got Date32' when executing to_date/to_timestamp. Root cause: the Spark functions `to_date`/`to_timestamp` are mapped to the CH function `parseDateTimeInJodaSyntaxOrNull` when they execute with a specified format, but the CH function `parseDateTimeInJodaSyntaxOrNull` does not support `DateType` or `TimestampType` as the input data type, whereas Spark does. Closes #5651. 
--- .../GlutenClickHouseTPCHNullableSuite.scala | 10 +++++- .../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 13 ++++--- .../extension/RewriteToDateExpresstionRule.scala | 42 ++++++++++++++++++++-- 3 files changed, 57 insertions(+), 8 deletions(-) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala index fe6afedf4..0eb4de742 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala @@ -19,6 +19,7 @@ package org.apache.gluten.execution import org.apache.gluten.GlutenConfig import org.apache.spark.SparkConf +import org.apache.spark.sql.catalyst.expressions.Alias import org.apache.spark.sql.catalyst.optimizer.BuildLeft class GlutenClickHouseTPCHNullableSuite extends GlutenClickHouseTPCHAbstractSuite { @@ -235,7 +236,14 @@ class GlutenClickHouseTPCHNullableSuite extends GlutenClickHouseTPCHAbstractSuit case project: ProjectExecTransformer => project } assert(project.size == 1) - assert(project.apply(0).projectList.toString().contains("from_unixtime") == conf._2) + assert( + project + .apply(0) + .projectList(0) + .asInstanceOf[Alias] + .child + .toString() + .contains("from_unixtime") == conf._2) }) } }) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala index 20638615d..a1bba300e 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala @@ -49,8 +49,6 @@ class 
GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr .set("spark.sql.shuffle.partitions", "5") .set("spark.sql.autoBroadcastJoinThreshold", "10MB") .set("spark.gluten.supported.scala.udfs", "my_add") -// .set("spark.gluten.sql.columnar.backend.ch.runtime_config.logger.level", "trace") -// .set("spark.sql.planChangeLog.level", "error") } override protected val createNullableTables = true @@ -1271,8 +1269,15 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr } test("test 'to_date/to_timestamp'") { - val sql = "select to_date(concat('2022-01-0', cast(id+1 as String)), 'yyyy-MM-dd')," + - "to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as String)), 'yyyy-MM-dd HH:mm:ss') " + + val sql = "select to_date(concat('2022-01-0', cast(id+1 as String)), 'yyyy-MM-dd') as a1," + + "to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as String)), 'yyyy-MM-dd HH:mm:ss') as a2," + + "to_date(date_add(date'2024-05-07', cast(id as int)), 'yyyy-MM-dd') as a3, " + + "to_date(date_add(date'2024-05-07', cast(id as int)), 'yyyyMMdd') as a4, " + + "to_date(date_add(date'2024-05-07', cast(id as int)), 'yyyy-MM') as a5, " + + "to_date(date_add(date'2024-05-07', cast(id as int)), 'yyyy') as a6, " + + "to_date(to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as String))), 'yyyy-MM-dd HH:mm:ss') as a7, " + + "to_timestamp(date_add(date'2024-05-07', cast(id as int)), 'yyyy-MM') as a8, " + + "to_timestamp(to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as String))), 'yyyy-MM-dd HH:mm:ss') as a9 " + "from range(9)" runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer]) } diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/RewriteToDateExpresstionRule.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/RewriteToDateExpresstionRule.scala index f809bb70f..34d162d71 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/RewriteToDateExpresstionRule.scala +++ 
b/gluten-core/src/main/scala/org/apache/gluten/extension/RewriteToDateExpresstionRule.scala @@ -64,10 +64,25 @@ class RewriteToDateExpresstionRule(session: SparkSession, conf: SQLConf) } private def visitExpression(expression: NamedExpression): NamedExpression = expression match { - case Alias(c, _) if c.isInstanceOf[ParseToDate] => + case a @ Alias(c, _) if c.isInstanceOf[ParseToDate] => val newToDate = rewriteParseToDate(c.asInstanceOf[ParseToDate]) if (!newToDate.fastEquals(c)) { - Alias(newToDate, newToDate.toString())() + a.copy(newToDate, a.name)( + a.exprId, + a.qualifier, + a.explicitMetadata, + a.nonInheritableMetadataKeys) + } else { + expression + } + case a @ Alias(c, _) if c.isInstanceOf[ParseToTimestamp] => + val newToTimestamp = rewriteParseToTimestamp(c.asInstanceOf[ParseToTimestamp]) + if (!newToTimestamp.fastEquals(c)) { + a.copy(newToTimestamp, a.name)( + a.exprId, + a.qualifier, + a.explicitMetadata, + a.nonInheritableMetadataKeys) } else { expression } @@ -81,11 +96,32 @@ class RewriteToDateExpresstionRule(session: SparkSession, conf: SQLConf) val unixTimestamp = fromUnixTime.left.asInstanceOf[UnixTimestamp] val newLeft = unixTimestamp.left new ParseToDate(newLeft) + case date: Expression + if date.dataType.isInstanceOf[DateType] || date.dataType.isInstanceOf[TimestampType] => + // When the data type of the left child in the ParseToDate is the DateType or TimestampType, + // it will not deal with the format, + // also CH backend can not support the DateType or TimestampType as input data type + Cast(date, toDate.dataType, Some(SQLConf.get.sessionLocalTimeZone)) case _ => toDate } + private def rewriteParseToTimestamp(toTimestamp: ParseToTimestamp): Expression = + toTimestamp.left match { + case timestamp: Expression + if (timestamp.dataType.isInstanceOf[DateType] || + timestamp.dataType.isInstanceOf[TimestampType]) => + // When the data type of the left child in the ParseToDate is the DateType or TimestampType, + // it will not deal with the 
format, + // also CH backend can not support the DateType or TimestampType as input data type + Cast(timestamp, toTimestamp.dataType, Some(SQLConf.get.sessionLocalTimeZone)) + case _ => toTimestamp + } + private def canRewrite(project: Project): Boolean = { project.projectList.exists( - expr => expr.isInstanceOf[Alias] && expr.asInstanceOf[Alias].child.isInstanceOf[ParseToDate]) + expr => + expr.isInstanceOf[Alias] && + (expr.asInstanceOf[Alias].child.isInstanceOf[ParseToDate] || + expr.asInstanceOf[Alias].child.isInstanceOf[ParseToTimestamp])) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org For additional commands, e-mail: commits-h...@gluten.apache.org