This is an automated email from the ASF dual-hosted git repository.

zhangzc pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 233850c62 [GLUTEN-5651][CH] Fix error 'Illegal type of argument of function parseDateTimeInJodaSyntaxOrNull, expected String, got Date32' when executing to_date/to_timestamp (#5652)
233850c62 is described below

commit 233850c625dd2274620a363214c2a694a1c09a10
Author: Zhichao Zhang <zhan...@apache.org>
AuthorDate: Wed May 8 20:41:23 2024 +0800

    [GLUTEN-5651][CH] Fix error 'Illegal type of argument of function parseDateTimeInJodaSyntaxOrNull, expected String, got Date32' when executing to_date/to_timestamp (#5652)
    
    Fix error 'Illegal type of argument of function parseDateTimeInJodaSyntaxOrNull, expected String, got Date32' when executing to_date/to_timestamp.
    
    Root cause:
    The Spark functions `to_date`/`to_timestamp` are mapped to the CH function
    `parseDateTimeInJodaSyntaxOrNull` when they are executed with a specified
    format, but unlike Spark, the CH function `parseDateTimeInJodaSyntaxOrNull`
    does not support `DateType` or `TimestampType` as the input data type.
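    
    For example, before this fix a query of the following shape hit the error
    (a minimal reproduction derived from the new test case below; `date_add`
    already produces a DateType value, so CH receives Date32 instead of String):
    
        SELECT to_date(date_add(date'2024-05-07', 1), 'yyyy-MM-dd');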
    
    Close #5651.
---
 .../GlutenClickHouseTPCHNullableSuite.scala        | 10 +++++-
 .../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 13 ++++---
 .../extension/RewriteToDateExpresstionRule.scala   | 42 ++++++++++++++++++++--
 3 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala
index fe6afedf4..0eb4de742 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHNullableSuite.scala
@@ -19,6 +19,7 @@ package org.apache.gluten.execution
 import org.apache.gluten.GlutenConfig
 
 import org.apache.spark.SparkConf
+import org.apache.spark.sql.catalyst.expressions.Alias
 import org.apache.spark.sql.catalyst.optimizer.BuildLeft
 
 class GlutenClickHouseTPCHNullableSuite extends GlutenClickHouseTPCHAbstractSuite {
@@ -235,7 +236,14 @@ class GlutenClickHouseTPCHNullableSuite extends GlutenClickHouseTPCHAbstractSuit
                 case project: ProjectExecTransformer => project
               }
               assert(project.size == 1)
-              assert(project.apply(0).projectList.toString().contains("from_unixtime") == conf._2)
+              assert(
+                project
+                  .apply(0)
+                  .projectList(0)
+                  .asInstanceOf[Alias]
+                  .child
+                  .toString()
+                  .contains("from_unixtime") == conf._2)
             })
         }
       })
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 20638615d..a1bba300e 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -49,8 +49,6 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
       .set("spark.sql.shuffle.partitions", "5")
       .set("spark.sql.autoBroadcastJoinThreshold", "10MB")
       .set("spark.gluten.supported.scala.udfs", "my_add")
-//      .set("spark.gluten.sql.columnar.backend.ch.runtime_config.logger.level", "trace")
-//      .set("spark.sql.planChangeLog.level", "error")
   }
 
   override protected val createNullableTables = true
@@ -1271,8 +1269,15 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends GlutenClickHouseTPCHAbstr
   }
 
   test("test 'to_date/to_timestamp'") {
-    val sql = "select to_date(concat('2022-01-0', cast(id+1 as String)), 'yyyy-MM-dd')," +
-      "to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as String)), 'yyyy-MM-dd HH:mm:ss') " +
+    val sql = "select to_date(concat('2022-01-0', cast(id+1 as String)), 'yyyy-MM-dd') as a1," +
+      "to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as String)), 'yyyy-MM-dd HH:mm:ss') as a2," +
+      "to_date(date_add(date'2024-05-07', cast(id as int)), 'yyyy-MM-dd') as a3, " +
+      "to_date(date_add(date'2024-05-07', cast(id as int)), 'yyyyMMdd') as a4, " +
+      "to_date(date_add(date'2024-05-07', cast(id as int)), 'yyyy-MM') as a5, " +
+      "to_date(date_add(date'2024-05-07', cast(id as int)), 'yyyy') as a6, " +
+      "to_date(to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as String))), 'yyyy-MM-dd HH:mm:ss') as a7, " +
+      "to_timestamp(date_add(date'2024-05-07', cast(id as int)), 'yyyy-MM') as a8, " +
+      "to_timestamp(to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as String))), 'yyyy-MM-dd HH:mm:ss') as a9 " +
       "from range(9)"
     runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
   }
diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/RewriteToDateExpresstionRule.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/RewriteToDateExpresstionRule.scala
index f809bb70f..34d162d71 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/extension/RewriteToDateExpresstionRule.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/extension/RewriteToDateExpresstionRule.scala
@@ -64,10 +64,25 @@ class RewriteToDateExpresstionRule(session: SparkSession, conf: SQLConf)
   }
 
   private def visitExpression(expression: NamedExpression): NamedExpression = expression match {
-    case Alias(c, _) if c.isInstanceOf[ParseToDate] =>
+    case a @ Alias(c, _) if c.isInstanceOf[ParseToDate] =>
       val newToDate = rewriteParseToDate(c.asInstanceOf[ParseToDate])
       if (!newToDate.fastEquals(c)) {
-        Alias(newToDate, newToDate.toString())()
+        a.copy(newToDate, a.name)(
+          a.exprId,
+          a.qualifier,
+          a.explicitMetadata,
+          a.nonInheritableMetadataKeys)
+      } else {
+        expression
+      }
+    case a @ Alias(c, _) if c.isInstanceOf[ParseToTimestamp] =>
+      val newToTimestamp = rewriteParseToTimestamp(c.asInstanceOf[ParseToTimestamp])
+      if (!newToTimestamp.fastEquals(c)) {
+        a.copy(newToTimestamp, a.name)(
+          a.exprId,
+          a.qualifier,
+          a.explicitMetadata,
+          a.nonInheritableMetadataKeys)
       } else {
         expression
       }
@@ -81,11 +96,32 @@ class RewriteToDateExpresstionRule(session: SparkSession, conf: SQLConf)
       val unixTimestamp = fromUnixTime.left.asInstanceOf[UnixTimestamp]
       val newLeft = unixTimestamp.left
       new ParseToDate(newLeft)
+    case date: Expression
+        if date.dataType.isInstanceOf[DateType] || date.dataType.isInstanceOf[TimestampType] =>
+      // When the data type of the left child of ParseToDate is DateType or TimestampType,
+      // the format does not matter; also, the CH backend does not support DateType or
+      // TimestampType as the input data type, so cast directly instead.
+      Cast(date, toDate.dataType, Some(SQLConf.get.sessionLocalTimeZone))
     case _ => toDate
   }
 
+  private def rewriteParseToTimestamp(toTimestamp: ParseToTimestamp): Expression =
+    toTimestamp.left match {
+      case timestamp: Expression
+          if (timestamp.dataType.isInstanceOf[DateType] ||
+            timestamp.dataType.isInstanceOf[TimestampType]) =>
+        // When the data type of the left child of ParseToTimestamp is DateType or
+        // TimestampType, the format does not matter; also, the CH backend does not support
+        // DateType or TimestampType as the input data type, so cast directly instead.
+        Cast(timestamp, toTimestamp.dataType, Some(SQLConf.get.sessionLocalTimeZone))
+      case _ => toTimestamp
+    }
+
   private def canRewrite(project: Project): Boolean = {
     project.projectList.exists(
-      expr => expr.isInstanceOf[Alias] && expr.asInstanceOf[Alias].child.isInstanceOf[ParseToDate])
+      expr =>
+        expr.isInstanceOf[Alias] &&
+          (expr.asInstanceOf[Alias].child.isInstanceOf[ParseToDate] ||
+            expr.asInstanceOf[Alias].child.isInstanceOf[ParseToTimestamp]))
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@gluten.apache.org
For additional commands, e-mail: commits-h...@gluten.apache.org
