This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 3480b1b871 [GLUTEN-7657][CH]Fix to_unix_timestamp when input parameter is timestamp type (#7660)
3480b1b871 is described below

commit 3480b1b8718b30c8f6b6e5f93852d7eb49877630
Author: kevinyhzou <[email protected]>
AuthorDate: Tue Oct 29 12:24:47 2024 +0800

    [GLUTEN-7657][CH]Fix to_unix_timestamp when input parameter is timestamp type (#7660)
    
    * fix to_unix_timestamp when input timestamp
    
    * remove useless code
    
    * consider PDT
    
    * fix to_timestamp
    
    * fix ci
---
 .../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 14 +++++++++--
 .../Functions/SparkFunctionDateToUnixTimestamp.h   | 18 +++++---------
 .../CommonScalarFunctionParser.cpp                 |  3 +--
 .../scalar_function_parser/unixTimestamp.cpp       | 28 ++++++++++++++++------
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  1 -
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  3 ---
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  3 ---
 .../utils/clickhouse/ClickHouseTestSettings.scala  |  3 ---
 8 files changed, 40 insertions(+), 33 deletions(-)

diff --git 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 3d86e8c12d..4c9bca4422 100644
--- 
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ 
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -1318,7 +1318,8 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends 
GlutenClickHouseTPCHAbstr
       "to_date(date_add(date'2024-05-07', cast(id as int)), 'yyyy') as a6, " +
       "to_date(to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as 
String))), 'yyyy-MM-dd HH:mm:ss') as a7, " +
       "to_timestamp(date_add(date'2024-05-07', cast(id as int)), 'yyyy-MM') as 
a8, " +
-      "to_timestamp(to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as 
String))), 'yyyy-MM-dd HH:mm:ss') as a9 " +
+      "to_timestamp(to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as 
String))), 'yyyy-MM-dd HH:mm:ss') as a9," +
+      "to_timestamp('2024-10-09 11:22:33.123', 'yyyy-MM-dd HH:mm:ss.SSS') " +
       "from range(9)"
     runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
   }
@@ -2486,15 +2487,24 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends 
GlutenClickHouseTPCHAbstr
     runQueryAndCompare(sql)({ _ => })
   }
 
-  test("GLUTEN-4085: Fix unix_timestamp") {
+  test("GLUTEN-4085: Fix unix_timestamp/to_unix_timestamp") {
     val tbl_create_sql = "create table test_tbl_4085(id bigint, data string) 
using parquet"
     val data_insert_sql =
       "insert into test_tbl_4085 values(1, '2023-12-18'),(2, '2023-12-19'), 
(3, '2023-12-20')"
     val select_sql =
       "select id, unix_timestamp(to_date(data), 'yyyy-MM-dd') from 
test_tbl_4085"
+    val select_sql_1 = "select id, to_unix_timestamp(to_date(data)) from 
test_tbl_4085"
+    val select_sql_2 = "select id, to_unix_timestamp(to_timestamp(data)) from 
test_tbl_4085"
+    val select_sql_3 =
+      "select id, unix_timestamp('2024-10-15 07:35:26.486', 'yyyy-MM-dd 
HH:mm:ss') from test_tbl_4085"
     spark.sql(tbl_create_sql)
     spark.sql(data_insert_sql)
     compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
+    compareResultsAgainstVanillaSpark(select_sql_1, true, { _ => })
+    compareResultsAgainstVanillaSpark(select_sql_2, true, { _ => })
+    withSQLConf("spark.sql.legacy.timeParserPolicy" -> "LEGACY") {
+      compareResultsAgainstVanillaSpark(select_sql_3, true, { _ => })
+    }
     spark.sql("drop table test_tbl_4085")
   }
 
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionDateToUnixTimestamp.h 
b/cpp-ch/local-engine/Functions/SparkFunctionDateToUnixTimestamp.h
index cdf0460e0e..7c95993bea 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionDateToUnixTimestamp.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionDateToUnixTimestamp.h
@@ -42,14 +42,7 @@ class SparkFunctionDateToUnixTimestamp : public IFunction
 public:
     static constexpr auto name = "sparkDateToUnixTimestamp";
     static FunctionPtr create(ContextPtr) { return 
std::make_shared<SparkFunctionDateToUnixTimestamp>(); }
-    SparkFunctionDateToUnixTimestamp()
-    {
-        const DateLUTImpl * date_lut = &DateLUT::instance("UTC");
-        UInt32 utc_timestamp = static_cast<UInt32>(0);
-        LocalDateTime date_time(utc_timestamp, *date_lut);
-        UInt32 unix_timestamp = date_time.to_time_t();
-        delta_timestamp_from_utc = unix_timestamp - utc_timestamp;
-    }
+    SparkFunctionDateToUnixTimestamp() {}
     ~SparkFunctionDateToUnixTimestamp() override = default;
     String getName() const override { return name; }
     bool isSuitableForShortCircuitArgumentsExecution(const 
DB::DataTypesWithConstInfo &) const override { return true; }
@@ -82,16 +75,17 @@ public:
         if (col->size() == 0)
             return res;
         
+        const DateLUTImpl * local_date_lut = &DateLUT::instance();
         for (size_t i = 0; i < input_rows; ++i)
         {
             const T t = col_src->getElement(i);
-            data[i] = static_cast<UInt32>(t * DATE_SECONDS_PER_DAY) + 
delta_timestamp_from_utc;
+            if constexpr (std::is_same_v<T, UInt16>)
+                data[i] = local_date_lut->fromDayNum(DayNum(t));
+            else
+                data[i] = local_date_lut->fromDayNum(ExtendedDayNum(t));
         }
         return res;
     }
-
-private:
-    UInt32 delta_timestamp_from_utc;
 };
 
 }
diff --git 
a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
 
b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
index 9ada4b1ed6..d658426745 100644
--- 
a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
+++ 
b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
@@ -57,9 +57,8 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Not, not, not );
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Xor, xor, xor);
 
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Cast, cast, CAST);
-REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetTimestamp, get_timestamp, 
parseDateTimeInJodaSyntaxOrNull);
+REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetTimestamp, get_timestamp, 
parseDateTime64InJodaSyntaxOrNull);
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Quarter, quarter, toQuarter);
-REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ToUnixTimestamp, to_unix_timestamp, 
parseDateTimeInJodaSyntaxOrNull);
 
 // math functions
 REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Position, positive, identity);
diff --git 
a/cpp-ch/local-engine/Parser/scalar_function_parser/unixTimestamp.cpp 
b/cpp-ch/local-engine/Parser/scalar_function_parser/unixTimestamp.cpp
index 18286c6add..622237da97 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/unixTimestamp.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/unixTimestamp.cpp
@@ -33,23 +33,23 @@ namespace ErrorCodes
 namespace local_engine
 {
 
+template<typename Name>
 class FunctionParserUnixTimestamp : public FunctionParser
 {
 public:
     explicit FunctionParserUnixTimestamp(ParserContextPtr parser_context_) : 
FunctionParser(parser_context_) {}
     ~FunctionParserUnixTimestamp() override = default;
 
-    static constexpr auto name = "unix_timestamp";
-
-    String getName() const override { return name; }
+    static constexpr auto name = Name::name;
+    String getName() const override { return Name::name; }
 
     const ActionsDAG::Node * parse(
         const substrait::Expression_ScalarFunction & substrait_func,
         ActionsDAG & actions_dag) const override
     {
         /*
-        spark function: unix_timestamp(expr, fmt)
-        1. If expr type is string, ch function = 
parseDateTimeInJodaSyntaxOrNull(expr, format)
+        spark function: unix_timestamp(expr, fmt) / to_unix_timestamp(expr, 
fmt)
+        1. If expr type is string, ch function = 
parseDateTime64InJodaSyntaxOrNull(expr, format)
         2. If expr type is date/TIMESTAMP, ch function = toUnixTimestamp(expr, 
format)
         3. Otherwise, throw exception
         */
@@ -65,7 +65,7 @@ public:
 
         const DB::ActionsDAG::Node * result_node = nullptr;
         if (isString(expr_type))
-            result_node = toFunctionNode(actions_dag, 
"parseDateTimeInJodaSyntaxOrNull", {expr_arg, fmt_arg, time_zone_node});
+            result_node = toFunctionNode(actions_dag, 
"parseDateTime64InJodaSyntaxOrNull", {expr_arg, fmt_arg, time_zone_node});
         else if (isDateOrDate32(expr_type))
             result_node = toFunctionNode(actions_dag, 
"sparkDateToUnixTimestamp", {expr_arg, time_zone_node});
         else if (isDateTime(expr_type) || isDateTime64(expr_type))
@@ -76,5 +76,19 @@ public:
         return convertNodeTypeIfNeeded(substrait_func, result_node, 
actions_dag);
     }
 };
-static FunctionParserRegister<FunctionParserUnixTimestamp> 
register_unix_timestamp;
+
+struct FunctionNameUnixTimestamp
+{
+    static constexpr auto name = "unix_timestamp";
+};
+
+struct FunctionNameToUnixTimestamp
+{
+    static constexpr auto name = "to_unix_timestamp";
+};
+
+using FunctionParserForUnixTimestamp = 
FunctionParserUnixTimestamp<FunctionNameUnixTimestamp>;
+using FunctionParseToUnixTimestamp = 
FunctionParserUnixTimestamp<FunctionNameToUnixTimestamp>;
+static FunctionParserRegister<FunctionParserForUnixTimestamp> 
register_unix_timestamp;
+static FunctionParserRegister<FunctionParseToUnixTimestamp> 
register_to_unix_timestamp;
 }
diff --git 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index b8171bc633..4bf153f8bf 100644
--- 
a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -282,7 +282,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("groupBy.as")
   enableSuite[GlutenDateFunctionsSuite]
     .exclude("function to_date")
-    .exclude("from_unixtime")
     .exclude("unix_timestamp")
     .exclude("to_unix_timestamp")
     .exclude("to_timestamp")
diff --git 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 731f66c530..2eeec0b544 100644
--- 
a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -308,11 +308,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-40660: Switch to XORShiftRandom to distribute elements")
   enableSuite[GlutenDateFunctionsSuite]
     .exclude("function to_date")
-    .exclude("from_unixtime")
     .exclude("unix_timestamp")
     .exclude("to_unix_timestamp")
-    .exclude("to_timestamp")
-    .excludeGlutenTest("to_timestamp")
     .exclude("to_timestamp with microseconds precision")
     .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
     .exclude("SPARK-30766: date_trunc of old timestamps to hours and days")
diff --git 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 86d235dc88..b13bb2abc9 100644
--- 
a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -306,11 +306,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-40660: Switch to XORShiftRandom to distribute elements")
   enableSuite[GlutenDateFunctionsSuite]
     .exclude("function to_date")
-    .exclude("from_unixtime")
     .exclude("unix_timestamp")
     .exclude("to_unix_timestamp")
-    .exclude("to_timestamp")
-    .excludeGlutenTest("to_timestamp")
     .exclude("to_timestamp with microseconds precision")
     .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
     .exclude("SPARK-30766: date_trunc of old timestamps to hours and days")
diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 3fe2764c74..9836cb27f5 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -306,11 +306,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
     .exclude("SPARK-40660: Switch to XORShiftRandom to distribute elements")
   enableSuite[GlutenDateFunctionsSuite]
     .exclude("function to_date")
-    .exclude("from_unixtime")
     .exclude("unix_timestamp")
     .exclude("to_unix_timestamp")
-    .exclude("to_timestamp")
-    .excludeGlutenTest("to_timestamp")
     .exclude("to_timestamp with microseconds precision")
     .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
     .exclude("SPARK-30766: date_trunc of old timestamps to hours and days")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to