This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 3480b1b871 [GLUTEN-7657][CH]Fix to_unix_timestamp when input parameter
is timestamp type (#7660)
3480b1b871 is described below
commit 3480b1b8718b30c8f6b6e5f93852d7eb49877630
Author: kevinyhzou <[email protected]>
AuthorDate: Tue Oct 29 12:24:47 2024 +0800
[GLUTEN-7657][CH]Fix to_unix_timestamp when input parameter is timestamp
type (#7660)
* fix to_unix_timestamp when input timestamp
* remove useless code
* consider PDT
* fix to_timestamp
* fix ci
---
.../GlutenClickHouseTPCHSaltNullParquetSuite.scala | 14 +++++++++--
.../Functions/SparkFunctionDateToUnixTimestamp.h | 18 +++++---------
.../CommonScalarFunctionParser.cpp | 3 +--
.../scalar_function_parser/unixTimestamp.cpp | 28 ++++++++++++++++------
.../utils/clickhouse/ClickHouseTestSettings.scala | 1 -
.../utils/clickhouse/ClickHouseTestSettings.scala | 3 ---
.../utils/clickhouse/ClickHouseTestSettings.scala | 3 ---
.../utils/clickhouse/ClickHouseTestSettings.scala | 3 ---
8 files changed, 40 insertions(+), 33 deletions(-)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
index 3d86e8c12d..4c9bca4422 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/tpch/GlutenClickHouseTPCHSaltNullParquetSuite.scala
@@ -1318,7 +1318,8 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends
GlutenClickHouseTPCHAbstr
"to_date(date_add(date'2024-05-07', cast(id as int)), 'yyyy') as a6, " +
"to_date(to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as
String))), 'yyyy-MM-dd HH:mm:ss') as a7, " +
"to_timestamp(date_add(date'2024-05-07', cast(id as int)), 'yyyy-MM') as
a8, " +
- "to_timestamp(to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as
String))), 'yyyy-MM-dd HH:mm:ss') as a9 " +
+ "to_timestamp(to_timestamp(concat('2022-01-01 10:30:0', cast(id+1 as
String))), 'yyyy-MM-dd HH:mm:ss') as a9," +
+ "to_timestamp('2024-10-09 11:22:33.123', 'yyyy-MM-dd HH:mm:ss.SSS') " +
"from range(9)"
runQueryAndCompare(sql)(checkGlutenOperatorMatch[ProjectExecTransformer])
}
@@ -2486,15 +2487,24 @@ class GlutenClickHouseTPCHSaltNullParquetSuite extends
GlutenClickHouseTPCHAbstr
runQueryAndCompare(sql)({ _ => })
}
- test("GLUTEN-4085: Fix unix_timestamp") {
+ test("GLUTEN-4085: Fix unix_timestamp/to_unix_timestamp") {
val tbl_create_sql = "create table test_tbl_4085(id bigint, data string)
using parquet"
val data_insert_sql =
"insert into test_tbl_4085 values(1, '2023-12-18'),(2, '2023-12-19'),
(3, '2023-12-20')"
val select_sql =
"select id, unix_timestamp(to_date(data), 'yyyy-MM-dd') from
test_tbl_4085"
+ val select_sql_1 = "select id, to_unix_timestamp(to_date(data)) from
test_tbl_4085"
+ val select_sql_2 = "select id, to_unix_timestamp(to_timestamp(data)) from
test_tbl_4085"
+ val select_sql_3 =
+ "select id, unix_timestamp('2024-10-15 07:35:26.486', 'yyyy-MM-dd
HH:mm:ss') from test_tbl_4085"
spark.sql(tbl_create_sql)
spark.sql(data_insert_sql)
compareResultsAgainstVanillaSpark(select_sql, true, { _ => })
+ compareResultsAgainstVanillaSpark(select_sql_1, true, { _ => })
+ compareResultsAgainstVanillaSpark(select_sql_2, true, { _ => })
+ withSQLConf("spark.sql.legacy.timeParserPolicy" -> "LEGACY") {
+ compareResultsAgainstVanillaSpark(select_sql_3, true, { _ => })
+ }
spark.sql("drop table test_tbl_4085")
}
diff --git a/cpp-ch/local-engine/Functions/SparkFunctionDateToUnixTimestamp.h b/cpp-ch/local-engine/Functions/SparkFunctionDateToUnixTimestamp.h
index cdf0460e0e..7c95993bea 100644
--- a/cpp-ch/local-engine/Functions/SparkFunctionDateToUnixTimestamp.h
+++ b/cpp-ch/local-engine/Functions/SparkFunctionDateToUnixTimestamp.h
@@ -42,14 +42,7 @@ class SparkFunctionDateToUnixTimestamp : public IFunction
public:
static constexpr auto name = "sparkDateToUnixTimestamp";
static FunctionPtr create(ContextPtr) { return
std::make_shared<SparkFunctionDateToUnixTimestamp>(); }
- SparkFunctionDateToUnixTimestamp()
- {
- const DateLUTImpl * date_lut = &DateLUT::instance("UTC");
- UInt32 utc_timestamp = static_cast<UInt32>(0);
- LocalDateTime date_time(utc_timestamp, *date_lut);
- UInt32 unix_timestamp = date_time.to_time_t();
- delta_timestamp_from_utc = unix_timestamp - utc_timestamp;
- }
+ SparkFunctionDateToUnixTimestamp() {}
~SparkFunctionDateToUnixTimestamp() override = default;
String getName() const override { return name; }
bool isSuitableForShortCircuitArgumentsExecution(const
DB::DataTypesWithConstInfo &) const override { return true; }
@@ -82,16 +75,17 @@ public:
if (col->size() == 0)
return res;
+ const DateLUTImpl * local_date_lut = &DateLUT::instance();
for (size_t i = 0; i < input_rows; ++i)
{
const T t = col_src->getElement(i);
- data[i] = static_cast<UInt32>(t * DATE_SECONDS_PER_DAY) +
delta_timestamp_from_utc;
+ if constexpr (std::is_same_v<T, UInt16>)
+ data[i] = local_date_lut->fromDayNum(DayNum(t));
+ else
+ data[i] = local_date_lut->fromDayNum(ExtendedDayNum(t));
}
return res;
}
-
-private:
- UInt32 delta_timestamp_from_utc;
};
}
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
index 9ada4b1ed6..d658426745 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/CommonScalarFunctionParser.cpp
@@ -57,9 +57,8 @@ REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Not, not, not );
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Xor, xor, xor);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Cast, cast, CAST);
-REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetTimestamp, get_timestamp,
parseDateTimeInJodaSyntaxOrNull);
+REGISTER_COMMON_SCALAR_FUNCTION_PARSER(GetTimestamp, get_timestamp,
parseDateTime64InJodaSyntaxOrNull);
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Quarter, quarter, toQuarter);
-REGISTER_COMMON_SCALAR_FUNCTION_PARSER(ToUnixTimestamp, to_unix_timestamp,
parseDateTimeInJodaSyntaxOrNull);
// math functions
REGISTER_COMMON_SCALAR_FUNCTION_PARSER(Position, positive, identity);
diff --git a/cpp-ch/local-engine/Parser/scalar_function_parser/unixTimestamp.cpp b/cpp-ch/local-engine/Parser/scalar_function_parser/unixTimestamp.cpp
index 18286c6add..622237da97 100644
--- a/cpp-ch/local-engine/Parser/scalar_function_parser/unixTimestamp.cpp
+++ b/cpp-ch/local-engine/Parser/scalar_function_parser/unixTimestamp.cpp
@@ -33,23 +33,23 @@ namespace ErrorCodes
namespace local_engine
{
+template<typename Name>
class FunctionParserUnixTimestamp : public FunctionParser
{
public:
explicit FunctionParserUnixTimestamp(ParserContextPtr parser_context_) :
FunctionParser(parser_context_) {}
~FunctionParserUnixTimestamp() override = default;
- static constexpr auto name = "unix_timestamp";
-
- String getName() const override { return name; }
+ static constexpr auto name = Name::name;
+ String getName() const override { return Name::name; }
const ActionsDAG::Node * parse(
const substrait::Expression_ScalarFunction & substrait_func,
ActionsDAG & actions_dag) const override
{
/*
- spark function: unix_timestamp(expr, fmt)
- 1. If expr type is string, ch function =
parseDateTimeInJodaSyntaxOrNull(expr, format)
+ spark function: unix_timestamp(expr, fmt) / to_unix_timestamp(expr,
fmt)
+ 1. If expr type is string, ch function =
parseDateTime64InJodaSyntaxOrNull(expr, format)
2. If expr type is date/TIMESTAMP, ch function = toUnixTimestamp(expr,
format)
3. Otherwise, throw exception
*/
@@ -65,7 +65,7 @@ public:
const DB::ActionsDAG::Node * result_node = nullptr;
if (isString(expr_type))
- result_node = toFunctionNode(actions_dag,
"parseDateTimeInJodaSyntaxOrNull", {expr_arg, fmt_arg, time_zone_node});
+ result_node = toFunctionNode(actions_dag,
"parseDateTime64InJodaSyntaxOrNull", {expr_arg, fmt_arg, time_zone_node});
else if (isDateOrDate32(expr_type))
result_node = toFunctionNode(actions_dag,
"sparkDateToUnixTimestamp", {expr_arg, time_zone_node});
else if (isDateTime(expr_type) || isDateTime64(expr_type))
@@ -76,5 +76,19 @@ public:
return convertNodeTypeIfNeeded(substrait_func, result_node,
actions_dag);
}
};
-static FunctionParserRegister<FunctionParserUnixTimestamp>
register_unix_timestamp;
+
+struct FunctionNameUnixTimestamp
+{
+ static constexpr auto name = "unix_timestamp";
+};
+
+struct FunctionNameToUnixTimestamp
+{
+ static constexpr auto name = "to_unix_timestamp";
+};
+
+using FunctionParserForUnixTimestamp =
FunctionParserUnixTimestamp<FunctionNameUnixTimestamp>;
+using FunctionParseToUnixTimestamp =
FunctionParserUnixTimestamp<FunctionNameToUnixTimestamp>;
+static FunctionParserRegister<FunctionParserForUnixTimestamp>
register_unix_timestamp;
+static FunctionParserRegister<FunctionParseToUnixTimestamp>
register_to_unix_timestamp;
}
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index b8171bc633..4bf153f8bf 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -282,7 +282,6 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("groupBy.as")
enableSuite[GlutenDateFunctionsSuite]
.exclude("function to_date")
- .exclude("from_unixtime")
.exclude("unix_timestamp")
.exclude("to_unix_timestamp")
.exclude("to_timestamp")
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 731f66c530..2eeec0b544 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -308,11 +308,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-40660: Switch to XORShiftRandom to distribute elements")
enableSuite[GlutenDateFunctionsSuite]
.exclude("function to_date")
- .exclude("from_unixtime")
.exclude("unix_timestamp")
.exclude("to_unix_timestamp")
- .exclude("to_timestamp")
- .excludeGlutenTest("to_timestamp")
.exclude("to_timestamp with microseconds precision")
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
.exclude("SPARK-30766: date_trunc of old timestamps to hours and days")
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 86d235dc88..b13bb2abc9 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -306,11 +306,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-40660: Switch to XORShiftRandom to distribute elements")
enableSuite[GlutenDateFunctionsSuite]
.exclude("function to_date")
- .exclude("from_unixtime")
.exclude("unix_timestamp")
.exclude("to_unix_timestamp")
- .exclude("to_timestamp")
- .excludeGlutenTest("to_timestamp")
.exclude("to_timestamp with microseconds precision")
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
.exclude("SPARK-30766: date_trunc of old timestamps to hours and days")
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 3fe2764c74..9836cb27f5 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -306,11 +306,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("SPARK-40660: Switch to XORShiftRandom to distribute elements")
enableSuite[GlutenDateFunctionsSuite]
.exclude("function to_date")
- .exclude("from_unixtime")
.exclude("unix_timestamp")
.exclude("to_unix_timestamp")
- .exclude("to_timestamp")
- .excludeGlutenTest("to_timestamp")
.exclude("to_timestamp with microseconds precision")
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
.exclude("SPARK-30766: date_trunc of old timestamps to hours and days")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]