This is an automated email from the ASF dual-hosted git repository.
felixybw pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 8399ac7676 Use Substrait timestamp_tz for Spark TimestampType to
preserve timezone-aware semantics (#11074)
8399ac7676 is described below
commit 8399ac7676555bf9918d9a304c882aa88f188525
Author: Joey <[email protected]>
AuthorDate: Sun Nov 16 07:52:10 2025 +0800
Use Substrait timestamp_tz for Spark TimestampType to preserve
timezone-aware semantics (#11074)
Spark’s TimestampType is timezone-aware: it internally stores timestamps in
UTC (either by converting input values to UTC based on the session time zone,
or by reading UTC timestamps directly from Parquet files) and represents an
absolute point in time. These semantics align with Substrait’s timestamp_tz
type, which also denotes a timezone-aware timestamp that can be unambiguously
mapped to a moment on the timeline.
To maintain semantic consistency between Spark and Substrait, this PR maps
Spark’s TimestampType to Substrait’s timestamp_tz.
---
cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp | 6 +++---
cpp-ch/local-engine/Parser/ExpressionParser.cpp | 4 ++--
cpp-ch/local-engine/Parser/TypeParser.cpp | 4 ++--
cpp/velox/substrait/SubstraitParser.cc | 4 ++--
cpp/velox/substrait/SubstraitToVeloxExpr.cc | 2 +-
cpp/velox/substrait/VeloxToSubstraitExpr.cc | 4 ++--
cpp/velox/substrait/VeloxToSubstraitType.cc | 6 +++---
.../apache/gluten/substrait/expression/TimestampLiteralNode.java | 2 +-
.../java/org/apache/gluten/substrait/type/TimestampTypeNode.java | 4 ++--
.../main/scala/org/apache/gluten/expression/ConverterUtils.scala | 4 ++--
10 files changed, 20 insertions(+), 20 deletions(-)
diff --git a/cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp
b/cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp
index f90bd12176..3d80670087 100644
--- a/cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp
+++ b/cpp-ch/local-engine/Builder/SerializedPlanBuilder.cpp
@@ -107,7 +107,7 @@ SchemaPtr SerializedSchemaBuilder::build()
else if (type == "Timestamp")
{
auto * t = type_struct->mutable_types()->Add();
- t->mutable_timestamp()->set_nullability(
+ t->mutable_timestamp_tz()->set_nullability(
this->nullability_map[name] ?
substrait::Type_Nullability_NULLABILITY_NULLABLE
:
substrait::Type_Nullability_NULLABILITY_REQUIRED);
}
@@ -256,7 +256,7 @@ std::shared_ptr<substrait::Type>
SerializedPlanBuilder::buildType(const DB::Data
const auto * ch_type_datetime64 =
checkAndGetDataType<DataTypeDateTime64>(ch_type_without_nullable.get());
if (ch_type_datetime64->getScale() != 6)
throw Exception(ErrorCodes::UNKNOWN_TYPE, "Spark doesn't support
converting from {}", ch_type->getName());
- res->mutable_timestamp()->set_nullability(type_nullability);
+ res->mutable_timestamp_tz()->set_nullability(type_nullability);
}
else if (which.isDate32())
res->mutable_date()->set_nullability(type_nullability);
@@ -365,7 +365,7 @@ substrait::Expression * literalTimestamp(int64_t value)
{
substrait::Expression * rel = new substrait::Expression();
auto * literal = rel->mutable_literal();
- literal->set_timestamp(value);
+ literal->set_timestamp_tz(value);
return rel;
}
diff --git a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
index 39aabcf2d2..12392a9dfe 100644
--- a/cpp-ch/local-engine/Parser/ExpressionParser.cpp
+++ b/cpp-ch/local-engine/Parser/ExpressionParser.cpp
@@ -112,9 +112,9 @@ std::pair<DB::DataTypePtr, DB::Field>
LiteralParser::parse(const substrait::Expr
field = literal.date();
break;
}
- case substrait::Expression_Literal::kTimestamp: {
+ case substrait::Expression_Literal::kTimestampTz: {
type = std::make_shared<DB::DataTypeDateTime64>(6);
- field = DecimalField<DB::DateTime64>(literal.timestamp(), 6);
+ field = DecimalField<DB::DateTime64>(literal.timestamp_tz(), 6);
break;
}
case substrait::Expression_Literal::kDecimal: {
diff --git a/cpp-ch/local-engine/Parser/TypeParser.cpp
b/cpp-ch/local-engine/Parser/TypeParser.cpp
index 49e76fdb31..bdb8c52e9c 100644
--- a/cpp-ch/local-engine/Parser/TypeParser.cpp
+++ b/cpp-ch/local-engine/Parser/TypeParser.cpp
@@ -153,10 +153,10 @@ DB::DataTypePtr TypeParser::parseType(const
substrait::Type & substrait_type, st
ch_type = std::make_shared<DB::DataTypeFloat64>();
ch_type = tryWrapNullable(substrait_type.fp64().nullability(),
ch_type);
}
- else if (substrait_type.has_timestamp())
+ else if (substrait_type.has_timestamp_tz())
{
ch_type = std::make_shared<DB::DataTypeDateTime64>(6);
- ch_type = tryWrapNullable(substrait_type.timestamp().nullability(),
ch_type);
+ ch_type = tryWrapNullable(substrait_type.timestamp_tz().nullability(),
ch_type);
}
else if (substrait_type.has_date())
{
diff --git a/cpp/velox/substrait/SubstraitParser.cc
b/cpp/velox/substrait/SubstraitParser.cc
index ed4ad36c68..2bc1dd71c3 100644
--- a/cpp/velox/substrait/SubstraitParser.cc
+++ b/cpp/velox/substrait/SubstraitParser.cc
@@ -76,7 +76,7 @@ TypePtr SubstraitParser::parseType(const ::substrait::Type&
substraitType, bool
return UNKNOWN();
case ::substrait::Type::KindCase::kDate:
return DATE();
- case ::substrait::Type::KindCase::kTimestamp:
+ case ::substrait::Type::KindCase::kTimestampTz:
return TIMESTAMP();
case ::substrait::Type::KindCase::kDecimal: {
auto precision = substraitType.decimal().precision();
@@ -368,7 +368,7 @@ bool SubstraitParser::getLiteralValue(const
::substrait::Expression::Literal& li
template <>
Timestamp SubstraitParser::getLiteralValue(const
::substrait::Expression::Literal& literal) {
- return Timestamp::fromMicros(literal.timestamp());
+ return Timestamp::fromMicros(literal.timestamp_tz());
}
template <>
diff --git a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
index fdee942eaa..25e78de709 100755
--- a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
@@ -131,7 +131,7 @@ TypePtr getScalarType(const
::substrait::Expression::Literal& literal) {
}
case ::substrait::Expression_Literal::LiteralTypeCase::kDate:
return DATE();
- case ::substrait::Expression_Literal::LiteralTypeCase::kTimestamp:
+ case ::substrait::Expression_Literal::LiteralTypeCase::kTimestampTz:
return TIMESTAMP();
case ::substrait::Expression_Literal::LiteralTypeCase::kString:
return VARCHAR();
diff --git a/cpp/velox/substrait/VeloxToSubstraitExpr.cc
b/cpp/velox/substrait/VeloxToSubstraitExpr.cc
index f17fda06a2..66a2b4ff80 100644
--- a/cpp/velox/substrait/VeloxToSubstraitExpr.cc
+++ b/cpp/velox/substrait/VeloxToSubstraitExpr.cc
@@ -136,7 +136,7 @@ const ::substrait::Expression_Literal&
toSubstraitNotNullLiteral(
case velox::TypeKind::TIMESTAMP: {
auto vTimeStamp = variantValue.value<TypeKind::TIMESTAMP>();
auto micros = vTimeStamp.getSeconds() * 1000000 + vTimeStamp.getNanos()
/ 1000;
- literalExpr->set_timestamp(micros);
+ literalExpr->set_timestamp_tz(micros);
break;
}
case velox::TypeKind::VARCHAR: {
@@ -250,7 +250,7 @@ const ::substrait::Expression_Literal&
toSubstraitNotNullLiteral<TypeKind::TIMES
::substrait::Expression_Literal* literalExpr =
google::protobuf::Arena::CreateMessage<::substrait::Expression_Literal>(&arena);
auto micros = value.getSeconds() * 1000000 + value.getNanos() / 1000;
- literalExpr->set_timestamp(micros);
+ literalExpr->set_timestamp_tz(micros);
literalExpr->set_nullable(false);
return *literalExpr;
}
diff --git a/cpp/velox/substrait/VeloxToSubstraitType.cc
b/cpp/velox/substrait/VeloxToSubstraitType.cc
index b08fe83db6..b6bcf3bcc9 100644
--- a/cpp/velox/substrait/VeloxToSubstraitType.cc
+++ b/cpp/velox/substrait/VeloxToSubstraitType.cc
@@ -88,9 +88,9 @@ const ::substrait::Type&
VeloxToSubstraitTypeConvertor::toSubstraitType(
break;
}
case velox::TypeKind::TIMESTAMP: {
- auto substraitTimestamp =
google::protobuf::Arena::CreateMessage<::substrait::Type_Timestamp>(&arena);
-
substraitTimestamp->set_nullability(::substrait::Type_Nullability_NULLABILITY_NULLABLE);
- substraitType->set_allocated_timestamp(substraitTimestamp);
+ auto substraitTimestampTZ =
google::protobuf::Arena::CreateMessage<::substrait::Type_TimestampTZ>(&arena);
+
substraitTimestampTZ->set_nullability(::substrait::Type_Nullability_NULLABILITY_NULLABLE);
+ substraitType->set_allocated_timestamp_tz(substraitTimestampTZ);
break;
}
case velox::TypeKind::ARRAY: {
diff --git
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/TimestampLiteralNode.java
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/TimestampLiteralNode.java
index ec253edbc4..15e7254e17 100644
---
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/TimestampLiteralNode.java
+++
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/expression/TimestampLiteralNode.java
@@ -32,6 +32,6 @@ public class TimestampLiteralNode extends
LiteralNodeWithValue<Long> {
@Override
protected void updateLiteralBuilder(Builder literalBuilder, Long value) {
- literalBuilder.setTimestamp(value);
+ literalBuilder.setTimestampTz(value);
}
}
diff --git
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/type/TimestampTypeNode.java
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/type/TimestampTypeNode.java
index f25b5e900e..472df5da97 100644
---
a/gluten-substrait/src/main/java/org/apache/gluten/substrait/type/TimestampTypeNode.java
+++
b/gluten-substrait/src/main/java/org/apache/gluten/substrait/type/TimestampTypeNode.java
@@ -26,7 +26,7 @@ public class TimestampTypeNode extends TypeNode {
@Override
public Type toProtobuf() {
- Type.Timestamp.Builder timestampBuilder = Type.Timestamp.newBuilder();
+ Type.TimestampTZ.Builder timestampBuilder = Type.TimestampTZ.newBuilder();
if (nullable) {
timestampBuilder.setNullability(Type.Nullability.NULLABILITY_NULLABLE);
} else {
@@ -34,7 +34,7 @@ public class TimestampTypeNode extends TypeNode {
}
Type.Builder builder = Type.newBuilder();
- builder.setTimestamp(timestampBuilder.build());
+ builder.setTimestampTz(timestampBuilder.build());
return builder.build();
}
}
diff --git
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
index 1e217eb564..6db1f188d8 100644
---
a/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
+++
b/gluten-substrait/src/main/scala/org/apache/gluten/expression/ConverterUtils.scala
@@ -160,8 +160,8 @@ object ConverterUtils extends Logging {
(StringType, isNullable(substraitType.getString.getNullability))
case Type.KindCase.BINARY =>
(BinaryType, isNullable(substraitType.getBinary.getNullability))
- case Type.KindCase.TIMESTAMP =>
- (TimestampType, isNullable(substraitType.getTimestamp.getNullability))
+ case Type.KindCase.TIMESTAMP_TZ =>
+ (TimestampType,
isNullable(substraitType.getTimestampTz.getNullability))
case Type.KindCase.DATE =>
(DateType, isNullable(substraitType.getDate.getNullability))
case Type.KindCase.DECIMAL =>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]