This is an automated email from the ASF dual-hosted git repository.
wgtmac pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 35dde9a6 feat: Add v3 timestamp nanosecond primitive types (#653)
35dde9a6 is described below
commit 35dde9a66e0c50f1ac19ae7e9ab4cf09d574e3f0
Author: Junwang Zhao <[email protected]>
AuthorDate: Sat May 23 11:16:03 2026 +0800
feat: Add v3 timestamp nanosecond primitive types (#653)
---
src/iceberg/avro/avro_data_util.cc | 13 ++
src/iceberg/avro/avro_direct_decoder.cc | 14 ++
src/iceberg/avro/avro_schema_util.cc | 32 ++++
src/iceberg/avro/avro_schema_util_internal.h | 2 +
src/iceberg/expression/json_serde.cc | 25 +++
src/iceberg/expression/literal.cc | 108 +++++++++--
src/iceberg/expression/literal.h | 4 +
src/iceberg/expression/predicate.cc | 6 +-
src/iceberg/json_serde.cc | 8 +
src/iceberg/manifest/manifest_adapter.cc | 2 +
src/iceberg/parquet/parquet_schema_util.cc | 20 ++
src/iceberg/row/partition_values.cc | 2 +
src/iceberg/row/struct_like.cc | 6 +
src/iceberg/schema_internal.cc | 19 +-
src/iceberg/table_metadata.h | 5 +-
src/iceberg/test/arrow_test.cc | 15 +-
src/iceberg/test/avro_data_test.cc | 48 +++++
src/iceberg/test/avro_schema_test.cc | 44 +++++
src/iceberg/test/bucket_util_test.cc | 74 ++++++++
src/iceberg/test/literal_test.cc | 86 ++++++++-
src/iceberg/test/schema_json_test.cc | 2 +
src/iceberg/test/schema_test.cc | 27 +++
src/iceberg/test/transform_util_test.cc | 86 +++++++++
src/iceberg/test/type_test.cc | 16 +-
src/iceberg/test/visit_type_test.cc | 16 +-
src/iceberg/transform.cc | 13 ++
src/iceberg/transform_function.cc | 10 +
src/iceberg/type.cc | 22 +++
src/iceberg/type.h | 42 +++++
src/iceberg/type_fwd.h | 5 +
src/iceberg/util/bucket_util.cc | 15 ++
src/iceberg/util/conversions.cc | 6 +-
src/iceberg/util/projection_util_internal.h | 8 +
src/iceberg/util/struct_like_set.cc | 2 +
src/iceberg/util/temporal_util.cc | 96 ++++++++++
src/iceberg/util/temporal_util.h | 8 +
src/iceberg/util/transform_util.cc | 261 +++++++++++++++++++++------
src/iceberg/util/transform_util.h | 58 ++++++
src/iceberg/util/visitor_generate.h | 2 +
39 files changed, 1140 insertions(+), 88 deletions(-)
diff --git a/src/iceberg/avro/avro_data_util.cc
b/src/iceberg/avro/avro_data_util.cc
index 5ac565f6..17bbb394 100644
--- a/src/iceberg/avro/avro_data_util.cc
+++ b/src/iceberg/avro/avro_data_util.cc
@@ -431,6 +431,19 @@ Status AppendPrimitiveValueToBuilder(const
::avro::NodePtr& avro_node,
return {};
}
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs: {
+ if (avro_node->type() != ::avro::AVRO_LONG ||
+ avro_node->logicalType().type() !=
::avro::LogicalType::TIMESTAMP_NANOS) {
+ return InvalidArgument(
+ "Expected Avro long with TIMESTAMP_NANOS for timestamp field, got: {}",
+ ToString(avro_node));
+ }
+ auto* builder =
internal::checked_cast<::arrow::TimestampBuilder*>(array_builder);
+
ICEBERG_ARROW_RETURN_NOT_OK(builder->Append(avro_datum.value<int64_t>()));
+ return {};
+ }
+
default:
return InvalidArgument("Unsupported primitive type {} to append avro node {}",
projected_field.type()->ToString(),
ToString(avro_node));
diff --git a/src/iceberg/avro/avro_direct_decoder.cc
b/src/iceberg/avro/avro_direct_decoder.cc
index 335b6064..cb4e869c 100644
--- a/src/iceberg/avro/avro_direct_decoder.cc
+++ b/src/iceberg/avro/avro_direct_decoder.cc
@@ -562,6 +562,20 @@ Status DecodePrimitiveValueToBuilder(const
::avro::NodePtr& avro_node,
return {};
}
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs: {
+ if (avro_node->type() != ::avro::AVRO_LONG ||
+ avro_node->logicalType().type() !=
::avro::LogicalType::TIMESTAMP_NANOS) {
+ return InvalidArgument(
+ "Expected Avro long with TIMESTAMP_NANOS for timestamp field, got: {}",
+ ToString(avro_node));
+ }
+ auto* builder =
internal::checked_cast<::arrow::TimestampBuilder*>(array_builder);
+ int64_t value = decoder.decodeLong();
+ ICEBERG_ARROW_RETURN_NOT_OK(builder->Append(value));
+ return {};
+ }
+
default:
return InvalidArgument("Unsupported primitive type {} to decode from avro node {}",
projected_field.type()->ToString(),
ToString(avro_node));
diff --git a/src/iceberg/avro/avro_schema_util.cc
b/src/iceberg/avro/avro_schema_util.cc
index 75db6d8d..3d61d283 100644
--- a/src/iceberg/avro/avro_schema_util.cc
+++ b/src/iceberg/avro/avro_schema_util.cc
@@ -194,6 +194,24 @@ Status ToAvroNodeVisitor::Visit(const TimestampTzType&
type, ::avro::NodePtr* no
return {};
}
+Status ToAvroNodeVisitor::Visit(const TimestampNsType& type, ::avro::NodePtr*
node) {
+ *node = std::make_shared<::avro::NodePrimitive>(::avro::AVRO_LONG);
+
(*node)->setLogicalType(::avro::LogicalType{::avro::LogicalType::TIMESTAMP_NANOS});
+ ::avro::CustomAttributes attributes;
+ attributes.addAttribute(std::string(kAdjustToUtcProp), "false",
/*addQuotes=*/false);
+ (*node)->addCustomAttributesForField(attributes);
+ return {};
+}
+
+Status ToAvroNodeVisitor::Visit(const TimestampTzNsType& type,
::avro::NodePtr* node) {
+ *node = std::make_shared<::avro::NodePrimitive>(::avro::AVRO_LONG);
+
(*node)->setLogicalType(::avro::LogicalType{::avro::LogicalType::TIMESTAMP_NANOS});
+ ::avro::CustomAttributes attributes;
+ attributes.addAttribute(std::string(kAdjustToUtcProp), "true",
/*addQuotes=*/false);
+ (*node)->addCustomAttributesForField(attributes);
+ return {};
+}
+
Status ToAvroNodeVisitor::Visit(const StringType& type, ::avro::NodePtr* node)
{
*node = std::make_shared<::avro::NodePrimitive>(::avro::AVRO_STRING);
return {};
@@ -548,6 +566,20 @@ Status ValidateAvroSchemaEvolution(const Type&
expected_type,
return {};
}
break;
+ case TypeId::kTimestampNs:
+ if (avro_node->type() == ::avro::AVRO_LONG &&
+ HasLogicalType(avro_node, ::avro::LogicalType::TIMESTAMP_NANOS) &&
+ GetAdjustToUtc(avro_node).value_or("false") == "false") {
+ return {};
+ }
+ break;
+ case TypeId::kTimestampTzNs:
+ if (avro_node->type() == ::avro::AVRO_LONG &&
+ HasLogicalType(avro_node, ::avro::LogicalType::TIMESTAMP_NANOS) &&
+ GetAdjustToUtc(avro_node).value_or("false") == "true") {
+ return {};
+ }
+ break;
case TypeId::kString:
if (avro_node->type() == ::avro::AVRO_STRING) {
return {};
diff --git a/src/iceberg/avro/avro_schema_util_internal.h
b/src/iceberg/avro/avro_schema_util_internal.h
index bdfbf135..e3b7a7ff 100644
--- a/src/iceberg/avro/avro_schema_util_internal.h
+++ b/src/iceberg/avro/avro_schema_util_internal.h
@@ -52,6 +52,8 @@ class ToAvroNodeVisitor {
Status Visit(const TimeType& type, ::avro::NodePtr* node);
Status Visit(const TimestampType& type, ::avro::NodePtr* node);
Status Visit(const TimestampTzType& type, ::avro::NodePtr* node);
+ Status Visit(const TimestampNsType& type, ::avro::NodePtr* node);
+ Status Visit(const TimestampTzNsType& type, ::avro::NodePtr* node);
Status Visit(const StringType& type, ::avro::NodePtr* node);
Status Visit(const UuidType& type, ::avro::NodePtr* node);
Status Visit(const FixedType& type, ::avro::NodePtr* node);
diff --git a/src/iceberg/expression/json_serde.cc
b/src/iceberg/expression/json_serde.cc
index 38e7a8e2..065f41cf 100644
--- a/src/iceberg/expression/json_serde.cc
+++ b/src/iceberg/expression/json_serde.cc
@@ -272,6 +272,11 @@ Result<nlohmann::json> ToJson(const Literal& literal) {
case TypeId::kTimestampTz:
return nlohmann::json(
TransformUtil::HumanTimestampWithZone(std::get<int64_t>(value)));
+ case TypeId::kTimestampNs:
+ return
nlohmann::json(TransformUtil::HumanTimestampNs(std::get<int64_t>(value)));
+ case TypeId::kTimestampTzNs:
+ return nlohmann::json(
+ TransformUtil::HumanTimestampNsWithZone(std::get<int64_t>(value)));
case TypeId::kFloat:
return nlohmann::json(std::get<float>(value));
case TypeId::kDouble:
@@ -390,6 +395,26 @@ Result<Literal> LiteralFromJson(const nlohmann::json&
json, const Type* type) {
return Literal::TimestampTz(micros);
}
+ case TypeId::kTimestampNs: {
+ if (!json.is_string()) [[unlikely]] {
+ return JsonParseError("Cannot parse {} as a timestamp_ns value",
+ SafeDumpJson(json));
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto nanos,
+
TransformUtil::ParseTimestampNs(json.get<std::string>()));
+ return Literal::TimestampNs(nanos);
+ }
+
+ case TypeId::kTimestampTzNs: {
+ if (!json.is_string()) [[unlikely]] {
+ return JsonParseError("Cannot parse {} as a timestamptz_ns value",
+ SafeDumpJson(json));
+ }
+ ICEBERG_ASSIGN_OR_RAISE(
+ auto nanos,
TransformUtil::ParseTimestampNsWithZone(json.get<std::string>()));
+ return Literal::TimestampTzNs(nanos);
+ }
+
case TypeId::kUuid: {
if (!json.is_string()) [[unlikely]] {
return JsonParseError("Cannot parse {} as a uuid value",
SafeDumpJson(json));
diff --git a/src/iceberg/expression/literal.cc
b/src/iceberg/expression/literal.cc
index 9b8060a1..e14d3def 100644
--- a/src/iceberg/expression/literal.cc
+++ b/src/iceberg/expression/literal.cc
@@ -150,6 +150,10 @@ Result<Literal> LiteralCaster::CastFromLong(
return Literal::Timestamp(long_val);
case TypeId::kTimestampTz:
return Literal::TimestampTz(long_val);
+ case TypeId::kTimestampNs:
+ return Literal::TimestampNs(long_val);
+ case TypeId::kTimestampTzNs:
+ return Literal::TimestampTzNs(long_val);
default:
return NotSupported("Cast from Long to {} is not supported",
target_type->ToString());
@@ -215,6 +219,15 @@ Result<Literal> LiteralCaster::CastFromString(
TransformUtil::ParseTimestampWithZone(str_val));
return Literal::TimestampTz(micros);
}
+ case TypeId::kTimestampNs: {
+ ICEBERG_ASSIGN_OR_RAISE(auto nanos,
TransformUtil::ParseTimestampNs(str_val));
+ return Literal::TimestampNs(nanos);
+ }
+ case TypeId::kTimestampTzNs: {
+ ICEBERG_ASSIGN_OR_RAISE(auto nanos,
+
TransformUtil::ParseTimestampNsWithZone(str_val));
+ return Literal::TimestampTzNs(nanos);
+ }
case TypeId::kBinary: {
ICEBERG_ASSIGN_OR_RAISE(auto bytes,
StringUtils::HexStringToBytes(str_val));
return Literal::Binary(std::move(bytes));
@@ -250,14 +263,37 @@ Result<Literal> LiteralCaster::CastFromString(
Result<Literal> LiteralCaster::CastFromTimestamp(
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type)
{
auto timestamp_val = std::get<int64_t>(literal.value_);
+ const auto& source_timestamp =
+ internal::checked_cast<const TimestampBase&>(*literal.type());
+ const bool source_is_nanos = source_timestamp.time_unit() ==
TimeUnit::kNanosecond;
switch (target_type->type_id()) {
case TypeId::kDate: {
ICEBERG_ASSIGN_OR_RAISE(auto days, TemporalUtils::ExtractDay(literal));
return Literal::Date(std::get<int32_t>(days.value()));
}
+ case TypeId::kTimestamp:
+ return source_is_nanos
+ ?
Literal::Timestamp(TemporalUtils::NanosToMicros(timestamp_val))
+ : Literal::Timestamp(timestamp_val);
case TypeId::kTimestampTz:
- return Literal::TimestampTz(timestamp_val);
+ return source_is_nanos
+ ?
Literal::TimestampTz(TemporalUtils::NanosToMicros(timestamp_val))
+ : Literal::TimestampTz(timestamp_val);
+ case TypeId::kTimestampNs: {
+ if (source_is_nanos) {
+ return Literal::TimestampNs(timestamp_val);
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto nanos,
TemporalUtils::MicrosToNanos(timestamp_val));
+ return Literal::TimestampNs(nanos);
+ }
+ case TypeId::kTimestampTzNs: {
+ if (source_is_nanos) {
+ return Literal::TimestampTzNs(timestamp_val);
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto nanos,
TemporalUtils::MicrosToNanos(timestamp_val));
+ return Literal::TimestampTzNs(nanos);
+ }
default:
return NotSupported("Cast from Timestamp to {} is not supported",
target_type->ToString());
@@ -266,15 +302,38 @@ Result<Literal> LiteralCaster::CastFromTimestamp(
Result<Literal> LiteralCaster::CastFromTimestampTz(
const Literal& literal, const std::shared_ptr<PrimitiveType>& target_type)
{
- auto micros = std::get<int64_t>(literal.value_);
+ auto timestamp_val = std::get<int64_t>(literal.value_);
+ const auto& source_timestamp =
+ internal::checked_cast<const TimestampBase&>(*literal.type());
+ const bool source_is_nanos = source_timestamp.time_unit() ==
TimeUnit::kNanosecond;
switch (target_type->type_id()) {
case TypeId::kDate: {
ICEBERG_ASSIGN_OR_RAISE(auto days, TemporalUtils::ExtractDay(literal));
return Literal::Date(std::get<int32_t>(days.value()));
}
+ case TypeId::kTimestampTz:
+ return source_is_nanos
+ ?
Literal::TimestampTz(TemporalUtils::NanosToMicros(timestamp_val))
+ : Literal::TimestampTz(timestamp_val);
case TypeId::kTimestamp:
- return Literal::Timestamp(micros);
+ return source_is_nanos
+ ?
Literal::Timestamp(TemporalUtils::NanosToMicros(timestamp_val))
+ : Literal::Timestamp(timestamp_val);
+ case TypeId::kTimestampNs: {
+ if (source_is_nanos) {
+ return Literal::TimestampNs(timestamp_val);
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto nanos,
TemporalUtils::MicrosToNanos(timestamp_val));
+ return Literal::TimestampNs(nanos);
+ }
+ case TypeId::kTimestampTzNs: {
+ if (source_is_nanos) {
+ return Literal::TimestampTzNs(timestamp_val);
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto nanos,
TemporalUtils::MicrosToNanos(timestamp_val));
+ return Literal::TimestampTzNs(nanos);
+ }
default:
return NotSupported("Cast from TimestampTz to {} is not supported",
target_type->ToString());
@@ -329,6 +388,10 @@ Literal Literal::Timestamp(int64_t value) { return
{Value{value}, timestamp()};
Literal Literal::TimestampTz(int64_t value) { return {Value{value},
timestamp_tz()}; }
+Literal Literal::TimestampNs(int64_t value) { return {Value{value},
timestamp_ns()}; }
+
+Literal Literal::TimestampTzNs(int64_t value) { return {Value{value},
timestamptz_ns()}; }
+
Literal Literal::Float(float value) { return {Value{value}, float32()}; }
Literal Literal::Double(double value) { return {Value{value}, float64()}; }
@@ -388,18 +451,19 @@ std::strong_ordering CompareFloat(T lhs, T rhs) {
namespace {
bool Comparable(TypeId lhs, TypeId rhs) {
- switch (lhs) {
- case TypeId::kInt:
- case TypeId::kDate:
- return rhs == TypeId::kInt || rhs == TypeId::kDate;
- case TypeId::kLong:
- case TypeId::kTimestamp:
- case TypeId::kTimestampTz:
- return rhs == TypeId::kLong || rhs == TypeId::kTimestamp ||
- rhs == TypeId::kTimestampTz;
- default:
- return lhs == rhs;
+ if (lhs == rhs) {
+ return true;
+ }
+ if ((lhs == TypeId::kInt || lhs == TypeId::kDate) &&
+ (rhs == TypeId::kInt || rhs == TypeId::kDate)) {
+ return true;
}
+ if ((lhs == TypeId::kTimestamp || lhs == TypeId::kTimestampTz) &&
+ (rhs == TypeId::kTimestamp || rhs == TypeId::kTimestampTz)) {
+ return true;
+ }
+ return (lhs == TypeId::kTimestampNs || lhs == TypeId::kTimestampTzNs) &&
+ (rhs == TypeId::kTimestampNs || rhs == TypeId::kTimestampTzNs);
}
} // namespace
@@ -408,8 +472,8 @@ bool Literal::operator==(const Literal& other) const {
return (*this <=> other)
// Three-way comparison operator
std::partial_ordering Literal::operator<=>(const Literal& other) const {
- // If types are different, comparison is unordered
- // (Int & Date) (Timestamp & Long) were excluded from this check to allow comparison
+ // Allow date/int comparisons for transformed date bounds. Otherwise, comparisons
+ // are limited to identical logical values.
if (!Comparable(type_->type_id(), other.type_->type_id())) {
return std::partial_ordering::unordered;
}
@@ -439,7 +503,9 @@ std::partial_ordering Literal::operator<=>(const Literal&
other) const {
case TypeId::kLong:
case TypeId::kTime:
case TypeId::kTimestamp:
- case TypeId::kTimestampTz: {
+ case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs: {
auto this_val = std::get<int64_t>(value_);
auto other_val = std::get<int64_t>(other.value_);
return this_val <=> other_val;
@@ -548,7 +614,9 @@ std::string Literal::ToString() const {
}
case TypeId::kTime:
case TypeId::kTimestamp:
- case TypeId::kTimestampTz: {
+ case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs: {
return std::to_string(std::get<int64_t>(value_));
}
case TypeId::kDate: {
@@ -613,6 +681,10 @@ Result<Literal> LiteralCaster::CastTo(const Literal&
literal,
return CastFromTimestamp(literal, target_type);
case TypeId::kTimestampTz:
return CastFromTimestampTz(literal, target_type);
+ case TypeId::kTimestampNs:
+ return CastFromTimestamp(literal, target_type);
+ case TypeId::kTimestampTzNs:
+ return CastFromTimestampTz(literal, target_type);
default:
break;
}
diff --git a/src/iceberg/expression/literal.h b/src/iceberg/expression/literal.h
index b07aaa5e..dbcabb52 100644
--- a/src/iceberg/expression/literal.h
+++ b/src/iceberg/expression/literal.h
@@ -73,6 +73,8 @@ class ICEBERG_EXPORT Literal : public util::Formattable {
static Literal Time(int64_t value);
static Literal Timestamp(int64_t value);
static Literal TimestampTz(int64_t value);
+ static Literal TimestampNs(int64_t value);
+ static Literal TimestampTzNs(int64_t value);
static Literal Float(float value);
static Literal Double(double value);
static Literal String(std::string value);
@@ -199,6 +201,8 @@ DEFINE_LITERAL_TRAIT(kLong, int64_t)
DEFINE_LITERAL_TRAIT(kTime, int64_t)
DEFINE_LITERAL_TRAIT(kTimestamp, int64_t)
DEFINE_LITERAL_TRAIT(kTimestampTz, int64_t)
+DEFINE_LITERAL_TRAIT(kTimestampNs, int64_t)
+DEFINE_LITERAL_TRAIT(kTimestampTzNs, int64_t)
DEFINE_LITERAL_TRAIT(kFloat, float)
DEFINE_LITERAL_TRAIT(kDouble, double)
DEFINE_LITERAL_TRAIT(kDecimal, Decimal)
diff --git a/src/iceberg/expression/predicate.cc
b/src/iceberg/expression/predicate.cc
index 3c92c2fc..307c3c60 100644
--- a/src/iceberg/expression/predicate.cc
+++ b/src/iceberg/expression/predicate.cc
@@ -494,10 +494,10 @@ bool BoundLiteralPredicate::Equals(const Expression&
other) const {
}
}
- // TODO(gangwu): add TypeId::kTimestampNano
static const std::unordered_set<TypeId> kIntegralTypes = {
- TypeId::kInt, TypeId::kLong, TypeId::kDate,
- TypeId::kTime, TypeId::kTimestamp, TypeId::kTimestampTz};
+ TypeId::kInt, TypeId::kLong, TypeId::kDate,
+ TypeId::kTime, TypeId::kTimestamp, TypeId::kTimestampTz,
+ TypeId::kTimestampNs, TypeId::kTimestampTzNs};
if (kIntegralTypes.contains(term_->type()->type_id()) &&
term_->Equals(*other_pred->term())) {
diff --git a/src/iceberg/json_serde.cc b/src/iceberg/json_serde.cc
index 2d8c2225..3944e510 100644
--- a/src/iceberg/json_serde.cc
+++ b/src/iceberg/json_serde.cc
@@ -363,6 +363,10 @@ nlohmann::json ToJson(const Type& type) {
return "timestamp";
case TypeId::kTimestampTz:
return "timestamptz";
+ case TypeId::kTimestampNs:
+ return "timestamp_ns";
+ case TypeId::kTimestampTzNs:
+ return "timestamptz_ns";
case TypeId::kString:
return "string";
case TypeId::kBinary:
@@ -488,6 +492,10 @@ Result<std::unique_ptr<Type>> TypeFromJson(const
nlohmann::json& json) {
return std::make_unique<TimestampType>();
} else if (type_str == "timestamptz") {
return std::make_unique<TimestampTzType>();
+ } else if (type_str == "timestamp_ns") {
+ return std::make_unique<TimestampNsType>();
+ } else if (type_str == "timestamptz_ns") {
+ return std::make_unique<TimestampTzNsType>();
} else if (type_str == "string") {
return std::make_unique<StringType>();
} else if (type_str == "binary") {
diff --git a/src/iceberg/manifest/manifest_adapter.cc
b/src/iceberg/manifest/manifest_adapter.cc
index cf0a0515..b37d8270 100644
--- a/src/iceberg/manifest/manifest_adapter.cc
+++ b/src/iceberg/manifest/manifest_adapter.cc
@@ -222,6 +222,8 @@ Status ManifestEntryAdapter::AppendPartitionValues(
case TypeId::kTime:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
ICEBERG_RETURN_UNEXPECTED(
AppendField(child_array,
std::get<int64_t>(partition_value.value())));
break;
diff --git a/src/iceberg/parquet/parquet_schema_util.cc
b/src/iceberg/parquet/parquet_schema_util.cc
index e9574a48..849bbd1f 100644
--- a/src/iceberg/parquet/parquet_schema_util.cc
+++ b/src/iceberg/parquet/parquet_schema_util.cc
@@ -125,6 +125,26 @@ Status ValidateParquetSchemaEvolution(
}
}
break;
+ case TypeId::kTimestampNs:
+ if (arrow_type->id() == ::arrow::Type::TIMESTAMP) {
+ const auto& timestamp_type =
+ internal::checked_cast<const ::arrow::TimestampType&>(*arrow_type);
+ if (timestamp_type.unit() == ::arrow::TimeUnit::NANO &&
+ timestamp_type.timezone().empty()) {
+ return {};
+ }
+ }
+ break;
+ case TypeId::kTimestampTzNs:
+ if (arrow_type->id() == ::arrow::Type::TIMESTAMP) {
+ const auto& timestamp_type =
+ internal::checked_cast<const ::arrow::TimestampType&>(*arrow_type);
+ if (timestamp_type.unit() == ::arrow::TimeUnit::NANO &&
+ !timestamp_type.timezone().empty()) {
+ return {};
+ }
+ }
+ break;
case TypeId::kString:
if (arrow_type->id() == ::arrow::Type::STRING) {
return {};
diff --git a/src/iceberg/row/partition_values.cc
b/src/iceberg/row/partition_values.cc
index 712c801a..fcd4691d 100644
--- a/src/iceberg/row/partition_values.cc
+++ b/src/iceberg/row/partition_values.cc
@@ -57,6 +57,8 @@ Result<Scalar> PartitionValues::GetField(size_t pos) const {
case TypeId::kTime:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
return Scalar{std::get<int64_t>(literal.value())};
case TypeId::kFloat:
return Scalar{std::get<float>(literal.value())};
diff --git a/src/iceberg/row/struct_like.cc b/src/iceberg/row/struct_like.cc
index 5b814204..355af84c 100644
--- a/src/iceberg/row/struct_like.cc
+++ b/src/iceberg/row/struct_like.cc
@@ -45,6 +45,8 @@ Result<Scalar> LiteralToScalar(const Literal& literal) {
case TypeId::kTime:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
return Scalar{std::get<int64_t>(literal.value())};
case TypeId::kFloat:
return Scalar{std::get<float>(literal.value())};
@@ -152,6 +154,10 @@ Result<Literal> StructLikeAccessor::GetLiteral(const
StructLike& struct_like) co
return Literal::Timestamp(std::get<int64_t>(scalar));
case TypeId::kTimestampTz:
return Literal::TimestampTz(std::get<int64_t>(scalar));
+ case TypeId::kTimestampNs:
+ return Literal::TimestampNs(std::get<int64_t>(scalar));
+ case TypeId::kTimestampTzNs:
+ return Literal::TimestampTzNs(std::get<int64_t>(scalar));
case TypeId::kFixed: {
const auto& fixed_data = std::get<std::string_view>(scalar);
return Literal::Fixed(std::vector<uint8_t>(fixed_data.cbegin(),
fixed_data.cend()));
diff --git a/src/iceberg/schema_internal.cc b/src/iceberg/schema_internal.cc
index dedb603e..bdd5b859 100644
--- a/src/iceberg/schema_internal.cc
+++ b/src/iceberg/schema_internal.cc
@@ -123,6 +123,15 @@ ArrowErrorCode ToArrowSchema(const Type& type, bool
optional, std::string_view n
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDateTime(
schema, NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_MICRO, "UTC"));
} break;
+ case TypeId::kTimestampNs: {
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDateTime(schema,
NANOARROW_TYPE_TIMESTAMP,
+
NANOARROW_TIME_UNIT_NANO,
+
/*timezone=*/nullptr));
+ } break;
+ case TypeId::kTimestampTzNs: {
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetTypeDateTime(
+ schema, NANOARROW_TYPE_TIMESTAMP, NANOARROW_TIME_UNIT_NANO, "UTC"));
+ } break;
case TypeId::kString:
NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema,
NANOARROW_TYPE_STRING));
break;
@@ -270,10 +279,18 @@ Result<std::shared_ptr<Type>> FromArrowSchema(const
ArrowSchema& schema) {
case NANOARROW_TYPE_TIMESTAMP: {
bool with_timezone =
schema_view.timezone != nullptr && std::strlen(schema_view.timezone)
> 0;
- if (schema_view.time_unit != NANOARROW_TIME_UNIT_MICRO) {
+ if (schema_view.time_unit != NANOARROW_TIME_UNIT_MICRO &&
+ schema_view.time_unit != NANOARROW_TIME_UNIT_NANO) {
return InvalidSchema("Unsupported time unit for Arrow timestamp type: {}",
static_cast<int>(schema_view.time_unit));
}
+ if (schema_view.time_unit == NANOARROW_TIME_UNIT_NANO) {
+ if (with_timezone) {
+ return iceberg::timestamptz_ns();
+ } else {
+ return iceberg::timestamp_ns();
+ }
+ }
if (with_timezone) {
return iceberg::timestamp_tz();
} else {
diff --git a/src/iceberg/table_metadata.h b/src/iceberg/table_metadata.h
index cfae0ce2..2f0c7e18 100644
--- a/src/iceberg/table_metadata.h
+++ b/src/iceberg/table_metadata.h
@@ -77,7 +77,10 @@ struct ICEBERG_EXPORT TableMetadata {
static constexpr int64_t kInitialSequenceNumber = 0;
static constexpr int64_t kInitialRowId = 0;
- static inline const std::unordered_map<TypeId, int8_t> kMinFormatVersions =
{};
+ static inline const std::unordered_map<TypeId, int8_t> kMinFormatVersions = {
+ {TypeId::kTimestampNs, 3},
+ {TypeId::kTimestampTzNs, 3},
+ };
/// An integer version number for the format
int8_t format_version;
diff --git a/src/iceberg/test/arrow_test.cc b/src/iceberg/test/arrow_test.cc
index 12039280..dcfdb6b5 100644
--- a/src/iceberg/test/arrow_test.cc
+++ b/src/iceberg/test/arrow_test.cc
@@ -92,8 +92,14 @@ INSTANTIATE_TEST_SUITE_P(
.arrow_type =
::arrow::time64(arrow::TimeUnit::MICRO)},
ToArrowSchemaParam{.iceberg_type = iceberg::timestamp(),
.arrow_type =
::arrow::timestamp(arrow::TimeUnit::MICRO)},
- ToArrowSchemaParam{.iceberg_type = iceberg::timestamp(),
- .arrow_type =
::arrow::timestamp(arrow::TimeUnit::MICRO)},
+ ToArrowSchemaParam{
+ .iceberg_type = iceberg::timestamp_tz(),
+ .arrow_type = ::arrow::timestamp(arrow::TimeUnit::MICRO, "UTC")},
+ ToArrowSchemaParam{.iceberg_type = iceberg::timestamp_ns(),
+ .arrow_type =
::arrow::timestamp(arrow::TimeUnit::NANO)},
+ ToArrowSchemaParam{
+ .iceberg_type = iceberg::timestamptz_ns(),
+ .arrow_type = ::arrow::timestamp(arrow::TimeUnit::NANO, "UTC")},
ToArrowSchemaParam{.iceberg_type = iceberg::string(),
.arrow_type = ::arrow::utf8()},
ToArrowSchemaParam{.iceberg_type = iceberg::binary(),
@@ -289,6 +295,11 @@ INSTANTIATE_TEST_SUITE_P(
FromArrowSchemaParam{
.arrow_type = ::arrow::timestamp(arrow::TimeUnit::MICRO, "UTC"),
.iceberg_type = std::make_shared<TimestampTzType>()},
+ FromArrowSchemaParam{.arrow_type =
::arrow::timestamp(arrow::TimeUnit::NANO),
+ .iceberg_type = iceberg::timestamp_ns()},
+ FromArrowSchemaParam{
+ .arrow_type = ::arrow::timestamp(arrow::TimeUnit::NANO, "UTC"),
+ .iceberg_type = iceberg::timestamptz_ns()},
FromArrowSchemaParam{.arrow_type = ::arrow::utf8(),
.iceberg_type = iceberg::string()},
FromArrowSchemaParam{.arrow_type = ::arrow::binary(),
diff --git a/src/iceberg/test/avro_data_test.cc
b/src/iceberg/test/avro_data_test.cc
index c0e42f67..2979ad9b 100644
--- a/src/iceberg/test/avro_data_test.cc
+++ b/src/iceberg/test/avro_data_test.cc
@@ -298,6 +298,30 @@ const std::vector<AppendDatumParam> kPrimitiveTestCases = {
.expected_json =
R"([{"a": 1672531200000000}, {"a": 1672531201000000}, {"a": 1672531202000000}])",
},
+ {
+ .name = "TimestampNs",
+ .projected_type = iceberg::timestamp_ns(),
+ .source_type = iceberg::timestamp_ns(),
+ .value_setter =
+ [](::avro::GenericDatum& datum, int i) {
+ datum.value<::avro::GenericRecord>().fieldAt(0).value<int64_t>()
=
+ 1672531200000000000LL + i * 1000000000LL + i;
+ },
+ .expected_json =
+ R"([{"a": 1672531200000000000}, {"a": 1672531201000000001}, {"a": 1672531202000000002}])",
+ },
+ {
+ .name = "TimestampTzNs",
+ .projected_type = iceberg::timestamptz_ns(),
+ .source_type = iceberg::timestamptz_ns(),
+ .value_setter =
+ [](::avro::GenericDatum& datum, int i) {
+ datum.value<::avro::GenericRecord>().fieldAt(0).value<int64_t>()
=
+ 1672531200000000000LL + i * 1000000000LL + i;
+ },
+ .expected_json =
+ R"([{"a": 1672531200000000000}, {"a": 1672531201000000001}, {"a": 1672531202000000002}])",
+ },
{
.name = "IntToLongPromotion",
.projected_type = iceberg::int64(),
@@ -948,6 +972,30 @@ const std::vector<ExtractDatumParam>
kExtractDatumTestCases = {
1672531200000000LL + i * 1000000LL);
},
},
+ {
+ .name = "TimestampNs",
+ .iceberg_type = timestamp_ns(),
+ .arrow_json =
+ R"([{"a": 1672531200000000000}, {"a": 1672531201000000001}, {"a": 1672531202000000002}])",
+ .value_verifier =
+ [](const ::avro::GenericDatum& datum, int i) {
+ const auto& record = datum.value<::avro::GenericRecord>();
+ EXPECT_EQ(record.fieldAt(0).value<int64_t>(),
+ 1672531200000000000LL + i * 1000000000LL + i);
+ },
+ },
+ {
+ .name = "TimestampTzNs",
+ .iceberg_type = timestamptz_ns(),
+ .arrow_json =
+ R"([{"a": 1672531200000000000}, {"a": 1672531201000000001}, {"a": 1672531202000000002}])",
+ .value_verifier =
+ [](const ::avro::GenericDatum& datum, int i) {
+ const auto& record = datum.value<::avro::GenericRecord>();
+ EXPECT_EQ(record.fieldAt(0).value<int64_t>(),
+ 1672531200000000000LL + i * 1000000000LL + i);
+ },
+ },
};
INSTANTIATE_TEST_SUITE_P(AllPrimitiveTypes, ExtractDatumFromArrayTest,
diff --git a/src/iceberg/test/avro_schema_test.cc
b/src/iceberg/test/avro_schema_test.cc
index 2c1ee8a9..dc2cb0a5 100644
--- a/src/iceberg/test/avro_schema_test.cc
+++ b/src/iceberg/test/avro_schema_test.cc
@@ -199,6 +199,26 @@ TEST(ToAvroNodeVisitorTest, TimestampTzType) {
EXPECT_EQ(node->customAttributesAt(0).getAttribute("adjust-to-utc"), "true");
}
+TEST(ToAvroNodeVisitorTest, TimestampNsType) {
+ ::avro::NodePtr node;
+ EXPECT_THAT(ToAvroNodeVisitor{}.Visit(TimestampNsType{}, &node), IsOk());
+ EXPECT_EQ(node->type(), ::avro::AVRO_LONG);
+ EXPECT_EQ(node->logicalType().type(), ::avro::LogicalType::TIMESTAMP_NANOS);
+
+ ASSERT_EQ(node->customAttributes(), 1);
+ EXPECT_EQ(node->customAttributesAt(0).getAttribute("adjust-to-utc"),
"false");
+}
+
+TEST(ToAvroNodeVisitorTest, TimestampTzNsType) {
+ ::avro::NodePtr node;
+ EXPECT_THAT(ToAvroNodeVisitor{}.Visit(TimestampTzNsType{}, &node), IsOk());
+ EXPECT_EQ(node->type(), ::avro::AVRO_LONG);
+ EXPECT_EQ(node->logicalType().type(), ::avro::LogicalType::TIMESTAMP_NANOS);
+
+ ASSERT_EQ(node->customAttributes(), 1);
+ EXPECT_EQ(node->customAttributesAt(0).getAttribute("adjust-to-utc"), "true");
+}
+
TEST(ToAvroNodeVisitorTest, StringType) {
::avro::NodePtr node;
EXPECT_THAT(ToAvroNodeVisitor{}.Visit(StringType{}, &node), IsOk());
@@ -1023,6 +1043,30 @@ TEST(AvroSchemaProjectionTest, ProjectMapType) {
ASSERT_EQ(projection.fields[0].children.size(), 2);
}
+TEST(AvroSchemaProjectionTest, RejectTimestampNsFromMicrosType) {
+ Schema expected_schema({
+ SchemaField::MakeRequired(/*field_id=*/1, "ts", iceberg::timestamp_ns()),
+ });
+
+ std::string avro_schema_json = R"({
+ "type": "record",
+ "name": "iceberg_schema",
+ "fields": [
+ {"name": "ts", "type": {
+ "type": "long",
+ "logicalType": "timestamp-micros",
+ "adjust-to-utc": false
+ }, "field-id": 1}
+ ]
+ })";
+ auto avro_schema = ::avro::compileJsonSchemaFromString(avro_schema_json);
+
+ auto projection_result =
+ Project(expected_schema, avro_schema.root(), /*prune_source=*/false);
+ ASSERT_THAT(projection_result, IsError(ErrorKind::kInvalidSchema));
+ ASSERT_THAT(projection_result, HasErrorMessage("Cannot read"));
+}
+
TEST(AvroSchemaProjectionTest, ProjectMapTypeWithNonStringKey) {
::iceberg::avro::RegisterLogicalTypes();
diff --git a/src/iceberg/test/bucket_util_test.cc
b/src/iceberg/test/bucket_util_test.cc
index 9c0d46f0..bee84a7c 100644
--- a/src/iceberg/test/bucket_util_test.cc
+++ b/src/iceberg/test/bucket_util_test.cc
@@ -23,6 +23,7 @@
#include <gtest/gtest.h>
+#include "iceberg/expression/literal.h"
#include "iceberg/test/temporal_test_helper.h"
#include "iceberg/util/decimal.h"
#include "iceberg/util/uuid.h"
@@ -107,4 +108,77 @@ TEST(BucketUtilsTest, HashHelper) {
EXPECT_EQ(BucketUtils::HashBytes(fixed), -188683207);
}
+TEST(BucketUtilsTest, BucketTimestampNanosMatchesMicros) {
+  // Every micros/nanos pair below describes the same instant at microsecond
+  // granularity, so both literals are expected to map to the same bucket.
+  constexpr int32_t kNumBuckets = 1000;
+
+  // Zone-less timestamps: 1 microsecond == 1000 nanoseconds past the second.
+  const auto ts_micros = TemporalTestHelper::CreateTimestamp(
+      {.year = 2017, .month = 11, .day = 16, .hour = 22, .minute = 31, .second = 8,
+       .microsecond = 1});
+  const auto ts_nanos = TemporalTestHelper::CreateTimestampNanos(
+      {.year = 2017, .month = 11, .day = 16, .hour = 22, .minute = 31, .second = 8,
+       .nanosecond = 1000});
+
+  const auto micros_bucket =
+      BucketUtils::BucketIndex(Literal::Timestamp(ts_micros), kNumBuckets);
+  const auto nanos_bucket =
+      BucketUtils::BucketIndex(Literal::TimestampNs(ts_nanos), kNumBuckets);
+
+  ASSERT_TRUE(micros_bucket.has_value());
+  ASSERT_TRUE(nanos_bucket.has_value());
+  EXPECT_EQ(micros_bucket.value(), nanos_bucket.value());
+
+  // Same check for the zoned variants, built with a -480 minute offset.
+  const auto ts_tz_micros = TemporalTestHelper::CreateTimestampTz(
+      {.year = 2017, .month = 11, .day = 16, .hour = 14, .minute = 31, .second = 8,
+       .microsecond = 1, .tz_offset_minutes = -480});
+  const auto ts_tz_nanos = TemporalTestHelper::CreateTimestampTzNanos(
+      {.year = 2017, .month = 11, .day = 16, .hour = 14, .minute = 31, .second = 8,
+       .nanosecond = 1000, .tz_offset_minutes = -480});
+
+  const auto tz_micros_bucket =
+      BucketUtils::BucketIndex(Literal::TimestampTz(ts_tz_micros), kNumBuckets);
+  const auto tz_nanos_bucket =
+      BucketUtils::BucketIndex(Literal::TimestampTzNs(ts_tz_nanos), kNumBuckets);
+
+  ASSERT_TRUE(tz_micros_bucket.has_value());
+  ASSERT_TRUE(tz_nanos_bucket.has_value());
+  EXPECT_EQ(tz_micros_bucket.value(), tz_nanos_bucket.value());
+
+  // Pre-epoch values: -876543211 ns is not a whole number of microseconds. The
+  // expected partner is -876544 us (rounded toward negative infinity), not -876543.
+  const auto pre_epoch_micros_bucket =
+      BucketUtils::BucketIndex(Literal::Timestamp(-876544), kNumBuckets);
+  const auto pre_epoch_nanos_bucket =
+      BucketUtils::BucketIndex(Literal::TimestampNs(-876543211), kNumBuckets);
+
+  ASSERT_TRUE(pre_epoch_micros_bucket.has_value());
+  ASSERT_TRUE(pre_epoch_nanos_bucket.has_value());
+  EXPECT_EQ(pre_epoch_micros_bucket.value(), pre_epoch_nanos_bucket.value());
+
+  const auto pre_epoch_tz_micros_bucket =
+      BucketUtils::BucketIndex(Literal::TimestampTz(-876544), kNumBuckets);
+  const auto pre_epoch_tz_nanos_bucket =
+      BucketUtils::BucketIndex(Literal::TimestampTzNs(-876543211), kNumBuckets);
+
+  ASSERT_TRUE(pre_epoch_tz_micros_bucket.has_value());
+  ASSERT_TRUE(pre_epoch_tz_nanos_bucket.has_value());
+  EXPECT_EQ(pre_epoch_tz_micros_bucket.value(), pre_epoch_tz_nanos_bucket.value());
+}
+
} // namespace iceberg
diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc
index 97724aad..86b89237 100644
--- a/src/iceberg/test/literal_test.cc
+++ b/src/iceberg/test/literal_test.cc
@@ -91,9 +91,21 @@ TEST_P(CastLiteralTest, CastTest) {
TEST(LiteralTest, CrossTypeComparison) {
   auto int_literal = Literal::Int(42);
   auto string_literal = Literal::String("42");
+  auto long_literal = Literal::Long(42);
+  auto timestamp_literal = Literal::Timestamp(42);
+  auto timestamp_tz_literal = Literal::TimestampTz(42);
+  auto timestamp_ns_literal = Literal::TimestampNs(42);
+  auto timestamp_tz_ns_literal = Literal::TimestampTzNs(42);
   // Different types should return unordered
   EXPECT_EQ(int_literal <=> string_literal, std::partial_ordering::unordered);
+  EXPECT_EQ(long_literal <=> timestamp_literal, std::partial_ordering::unordered);
+  EXPECT_EQ(timestamp_literal <=> timestamp_ns_literal, std::partial_ordering::unordered);
+  // These type pairs are expected to compare by value instead of being unordered.
+  EXPECT_EQ(int_literal <=> Literal::Date(42), std::partial_ordering::equivalent);
+  EXPECT_EQ(timestamp_literal <=> timestamp_tz_literal,
+            std::partial_ordering::equivalent);
+  EXPECT_EQ(timestamp_ns_literal <=> timestamp_tz_ns_literal,
+            std::partial_ordering::equivalent);
}
// Overflow tests
@@ -148,6 +160,18 @@ TEST(LiteralTest, CastToError) {
// Cast to Fixed with different length should fail
EXPECT_THAT(fixed_literal.CastTo(fixed(5)),
IsError(ErrorKind::kNotSupported));
+
+ constexpr auto max_micros = std::numeric_limits<int64_t>::max() / 1000;
+ EXPECT_THAT(Literal::Timestamp(max_micros + 1).CastTo(timestamp_ns()),
+ IsError(ErrorKind::kInvalidArgument));
+ EXPECT_THAT(Literal::Timestamp(max_micros + 1).CastTo(timestamptz_ns()),
+ IsError(ErrorKind::kInvalidArgument));
+
+ constexpr auto min_micros = std::numeric_limits<int64_t>::min() / 1000;
+ EXPECT_THAT(Literal::TimestampTz(min_micros - 1).CastTo(timestamp_ns()),
+ IsError(ErrorKind::kInvalidArgument));
+ EXPECT_THAT(Literal::TimestampTz(min_micros - 1).CastTo(timestamptz_ns()),
+ IsError(ErrorKind::kInvalidArgument));
}
// Special value tests
@@ -580,7 +604,15 @@ INSTANTIATE_TEST_SUITE_P(
BasicLiteralTestParam{.test_name = "TimestampTz",
.literal =
Literal::TimestampTz(1684137600000000LL),
.expected_type_id = TypeId::kTimestampTz,
- .expected_string = "1684137600000000"}),
+ .expected_string = "1684137600000000"},
+ BasicLiteralTestParam{.test_name = "TimestampNs",
+ .literal =
Literal::TimestampNs(1684137600000000001LL),
+ .expected_type_id = TypeId::kTimestampNs,
+ .expected_string = "1684137600000000001"},
+ BasicLiteralTestParam{.test_name = "TimestampTzNs",
+ .literal =
Literal::TimestampTzNs(1684137600000000001LL),
+ .expected_type_id = TypeId::kTimestampTzNs,
+ .expected_string = "1684137600000000001"}),
[](const ::testing::TestParamInfo<BasicLiteralTestParam>& info) {
return info.param.test_name;
});
@@ -655,7 +687,15 @@ INSTANTIATE_TEST_SUITE_P(
ComparisonLiteralTestParam{.test_name = "TimestampTz",
.small_literal =
Literal::TimestampTz(1000000LL),
.large_literal =
Literal::TimestampTz(2000000LL),
- .equal_literal =
Literal::TimestampTz(1000000LL)}),
+ .equal_literal =
Literal::TimestampTz(1000000LL)},
+ ComparisonLiteralTestParam{.test_name = "TimestampNs",
+ .small_literal =
Literal::TimestampNs(1000000LL),
+ .large_literal =
Literal::TimestampNs(2000000LL),
+ .equal_literal =
Literal::TimestampNs(1000000LL)},
+ ComparisonLiteralTestParam{.test_name = "TimestampTzNs",
+ .small_literal =
Literal::TimestampTzNs(1000000LL),
+ .large_literal =
Literal::TimestampTzNs(2000000LL),
+ .equal_literal =
Literal::TimestampTzNs(1000000LL)}),
[](const ::testing::TestParamInfo<ComparisonLiteralTestParam>& info) {
return info.param.test_name;
});
@@ -705,6 +745,14 @@ INSTANTIATE_TEST_SUITE_P(
.source_literal = Literal::Long(42L),
.target_type = timestamp_tz(),
.expected_literal = Literal::TimestampTz(42L)},
+ CastLiteralTestParam{.test_name = "LongToTimestampNs",
+ .source_literal = Literal::Long(42L),
+ .target_type = timestamp_ns(),
+ .expected_literal = Literal::TimestampNs(42L)},
+ CastLiteralTestParam{.test_name = "LongToTimestampTzNs",
+ .source_literal = Literal::Long(42L),
+ .target_type = timestamptz_ns(),
+ .expected_literal = Literal::TimestampTzNs(42L)},
CastLiteralTestParam{
.test_name = "TimestampToDate",
.source_literal =
@@ -750,6 +798,30 @@ INSTANTIATE_TEST_SUITE_P(
.second
= 59})),
.target_type = date(),
.expected_literal = Literal::Date(-1)},
+ CastLiteralTestParam{.test_name = "TimestampNsToTimestampBeforeEpoch",
+ .source_literal =
Literal::TimestampNs(-876543211),
+ .target_type = timestamp(),
+ .expected_literal = Literal::Timestamp(-876544)},
+ CastLiteralTestParam{.test_name =
"TimestampNsToTimestampTzBeforeEpoch",
+ .source_literal =
Literal::TimestampNs(-876543211),
+ .target_type = timestamp_tz(),
+ .expected_literal =
Literal::TimestampTz(-876544)},
+ CastLiteralTestParam{.test_name = "TimestampToTimestampNsBeforeEpoch",
+ .source_literal = Literal::Timestamp(-876544),
+ .target_type = timestamp_ns(),
+ .expected_literal =
Literal::TimestampNs(-876544000)},
+ CastLiteralTestParam{.test_name =
"TimestampToTimestampTzNsBeforeEpoch",
+ .source_literal = Literal::Timestamp(-876544),
+ .target_type = timestamptz_ns(),
+ .expected_literal =
Literal::TimestampTzNs(-876544000)},
+ CastLiteralTestParam{.test_name =
"TimestampTzNsToTimestampTzBeforeEpoch",
+ .source_literal =
Literal::TimestampTzNs(-876543211),
+ .target_type = timestamp_tz(),
+ .expected_literal =
Literal::TimestampTz(-876544)},
+ CastLiteralTestParam{.test_name =
"TimestampTzToTimestampTzNsBeforeEpoch",
+ .source_literal = Literal::TimestampTz(-876544),
+ .target_type = timestamptz_ns(),
+ .expected_literal =
Literal::TimestampTzNs(-876544000)},
// Float cast tests
CastLiteralTestParam{.test_name = "FloatToDouble",
.source_literal = Literal::Float(2.0f),
@@ -804,6 +876,16 @@ INSTANTIATE_TEST_SUITE_P(
.source_literal = Literal::String("2026-01-01T00:00:01.500+00:00"),
.target_type = timestamp_tz(),
.expected_literal = Literal::TimestampTz(1767225601500000L)},
+ CastLiteralTestParam{
+ .test_name = "StringToTimestampNsBeforeEpoch",
+ .source_literal = Literal::String("1969-12-31T23:59:59.123456789"),
+ .target_type = timestamp_ns(),
+ .expected_literal = Literal::TimestampNs(-876543211)},
+ CastLiteralTestParam{
+ .test_name = "StringToTimestampTzNsBeforeEpoch",
+ .source_literal =
Literal::String("1969-12-31T23:59:59.123456789+00:00"),
+ .target_type = timestamptz_ns(),
+ .expected_literal = Literal::TimestampTzNs(-876543211)},
CastLiteralTestParam{.test_name = "StringToBinary",
.source_literal = Literal::String("010203FF"),
.target_type = binary(),
diff --git a/src/iceberg/test/schema_json_test.cc b/src/iceberg/test/schema_json_test.cc
index 87388cbb..c9532eeb 100644
--- a/src/iceberg/test/schema_json_test.cc
+++ b/src/iceberg/test/schema_json_test.cc
@@ -71,6 +71,8 @@ INSTANTIATE_TEST_SUITE_P(
SchemaJsonParam{.json = "\"timestamp\"", .type = iceberg::timestamp()},
SchemaJsonParam{.json = "\"timestamptz\"",
.type = std::make_shared<TimestampTzType>()},
+ SchemaJsonParam{.json = "\"timestamp_ns\"", .type =
iceberg::timestamp_ns()},
+ SchemaJsonParam{.json = "\"timestamptz_ns\"", .type =
iceberg::timestamptz_ns()},
SchemaJsonParam{
.json =
R"({"element":"string","element-id":3,"element-required":true,"type":"list"})",
diff --git a/src/iceberg/test/schema_test.cc b/src/iceberg/test/schema_test.cc
index 9dab35fa..838b5760 100644
--- a/src/iceberg/test/schema_test.cc
+++ b/src/iceberg/test/schema_test.cc
@@ -27,6 +27,7 @@
#include "iceberg/result.h"
#include "iceberg/schema_field.h"
+#include "iceberg/table_metadata.h"
#include "iceberg/test/matchers.h"
#include "iceberg/util/formatter.h" // IWYU pragma: keep
@@ -96,6 +97,32 @@ TEST(SchemaTest, Equality) {
ASSERT_EQ(schema5, schema1);
}
+TEST(SchemaTest, ValidateRejectsV3TypesBeforeFormatV3) {
+  // One single-field schema per nanosecond timestamp type.
+  iceberg::Schema timestamp_ns_schema(
+      {iceberg::SchemaField(1, "timestamp_ns", iceberg::timestamp_ns(), false)});
+  iceberg::Schema timestamptz_ns_schema(
+      {iceberg::SchemaField(1, "timestamptz_ns", iceberg::timestamptz_ns(), false)});
+
+  // Validating against format version 2 must fail with a message naming the field.
+  auto status = timestamp_ns_schema.Validate(2);
+  ASSERT_THAT(status, iceberg::IsError(iceberg::ErrorKind::kInvalidSchema));
+  EXPECT_THAT(status, iceberg::HasErrorMessage(
+                          "Invalid type for timestamp_ns: timestamp_ns is not "
+                          "supported until v3"));
+
+  status = timestamptz_ns_schema.Validate(2);
+  ASSERT_THAT(status, iceberg::IsError(iceberg::ErrorKind::kInvalidSchema));
+  EXPECT_THAT(status, iceberg::HasErrorMessage(
+                          "Invalid type for timestamptz_ns: timestamptz_ns is not "
+                          "supported until v3"));
+
+  // Validating against TableMetadata::kSupportedTableFormatVersion must succeed.
+  EXPECT_THAT(
+      timestamp_ns_schema.Validate(iceberg::TableMetadata::kSupportedTableFormatVersion),
+      iceberg::IsOk());
+  EXPECT_THAT(timestamptz_ns_schema.Validate(
+                  iceberg::TableMetadata::kSupportedTableFormatVersion),
+              iceberg::IsOk());
+}
+
TEST(SchemaTest, IdentifierFields) {
using iceberg::ErrorKind;
using iceberg::Schema;
diff --git a/src/iceberg/test/transform_util_test.cc b/src/iceberg/test/transform_util_test.cc
index 54f36cd0..5b64bb33 100644
--- a/src/iceberg/test/transform_util_test.cc
+++ b/src/iceberg/test/transform_util_test.cc
@@ -19,6 +19,8 @@
#include "iceberg/util/transform_util.h"
+#include <limits>
+
#include <gtest/gtest.h>
#include "iceberg/test/matchers.h"
@@ -111,6 +113,8 @@ TEST(TransformUtilTest, HumanTimestamp) {
// precision with 1 microsecond
EXPECT_EQ("2026-01-01T00:00:01.000001",
TransformUtil::HumanTimestamp(1767225601000001L));
+ // pre-epoch timestamp with fractional microseconds
+ EXPECT_EQ("1969-12-31T23:59:59.123456",
TransformUtil::HumanTimestamp(-876544));
}
TEST(TransformUtilTest, HumanTimestampWithZone) {
@@ -132,6 +136,88 @@ TEST(TransformUtilTest, HumanTimestampWithZone) {
// precision with 1 microsecond
EXPECT_EQ("2026-01-01T00:00:01.000001+00:00",
TransformUtil::HumanTimestampWithZone(1767225601000001L));
+ // pre-epoch timestamp with fractional microseconds
+ EXPECT_EQ("1969-12-31T23:59:59.123456+00:00",
+ TransformUtil::HumanTimestampWithZone(-876544));
+}
+
+TEST(TransformUtilTest, HumanTimestampNs) {
+  // One nanosecond after the epoch.
+  EXPECT_EQ("1970-01-01T00:00:00.000000001", TransformUtil::HumanTimestampNs(1));
+  EXPECT_EQ("2026-01-01T00:00:01.000001001",
+            TransformUtil::HumanTimestampNs(1767225601000001001L));
+  // A negative (pre-epoch) nanosecond count.
+  EXPECT_EQ("1969-12-31T23:59:59.123456789",
+            TransformUtil::HumanTimestampNs(-876543211));
+}
+
+TEST(TransformUtilTest, HumanTimestampNsWithZone) {
+  // Nanosecond inputs paired with the text each one is expected to render as.
+  struct Case {
+    int64_t nanos;
+    const char* expected;
+  };
+  constexpr Case kCases[] = {
+      {1, "1970-01-01T00:00:00.000000001+00:00"},
+      {1767225601000001001L, "2026-01-01T00:00:01.000001001+00:00"},
+      {-876543211, "1969-12-31T23:59:59.123456789+00:00"},
+  };
+  for (const auto& test_case : kCases) {
+    EXPECT_EQ(test_case.expected,
+              TransformUtil::HumanTimestampNsWithZone(test_case.nanos));
+  }
+}
+
+TEST(TransformUtilTest, ParseTimestampNs) {
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto nanos, TransformUtil::ParseTimestampNs("2026-01-01T00:00:01.000001001"));
+  EXPECT_EQ(nanos, 1767225601000001001L);
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto pre_epoch_nanos,
+      TransformUtil::ParseTimestampNs("1969-12-31T23:59:59.123456789"));
+  EXPECT_EQ(pre_epoch_nanos, -876543211);
+  // Formatting the parsed value must reproduce the original text.
+  EXPECT_EQ(TransformUtil::HumanTimestampNs(pre_epoch_nanos),
+            "1969-12-31T23:59:59.123456789");
+}
+
+TEST(TransformUtilTest, ParseTimestampNsChecksInt64Bounds) {
+  // The two strings below parse to exactly the int64_t maximum and minimum.
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto max_nanos,
+      TransformUtil::ParseTimestampNs("2262-04-11T23:47:16.854775807"));
+  EXPECT_EQ(max_nanos, std::numeric_limits<int64_t>::max());
+
+  ICEBERG_UNWRAP_OR_FAIL(
+      auto min_nanos,
+      TransformUtil::ParseTimestampNs("1677-09-21T00:12:43.145224192"));
+  EXPECT_EQ(min_nanos, std::numeric_limits<int64_t>::min());
+
+  // One nanosecond beyond either limit must be rejected.
+  EXPECT_THAT(TransformUtil::ParseTimestampNs("2262-04-11T23:47:16.854775808"),
+              IsError(ErrorKind::kInvalidArgument));
+  EXPECT_THAT(TransformUtil::ParseTimestampNs("1677-09-21T00:12:43.145224191"),
+              IsError(ErrorKind::kInvalidArgument));
+}
+
+TEST(TransformUtilTest, ParseTimestampNsRejectsMoreThanNineFractionalDigits) {
+  // The input carries ten fractional digits.
+  EXPECT_THAT(TransformUtil::ParseTimestampNs("2026-01-01T00:00:01.0000010011"),
+              IsError(ErrorKind::kInvalidArgument));
+}
+
+TEST(TransformUtilTest, ParseTimestampNsWithZone) {
+  // A +00:00 offset leaves the parsed nanosecond count unshifted.
+  ICEBERG_UNWRAP_OR_FAIL(auto nanos, TransformUtil::ParseTimestampNsWithZone(
+                                         "2026-01-01T00:00:01.000001001+00:00"));
+  EXPECT_EQ(nanos, 1767225601000001001L);
+}
+
+TEST(TransformUtilTest, ParseTimestampNsWithZoneChecksInt64BoundsAfterOffset) {
+  // With the offset applied, these inputs are expected to hit the int64_t
+  // limits exactly.
+  ICEBERG_UNWRAP_OR_FAIL(auto max_nanos, TransformUtil::ParseTimestampNsWithZone(
+                                             "2262-04-12T00:47:16.854775807+01:00"));
+  EXPECT_EQ(max_nanos, std::numeric_limits<int64_t>::max());
+
+  ICEBERG_UNWRAP_OR_FAIL(auto min_nanos, TransformUtil::ParseTimestampNsWithZone(
+                                             "1677-09-20T23:12:43.145224192-01:00"));
+  EXPECT_EQ(min_nanos, std::numeric_limits<int64_t>::min());
+
+  // The local fields match the limit cases of the zone-less test, but a
+  // one-minute offset must make the parse fail.
+  EXPECT_THAT(
+      TransformUtil::ParseTimestampNsWithZone("2262-04-11T23:47:16.854775807-00:01"),
+      IsError(ErrorKind::kInvalidArgument));
+  EXPECT_THAT(
+      TransformUtil::ParseTimestampNsWithZone("1677-09-21T00:12:43.145224192+00:01"),
+      IsError(ErrorKind::kInvalidArgument));
+}
+
+TEST(TransformUtilTest, ParseTimestampNsWithZoneRejectsOffsetPastPlusMinus1800) {
+  // Offsets of +18:01 and -18:30 must both be rejected.
+  EXPECT_THAT(
+      TransformUtil::ParseTimestampNsWithZone("2026-01-01T00:00:01.000001001+18:01"),
+      IsError(ErrorKind::kInvalidArgument));
+  EXPECT_THAT(
+      TransformUtil::ParseTimestampNsWithZone("2026-01-01T00:00:01.000001001-18:30"),
+      IsError(ErrorKind::kInvalidArgument));
}
TEST(TransformUtilTest, Base64Encode) {
diff --git a/src/iceberg/test/type_test.cc b/src/iceberg/test/type_test.cc
index 266ff610..e68843be 100644
--- a/src/iceberg/test/type_test.cc
+++ b/src/iceberg/test/type_test.cc
@@ -90,7 +90,7 @@ TEST_P(TypeTest, StdFormat) {
ASSERT_EQ(test_case.repr, std::format("{}", *test_case.type));
}
-const static std::array<TypeTestCase, 16> kPrimitiveTypes = {{
+const static std::array<TypeTestCase, 18> kPrimitiveTypes = {{
{
.name = "boolean",
.type = iceberg::boolean(),
@@ -168,6 +168,20 @@ const static std::array<TypeTestCase, 16> kPrimitiveTypes
= {{
.primitive = true,
.repr = "timestamptz",
},
+ {
+ .name = "timestamp_ns",
+ .type = iceberg::timestamp_ns(),
+ .type_id = iceberg::TypeId::kTimestampNs,
+ .primitive = true,
+ .repr = "timestamp_ns",
+ },
+ {
+ .name = "timestamptz_ns",
+ .type = iceberg::timestamptz_ns(),
+ .type_id = iceberg::TypeId::kTimestampTzNs,
+ .primitive = true,
+ .repr = "timestamptz_ns",
+ },
{
.name = "binary",
.type = iceberg::binary(),
diff --git a/src/iceberg/test/visit_type_test.cc b/src/iceberg/test/visit_type_test.cc
index 786e1fd2..7104581f 100644
--- a/src/iceberg/test/visit_type_test.cc
+++ b/src/iceberg/test/visit_type_test.cc
@@ -53,7 +53,7 @@ std::string TypeTestCaseToString(const
::testing::TestParamInfo<TypeTestCase>& i
return info.param.name;
}
-const static std::array<TypeTestCase, 16> kPrimitiveTypes = {{
+const static std::array<TypeTestCase, 18> kPrimitiveTypes = {{
{
.name = "boolean",
.type = iceberg::boolean(),
@@ -131,6 +131,20 @@ const static std::array<TypeTestCase, 16> kPrimitiveTypes
= {{
.primitive = true,
.repr = "timestamptz",
},
+ {
+ .name = "timestamp_ns",
+ .type = iceberg::timestamp_ns(),
+ .type_id = iceberg::TypeId::kTimestampNs,
+ .primitive = true,
+ .repr = "timestamp_ns",
+ },
+ {
+ .name = "timestamptz_ns",
+ .type = iceberg::timestamptz_ns(),
+ .type_id = iceberg::TypeId::kTimestampTzNs,
+ .primitive = true,
+ .repr = "timestamptz_ns",
+ },
{
.name = "binary",
.type = iceberg::binary(),
diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc
index c210f9ed..8a7d4b3e 100644
--- a/src/iceberg/transform.cc
+++ b/src/iceberg/transform.cc
@@ -152,6 +152,8 @@ bool Transform::CanTransform(const Type& source_type) const
{
case TypeId::kTime:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
case TypeId::kString:
case TypeId::kUuid:
case TypeId::kFixed:
@@ -177,6 +179,8 @@ bool Transform::CanTransform(const Type& source_type) const
{
case TypeId::kDate:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
return true;
default:
return false;
@@ -186,6 +190,8 @@ bool Transform::CanTransform(const Type& source_type) const
{
case TypeId::kDate:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
return true;
default:
return false;
@@ -194,6 +200,8 @@ bool Transform::CanTransform(const Type& source_type) const
{
switch (source_type.type_id()) {
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
return true;
default:
return false;
@@ -420,6 +428,11 @@ Result<std::string> Transform::ToHumanString(const
Literal& value) {
return
TransformUtil::HumanTimestamp(std::get<int64_t>(value.value()));
case TypeId::kTimestampTz:
return
TransformUtil::HumanTimestampWithZone(std::get<int64_t>(value.value()));
+ case TypeId::kTimestampNs:
+ return
TransformUtil::HumanTimestampNs(std::get<int64_t>(value.value()));
+ case TypeId::kTimestampTzNs:
+ return TransformUtil::HumanTimestampNsWithZone(
+ std::get<int64_t>(value.value()));
case TypeId::kFixed:
case TypeId::kBinary: {
const auto& binary_data =
std::get<std::vector<uint8_t>>(value.value());
diff --git a/src/iceberg/transform_function.cc b/src/iceberg/transform_function.cc
index 9213d2ce..d7c18555 100644
--- a/src/iceberg/transform_function.cc
+++ b/src/iceberg/transform_function.cc
@@ -79,6 +79,8 @@ Result<std::unique_ptr<TransformFunction>>
BucketTransform::Make(
case TypeId::kTime:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
case TypeId::kString:
case TypeId::kUuid:
case TypeId::kFixed:
@@ -148,6 +150,8 @@ Result<std::unique_ptr<TransformFunction>>
YearTransform::Make(
case TypeId::kDate:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
break;
default:
return NotSupported("{} is not a valid input type for year transform",
@@ -176,6 +180,8 @@ Result<std::unique_ptr<TransformFunction>>
MonthTransform::Make(
case TypeId::kDate:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
break;
default:
return NotSupported("{} is not a valid input type for month transform",
@@ -204,6 +210,8 @@ Result<std::unique_ptr<TransformFunction>>
DayTransform::Make(
case TypeId::kDate:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
break;
default:
return NotSupported("{} is not a valid input type for day transform",
@@ -231,6 +239,8 @@ Result<std::unique_ptr<TransformFunction>>
HourTransform::Make(
switch (source_type->type_id()) {
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
break;
default:
return NotSupported("{} is not a valid input type for hour transform",
diff --git a/src/iceberg/type.cc b/src/iceberg/type.cc
index f008ad90..b5bee37e 100644
--- a/src/iceberg/type.cc
+++ b/src/iceberg/type.cc
@@ -326,6 +326,22 @@ bool TimestampTzType::Equals(const Type& other) const {
return other.type_id() == kTypeId;
}
+// TimestampNsType: not zoned, nanosecond unit, rendered as "timestamp_ns".
+bool TimestampNsType::is_zoned() const {
+  return false;
+}
+TimeUnit TimestampNsType::time_unit() const {
+  return TimeUnit::kNanosecond;
+}
+TypeId TimestampNsType::type_id() const {
+  return kTypeId;
+}
+std::string TimestampNsType::ToString() const {
+  return "timestamp_ns";
+}
+bool TimestampNsType::Equals(const Type& other) const {
+  // The type has no parameters, so matching type ids is the whole comparison.
+  return kTypeId == other.type_id();
+}
+
+// TimestampTzNsType: zoned, nanosecond unit, rendered as "timestamptz_ns".
+bool TimestampTzNsType::is_zoned() const {
+  return true;
+}
+TimeUnit TimestampTzNsType::time_unit() const {
+  return TimeUnit::kNanosecond;
+}
+TypeId TimestampTzNsType::type_id() const {
+  return kTypeId;
+}
+std::string TimestampTzNsType::ToString() const {
+  return "timestamptz_ns";
+}
+bool TimestampTzNsType::Equals(const Type& other) const {
+  // The type has no parameters, so matching type ids is the whole comparison.
+  return kTypeId == other.type_id();
+}
+
TypeId StringType::type_id() const { return kTypeId; }
std::string StringType::ToString() const { return "string"; }
bool StringType::Equals(const Type& other) const { return other.type_id() ==
kTypeId; }
@@ -371,6 +387,8 @@ TYPE_FACTORY(date, DateType)
TYPE_FACTORY(time, TimeType)
TYPE_FACTORY(timestamp, TimestampType)
TYPE_FACTORY(timestamp_tz, TimestampTzType)
+TYPE_FACTORY(timestamp_ns, TimestampNsType)
+TYPE_FACTORY(timestamptz_ns, TimestampTzNsType)
TYPE_FACTORY(binary, BinaryType)
TYPE_FACTORY(string, StringType)
TYPE_FACTORY(uuid, UuidType)
@@ -425,6 +443,10 @@ std::string_view ToString(TypeId id) {
return "timestamp";
case TypeId::kTimestampTz:
return "timestamptz";
+ case TypeId::kTimestampNs:
+ return "timestamp_ns";
+ case TypeId::kTimestampTzNs:
+ return "timestamptz_ns";
case TypeId::kString:
return "string";
case TypeId::kUuid:
diff --git a/src/iceberg/type.h b/src/iceberg/type.h
index 1c50135d..53237cdb 100644
--- a/src/iceberg/type.h
+++ b/src/iceberg/type.h
@@ -396,6 +396,44 @@ class ICEBERG_EXPORT TimestampTzType : public
TimestampBase {
bool Equals(const Type& other) const override;
};
+/// \brief Nanosecond-precision timestamp type that carries no time zone
+/// reference.
+class ICEBERG_EXPORT TimestampNsType : public TimestampBase {
+ public:
+  /// \brief Type id shared by every instance of this type.
+  static constexpr TypeId kTypeId = TypeId::kTimestampNs;
+
+  TimestampNsType() = default;
+  ~TimestampNsType() override = default;
+
+  // TimestampBase interface.
+  bool is_zoned() const override;
+  TimeUnit time_unit() const override;
+
+  // Type interface.
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
+/// \brief Nanosecond-precision timestamp type whose values count nanoseconds
+/// since the epoch in UTC. No time zone or offset is stored with the value.
+class ICEBERG_EXPORT TimestampTzNsType : public TimestampBase {
+ public:
+  /// \brief Type id shared by every instance of this type.
+  static constexpr TypeId kTypeId = TypeId::kTimestampTzNs;
+
+  TimestampTzNsType() = default;
+  ~TimestampTzNsType() override = default;
+
+  // TimestampBase interface.
+  bool is_zoned() const override;
+  TimeUnit time_unit() const override;
+
+  // Type interface.
+  TypeId type_id() const override;
+  std::string ToString() const override;
+
+ protected:
+  bool Equals(const Type& other) const override;
+};
+
/// \brief A data type representing an arbitrary-length byte sequence.
class ICEBERG_EXPORT BinaryType : public PrimitiveType {
public:
@@ -490,6 +528,10 @@ ICEBERG_EXPORT const std::shared_ptr<TimeType>& time();
ICEBERG_EXPORT const std::shared_ptr<TimestampType>& timestamp();
/// \brief Return a TimestampTzType instance.
ICEBERG_EXPORT const std::shared_ptr<TimestampTzType>& timestamp_tz();
+/// \brief Return a TimestampNsType instance.
+ICEBERG_EXPORT const std::shared_ptr<TimestampNsType>& timestamp_ns();
+/// \brief Return a TimestampTzNsType instance.
+ICEBERG_EXPORT const std::shared_ptr<TimestampTzNsType>& timestamptz_ns();
/// \brief Return a BinaryType instance.
ICEBERG_EXPORT const std::shared_ptr<BinaryType>& binary();
/// \brief Return a StringType instance.
diff --git a/src/iceberg/type_fwd.h b/src/iceberg/type_fwd.h
index 3fe199d8..144a9e33 100644
--- a/src/iceberg/type_fwd.h
+++ b/src/iceberg/type_fwd.h
@@ -46,6 +46,8 @@ enum class TypeId {
kTime,
kTimestamp,
kTimestampTz,
+ kTimestampNs,
+ kTimestampTzNs,
kString,
kUuid,
kFixed,
@@ -55,6 +57,7 @@ enum class TypeId {
/// \brief The time unit. In Iceberg V3 nanoseconds are also supported.
enum class TimeUnit {
kMicrosecond,
+ kNanosecond,
};
/// \brief Data type family.
@@ -77,6 +80,8 @@ class TimeType;
class TimestampBase;
class TimestampType;
class TimestampTzType;
+class TimestampNsType;
+class TimestampTzNsType;
class Type;
class UuidType;
diff --git a/src/iceberg/util/bucket_util.cc b/src/iceberg/util/bucket_util.cc
index 88b240de..2c0718f5 100644
--- a/src/iceberg/util/bucket_util.cc
+++ b/src/iceberg/util/bucket_util.cc
@@ -24,6 +24,7 @@
#include "iceberg/expression/literal.h"
#include "iceberg/util/endian.h"
#include "iceberg/util/murmurhash3_internal.h"
+#include "iceberg/util/temporal_util.h"
namespace iceberg {
@@ -63,6 +64,18 @@ int32_t HashLiteral<TypeId::kTimestampTz>(const Literal&
literal) {
return BucketUtils::HashLong(std::get<int64_t>(literal.value()));
}
+template <>
+int32_t HashLiteral<TypeId::kTimestampNs>(const Literal& literal) {
+  // The nanosecond value is converted with NanosToMicros before hashing; the
+  // raw nanosecond count is never hashed directly.
+  const auto nanos = std::get<int64_t>(literal.value());
+  const auto micros = TemporalUtils::NanosToMicros(nanos);
+  return BucketUtils::HashLong(micros);
+}
+
+template <>
+int32_t HashLiteral<TypeId::kTimestampTzNs>(const Literal& literal) {
+  // The nanosecond value is converted with NanosToMicros before hashing; the
+  // raw nanosecond count is never hashed directly.
+  const auto nanos = std::get<int64_t>(literal.value());
+  const auto micros = TemporalUtils::NanosToMicros(nanos);
+  return BucketUtils::HashLong(micros);
+}
+
template <>
int32_t HashLiteral<TypeId::kDecimal>(const Literal& literal) {
const auto& decimal = std::get<Decimal>(literal.value());
@@ -131,6 +144,8 @@ Result<int32_t> BucketUtils::BucketIndex(const Literal&
literal, int32_t num_buc
DISPATCH_HASH_LITERAL(TypeId::kTime)
DISPATCH_HASH_LITERAL(TypeId::kTimestamp)
DISPATCH_HASH_LITERAL(TypeId::kTimestampTz)
+ DISPATCH_HASH_LITERAL(TypeId::kTimestampNs)
+ DISPATCH_HASH_LITERAL(TypeId::kTimestampTzNs)
DISPATCH_HASH_LITERAL(TypeId::kDecimal)
DISPATCH_HASH_LITERAL(TypeId::kString)
DISPATCH_HASH_LITERAL(TypeId::kUuid)
diff --git a/src/iceberg/util/conversions.cc b/src/iceberg/util/conversions.cc
index 0cc7c55d..5c535610 100644
--- a/src/iceberg/util/conversions.cc
+++ b/src/iceberg/util/conversions.cc
@@ -109,6 +109,8 @@ Result<std::vector<uint8_t>> Conversions::ToBytes(const
PrimitiveType& type,
DISPATCH_LITERAL_TO_BYTES(TypeId::kTime)
DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestamp)
DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTz)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampNs)
+ DISPATCH_LITERAL_TO_BYTES(TypeId::kTimestampTzNs)
DISPATCH_LITERAL_TO_BYTES(TypeId::kFloat)
DISPATCH_LITERAL_TO_BYTES(TypeId::kDouble)
DISPATCH_LITERAL_TO_BYTES(TypeId::kDecimal)
@@ -158,7 +160,9 @@ Result<Literal::Value> Conversions::FromBytes(const
PrimitiveType& type,
case TypeId::kLong:
case TypeId::kTime:
case TypeId::kTimestamp:
- case TypeId::kTimestampTz: {
+ case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs: {
int64_t value;
if (data.size() < 8) {
// Type was promoted from int to long
diff --git a/src/iceberg/util/projection_util_internal.h b/src/iceberg/util/projection_util_internal.h
index df4fe978..e9cda56c 100644
--- a/src/iceberg/util/projection_util_internal.h
+++ b/src/iceberg/util/projection_util_internal.h
@@ -54,6 +54,10 @@ class ProjectionUtil {
return Literal::Timestamp(std::get<int64_t>(literal.value()) +
adjustment);
case TypeId::kTimestampTz:
return Literal::TimestampTz(std::get<int64_t>(literal.value()) +
adjustment);
+ case TypeId::kTimestampNs:
+ return Literal::TimestampNs(std::get<int64_t>(literal.value()) +
adjustment);
+ case TypeId::kTimestampTzNs:
+ return Literal::TimestampTzNs(std::get<int64_t>(literal.value()) +
adjustment);
case TypeId::kDecimal: {
const auto& decimal_type =
internal::checked_cast<const DecimalType&>(*literal.type());
@@ -143,6 +147,8 @@ class ProjectionUtil {
case TypeId::kDate:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
break;
default:
return NotSupported("{} is not a valid input type for numeric
transform",
@@ -179,6 +185,8 @@ class ProjectionUtil {
case TypeId::kDate:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
break;
default:
return NotSupported("{} is not a valid input type for numeric
transform",
diff --git a/src/iceberg/util/struct_like_set.cc b/src/iceberg/util/struct_like_set.cc
index cc3de529..433cfa68 100644
--- a/src/iceberg/util/struct_like_set.cc
+++ b/src/iceberg/util/struct_like_set.cc
@@ -276,6 +276,8 @@ Status ValidateScalarAgainstType(const Scalar& scalar,
const Type& type) {
case TypeId::kTime:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
+ case TypeId::kTimestampNs:
+ case TypeId::kTimestampTzNs:
ICEBERG_PRECHECK(std::holds_alternative<int64_t>(scalar), "Expected {}
but got {}",
type.ToString(), ScalarTypeName(scalar));
return {};
diff --git a/src/iceberg/util/temporal_util.cc b/src/iceberg/util/temporal_util.cc
index 05aafb96..b91fcec7 100644
--- a/src/iceberg/util/temporal_util.cc
+++ b/src/iceberg/util/temporal_util.cc
@@ -21,9 +21,11 @@
#include <chrono>
#include <cstdint>
+#include <limits>
#include <utility>
#include "iceberg/expression/literal.h"
+#include "iceberg/util/int128.h"
namespace iceberg {
@@ -31,9 +33,28 @@ namespace {
using namespace std::chrono; // NOLINT
+constexpr int64_t kNanosPerMicro = 1000;
+
constexpr auto kEpochYmd = year{1970} / January / 1;
constexpr auto kEpochDays = sys_days(kEpochYmd);
+/// \brief Integer division that rounds toward negative infinity.
+///
+/// Built-in `/` truncates toward zero, so the truncated quotient is lowered by
+/// one when the operands have opposite signs and the division is inexact.
+inline constexpr int64_t FloorDiv(int64_t dividend, int64_t divisor) {
+  const int64_t truncated = dividend / divisor;
+  const bool opposite_signs = (dividend ^ divisor) < 0;
+  const bool inexact = truncated * divisor != dividend;
+  return (opposite_signs && inexact) ? truncated - 1 : truncated;
+}
+
+/// \brief Multiply two int64_t values, reporting overflow as an error.
+///
+/// \return The product, or an InvalidArgument error when the product does not
+/// fit in int64_t.
+Result<int64_t> MultiplyExact(int64_t lhs, int64_t rhs) {
+  // Form the product in 128 bits, then range-check it against int64_t.
+  const auto result = static_cast<int128_t>(lhs) * static_cast<int128_t>(rhs);
+  if (result > std::numeric_limits<int64_t>::max() ||
+      result < std::numeric_limits<int64_t>::min()) [[unlikely]] {
+    return InvalidArgument("Long overflow when multiplying {} by {}", lhs, rhs);
+  }
+  return static_cast<int64_t>(result);
+}
+
inline constexpr year_month_day DateToYmd(int32_t days_since_epoch) {
return {kEpochDays + days{days_since_epoch}};
}
@@ -42,6 +63,10 @@ inline constexpr year_month_day TimestampToYmd(int64_t
micros_since_epoch) {
return
{floor<days>(sys_time<microseconds>(microseconds{micros_since_epoch}))};
}
+/// Calendar date of the day containing `nanos_since_epoch`; the instant is
+/// floored to whole days before conversion.
+inline constexpr year_month_day TimestampNsToYmd(int64_t nanos_since_epoch) {
+  const sys_time<nanoseconds> instant{nanoseconds{nanos_since_epoch}};
+  return year_month_day{floor<days>(instant)};
+}
+
template <typename Duration>
requires std::is_same_v<Duration, days> || std::is_same_v<Duration, hours>
inline constexpr int32_t TimestampToDuration(int64_t micros_since_epoch) {
@@ -51,6 +76,15 @@ inline constexpr int32_t TimestampToDuration(int64_t
micros_since_epoch) {
.count());
}
+/// Number of whole `Duration` units (days or hours) contained in
+/// `nanos_since_epoch`, rounded toward negative infinity.
+template <typename Duration>
+  requires std::is_same_v<Duration, days> || std::is_same_v<Duration, hours>
+inline constexpr int32_t TimestampNsToDuration(int64_t nanos_since_epoch) {
+  const nanoseconds since_epoch{nanos_since_epoch};
+  const auto whole_units = floor<Duration>(since_epoch);
+  return static_cast<int32_t>(whole_units.count());
+}
+
inline constexpr int32_t MonthsSinceEpoch(const year_month_day& ymd) {
auto delta = ymd.year() - kEpochYmd.year();
// Calculate the month as months from 1970-01
@@ -78,11 +112,23 @@ Result<Literal> ExtractYearImpl<TypeId::kTimestamp>(const
Literal& literal) {
return Literal::Int((ymd.year() - kEpochYmd.year()).count());
}
+/// Year of a nanosecond-precision timestamp literal, as years from 1970.
+template <>
+Result<Literal> ExtractYearImpl<TypeId::kTimestampNs>(const Literal& literal) {
+  const auto nanos_since_epoch = std::get<int64_t>(literal.value());
+  const auto years_since_epoch =
+      TimestampNsToYmd(nanos_since_epoch).year() - kEpochYmd.year();
+  return Literal::Int(years_since_epoch.count());
+}
+
template <>
Result<Literal> ExtractYearImpl<TypeId::kTimestampTz>(const Literal& literal) {
return ExtractYearImpl<TypeId::kTimestamp>(literal);
}
+/// The zoned nanosecond variant forwards to the kTimestampNs specialization.
+template <>
+Result<Literal> ExtractYearImpl<TypeId::kTimestampTzNs>(const Literal& literal) {
+  return ExtractYearImpl<TypeId::kTimestampNs>(literal);
+}
+
template <TypeId type_id>
Result<Literal> ExtractMonthImpl(const Literal& literal) {
std::unreachable();
@@ -102,11 +148,23 @@ Result<Literal>
ExtractMonthImpl<TypeId::kTimestamp>(const Literal& literal) {
return Literal::Int(MonthsSinceEpoch(ymd));
}
+/// Month of a nanosecond-precision timestamp literal, as months from 1970-01.
+template <>
+Result<Literal> ExtractMonthImpl<TypeId::kTimestampNs>(const Literal& literal) {
+  const auto nanos_since_epoch = std::get<int64_t>(literal.value());
+  const year_month_day date = TimestampNsToYmd(nanos_since_epoch);
+  return Literal::Int(MonthsSinceEpoch(date));
+}
+
template <>
Result<Literal> ExtractMonthImpl<TypeId::kTimestampTz>(const Literal& literal)
{
return ExtractMonthImpl<TypeId::kTimestamp>(literal);
}
+/// The zoned nanosecond variant forwards to the kTimestampNs specialization.
+template <>
+Result<Literal> ExtractMonthImpl<TypeId::kTimestampTzNs>(const Literal& literal) {
+  return ExtractMonthImpl<TypeId::kTimestampNs>(literal);
+}
+
template <TypeId type_id>
Result<Literal> ExtractDayImpl(const Literal& literal) {
std::unreachable();
@@ -123,11 +181,22 @@ Result<Literal> ExtractDayImpl<TypeId::kTimestamp>(const
Literal& literal) {
return Literal::Int(TimestampToDuration<days>(value));
}
+/// Whole days contained in a nanosecond-precision timestamp literal, floored.
+template <>
+Result<Literal> ExtractDayImpl<TypeId::kTimestampNs>(const Literal& literal) {
+  const int64_t nanos_since_epoch = std::get<int64_t>(literal.value());
+  const int32_t day_ordinal = TimestampNsToDuration<days>(nanos_since_epoch);
+  return Literal::Int(day_ordinal);
+}
+
template <>
Result<Literal> ExtractDayImpl<TypeId::kTimestampTz>(const Literal& literal) {
return ExtractDayImpl<TypeId::kTimestamp>(literal);
}
+/// The zoned nanosecond variant forwards to the kTimestampNs specialization.
+template <>
+Result<Literal> ExtractDayImpl<TypeId::kTimestampTzNs>(const Literal& literal) {
+  return ExtractDayImpl<TypeId::kTimestampNs>(literal);
+}
+
template <TypeId type_id>
Result<Literal> ExtractHourImpl(const Literal& literal) {
std::unreachable();
@@ -139,13 +208,32 @@ Result<Literal> ExtractHourImpl<TypeId::kTimestamp>(const
Literal& literal) {
return Literal::Int(TimestampToDuration<hours>(value));
}
+/// Whole hours contained in a nanosecond-precision timestamp literal, floored.
+template <>
+Result<Literal> ExtractHourImpl<TypeId::kTimestampNs>(const Literal& literal) {
+  const int64_t nanos_since_epoch = std::get<int64_t>(literal.value());
+  const int32_t hour_ordinal = TimestampNsToDuration<hours>(nanos_since_epoch);
+  return Literal::Int(hour_ordinal);
+}
+
template <>
Result<Literal> ExtractHourImpl<TypeId::kTimestampTz>(const Literal& literal) {
return ExtractHourImpl<TypeId::kTimestamp>(literal);
}
+/// The zoned nanosecond variant forwards to the kTimestampNs specialization.
+template <>
+Result<Literal> ExtractHourImpl<TypeId::kTimestampTzNs>(const Literal& literal) {
+  return ExtractHourImpl<TypeId::kTimestampNs>(literal);
+}
+
} // namespace
+int64_t TemporalUtils::NanosToMicros(int64_t nanos) {
+  // Floor division rather than truncation, so negative inputs round down.
+  const int64_t micros = FloorDiv(nanos, kNanosPerMicro);
+  return micros;
+}
+
+Result<int64_t> TemporalUtils::MicrosToNanos(int64_t micros) {
+  // MultiplyExact reports an error instead of wrapping on int64 overflow.
+  auto nanos = MultiplyExact(micros, kNanosPerMicro);
+  return nanos;
+}
+
#define DISPATCH_EXTRACT_YEAR(type_id) \
case type_id: \
return ExtractYearImpl<type_id>(literal);
@@ -163,6 +251,8 @@ Result<Literal> TemporalUtils::ExtractYear(const Literal&
literal) {
DISPATCH_EXTRACT_YEAR(TypeId::kDate)
DISPATCH_EXTRACT_YEAR(TypeId::kTimestamp)
DISPATCH_EXTRACT_YEAR(TypeId::kTimestampTz)
+ DISPATCH_EXTRACT_YEAR(TypeId::kTimestampNs)
+ DISPATCH_EXTRACT_YEAR(TypeId::kTimestampTzNs)
default:
return NotSupported("Extract year from type {} is not supported",
literal.type()->ToString());
@@ -186,6 +276,8 @@ Result<Literal> TemporalUtils::ExtractMonth(const Literal&
literal) {
DISPATCH_EXTRACT_MONTH(TypeId::kDate)
DISPATCH_EXTRACT_MONTH(TypeId::kTimestamp)
DISPATCH_EXTRACT_MONTH(TypeId::kTimestampTz)
+ DISPATCH_EXTRACT_MONTH(TypeId::kTimestampNs)
+ DISPATCH_EXTRACT_MONTH(TypeId::kTimestampTzNs)
default:
return NotSupported("Extract month from type {} is not supported",
literal.type()->ToString());
@@ -209,6 +301,8 @@ Result<Literal> TemporalUtils::ExtractDay(const Literal&
literal) {
DISPATCH_EXTRACT_DAY(TypeId::kDate)
DISPATCH_EXTRACT_DAY(TypeId::kTimestamp)
DISPATCH_EXTRACT_DAY(TypeId::kTimestampTz)
+ DISPATCH_EXTRACT_DAY(TypeId::kTimestampNs)
+ DISPATCH_EXTRACT_DAY(TypeId::kTimestampTzNs)
default:
return NotSupported("Extract day from type {} is not supported",
literal.type()->ToString());
@@ -231,6 +325,8 @@ Result<Literal> TemporalUtils::ExtractHour(const Literal&
literal) {
switch (literal.type()->type_id()) {
DISPATCH_EXTRACT_HOUR(TypeId::kTimestamp)
DISPATCH_EXTRACT_HOUR(TypeId::kTimestampTz)
+ DISPATCH_EXTRACT_HOUR(TypeId::kTimestampNs)
+ DISPATCH_EXTRACT_HOUR(TypeId::kTimestampTzNs)
default:
return NotSupported("Extract hour from type {} is not supported",
literal.type()->ToString());
diff --git a/src/iceberg/util/temporal_util.h b/src/iceberg/util/temporal_util.h
index 750c3d8b..414e4fd2 100644
--- a/src/iceberg/util/temporal_util.h
+++ b/src/iceberg/util/temporal_util.h
@@ -19,6 +19,8 @@
#pragma once
+#include <cstdint>
+
#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"
#include "iceberg/type_fwd.h"
@@ -27,6 +29,12 @@ namespace iceberg {
class ICEBERG_EXPORT TemporalUtils {
public:
+ /// \brief Convert nanoseconds since epoch to microseconds using floor
division.
+ static int64_t NanosToMicros(int64_t nanos);
+
+ /// \brief Convert microseconds since epoch to nanoseconds, failing on
overflow.
+ static Result<int64_t> MicrosToNanos(int64_t micros);
+
/// \brief Extract a date or timestamp year, as years from 1970
static Result<Literal> ExtractYear(const Literal& literal);
diff --git a/src/iceberg/util/transform_util.cc
b/src/iceberg/util/transform_util.cc
index a9221310..fc1b104e 100644
--- a/src/iceberg/util/transform_util.cc
+++ b/src/iceberg/util/transform_util.cc
@@ -21,7 +21,9 @@
#include <array>
#include <chrono>
+#include <limits>
+#include "iceberg/util/int128.h"
#include "iceberg/util/macros.h"
#include "iceberg/util/string_util.h"
@@ -32,6 +34,9 @@ constexpr auto kEpochDate = std::chrono::year{1970} /
std::chrono::January / 1;
constexpr int64_t kMicrosPerMillis = 1'000;
constexpr int64_t kMicrosPerSecond = 1'000'000;
constexpr int64_t kMicrosPerDay = 86'400'000'000LL;
+constexpr int64_t kNanosPerMillis = 1'000'000;
+constexpr int64_t kNanosPerSecond = 1'000'000'000;
+constexpr int64_t kNanosPerDay = 86'400'000'000'000LL;
/// Parse a timezone offset of the form "+HH:mm" or "-HH:mm" and return the
/// offset in microseconds (positive for east of UTC, negative for west).
@@ -44,28 +49,124 @@ Result<int64_t> ParseTimezoneOffset(std::string_view
offset) {
StringUtils::ParseNumber<int64_t>(offset.substr(1,
2)));
ICEBERG_ASSIGN_OR_RAISE(auto minutes,
StringUtils::ParseNumber<int64_t>(offset.substr(4,
2)));
- if (hours > 18 || minutes > 59) {
+ if (hours > 18 || minutes > 59) [[unlikely]] {
return InvalidArgument("Invalid timezone offset: '{}'", offset);
}
+
+ if (hours == 18 && minutes != 0) [[unlikely]] {
+ return InvalidArgument("Timezone offset '{}' not in range [-18:00,
+18:00]", offset);
+ }
+
auto micros = hours * 3'600 * kMicrosPerSecond + minutes * 60 *
kMicrosPerSecond;
return negative ? -micros : micros;
}
+/// Splits a zoned timestamp string into the part before the zone suffix and
+/// the zone offset in microseconds. A trailing "Z" yields offset 0; otherwise
+/// the last six characters must form a "+HH:mm" / "-HH:mm" offset.
+Result<std::pair<std::string_view, int64_t>> ParseTimestampWithZoneSuffix(
+    std::string_view str) {
+  constexpr size_t kOffsetLength = 6;  // strlen("+HH:mm")
+
+  if (str.empty()) [[unlikely]] {
+    return InvalidArgument("Invalid timestamptz string: '{}'", str);
+  }
+
+  if (str.back() == 'Z') {
+    return std::make_pair(str.substr(0, str.size() - 1), int64_t{0});
+  }
+
+  const bool has_offset_suffix =
+      str.size() >= kOffsetLength && (str[str.size() - kOffsetLength] == '+' ||
+                                      str[str.size() - kOffsetLength] == '-');
+  if (!has_offset_suffix) {
+    return InvalidArgument("Invalid timestamptz string (missing timezone suffix): '{}'",
+                           str);
+  }
+
+  // Parse "+HH:mm" or "-HH:mm" offset suffix
+  const size_t split_at = str.size() - kOffsetLength;
+  ICEBERG_ASSIGN_OR_RAISE(auto offset_micros, ParseTimezoneOffset(str.substr(split_at)));
+  return std::make_pair(str.substr(0, split_at), offset_micros);
+}
+
+/// Combines a day ordinal and a time of day into a single timestamp:
+///   days * units_per_day + time_units - offset_micros * units_per_micro
+/// The sum is evaluated in 128 bits; InvalidArgument is returned when the
+/// result does not fit in int64_t.
+Result<int64_t> TimestampFromDayTime(int32_t days, int64_t time_units,
+                                     int64_t units_per_day, int64_t offset_micros,
+                                     int64_t units_per_micro) {
+  const int128_t day_units =
+      static_cast<int128_t>(days) * static_cast<int128_t>(units_per_day);
+  const int128_t offset_units =
+      static_cast<int128_t>(offset_micros) * static_cast<int128_t>(units_per_micro);
+  // The zone offset is subtracted from the local value.
+  const int128_t total = day_units + static_cast<int128_t>(time_units) - offset_units;
+
+  const bool out_of_range = total > std::numeric_limits<int64_t>::max() ||
+                            total < std::numeric_limits<int64_t>::min();
+  if (out_of_range) [[unlikely]] {
+    return InvalidArgument("Timestamp value is out of int64 range");
+  }
+
+  return static_cast<int64_t>(total);
+}
+
/// Parse fractional seconds (after '.') and return micros.
/// Digits beyond 6 are truncated (nanosecond precision).
Result<int64_t> ParseFractionalMicros(std::string_view frac) {
- if (frac.empty()) {
+ if (frac.empty() || frac.size() > 9) [[unlikely]] {
return InvalidArgument("Invalid fractional seconds: '{}'", frac);
}
// Truncate to microsecond precision (6 digits), matching Java
ISO_LOCAL_TIME behavior
if (frac.size() > 6) frac = frac.substr(0, 6);
ICEBERG_ASSIGN_OR_RAISE(auto val, StringUtils::ParseNumber<int32_t>(frac));
- // Right-pad to 6 digits: "500" → 500000, "001" → 1000, "000001" → 1
+ // Right-pad to 6 digits: "500" -> 500000, "001" -> 1000, "000001" -> 1
for (size_t i = frac.size(); i < 6; ++i) {
val *= 10;
}
return static_cast<int64_t>(val);
}
+
+/// Parse fractional seconds (after '.') and return nanos.
+/// An empty string, or one longer than 9 characters, is rejected.
+Result<int64_t> ParseFractionalNanos(std::string_view frac) {
+  constexpr size_t kNanoDigits = 9;
+  const bool valid_length = !frac.empty() && frac.size() <= kNanoDigits;
+  if (!valid_length) [[unlikely]] {
+    return InvalidArgument("Invalid fractional seconds: '{}'", frac);
+  }
+  ICEBERG_ASSIGN_OR_RAISE(auto nanos, StringUtils::ParseNumber<int32_t>(frac));
+  // Right-pad to 9 digits: "500" -> 500000000, "001" -> 1000000, "000001" -> 1000
+  for (size_t missing = kNanoDigits - frac.size(); missing > 0; --missing) {
+    nanos *= 10;
+  }
+  return static_cast<int64_t>(nanos);
+}
+
+/// Parses "HH:mm", "HH:mm:ss" or "HH:mm:ss.f" into units from midnight.
+///
+/// \param str the time string to parse.
+/// \param units_per_second number of result units in one second.
+/// \param parse_fraction callable converting the text after '.' into result units.
+/// \return the time in result units, or an error when the string is malformed or
+/// a field is out of range (hours 0-23, minutes 0-59, seconds 0-59).
+template <typename TimeScaleParser>
+Result<int64_t> ParseTimeWithFraction(std::string_view str, int64_t units_per_second,
+                                      TimeScaleParser&& parse_fraction) {
+  // Fields must be plain decimal digits. Without this a signed field such as
+  // "-0" passes the range check below, and a signed fraction would produce a
+  // negative sub-second part.
+  const auto is_all_digits = [](std::string_view field) {
+    for (const char c : field) {
+      if (c < '0' || c > '9') return false;
+    }
+    return true;
+  };
+
+  if (str.size() < 5 || str[2] != ':' || !is_all_digits(str.substr(0, 2)) ||
+      !is_all_digits(str.substr(3, 2))) [[unlikely]] {
+    return InvalidArgument("Invalid time string: '{}'", str);
+  }
+
+  ICEBERG_ASSIGN_OR_RAISE(auto hours,
+                          StringUtils::ParseNumber<int64_t>(str.substr(0, 2)));
+  ICEBERG_ASSIGN_OR_RAISE(auto minutes,
+                          StringUtils::ParseNumber<int64_t>(str.substr(3, 2)));
+  int64_t seconds = 0;
+
+  int64_t frac_units = 0;
+  if (str.size() > 5) {
+    if (str[5] != ':' || str.size() < 8 || !is_all_digits(str.substr(6, 2)))
+        [[unlikely]] {
+      return InvalidArgument("Invalid time string: '{}'", str);
+    }
+    ICEBERG_ASSIGN_OR_RAISE(seconds, StringUtils::ParseNumber<int64_t>(str.substr(6, 2)));
+    if (str.size() > 8) {
+      // An empty fraction is still passed on so parse_fraction reports it.
+      const auto fraction = str.substr(9);
+      if (str[8] != '.' || !is_all_digits(fraction)) [[unlikely]] {
+        return InvalidArgument("Invalid time string: '{}'", str);
+      }
+      ICEBERG_ASSIGN_OR_RAISE(frac_units, parse_fraction(fraction));
+    }
+  }
+
+  if (hours < 0 || hours > 23 || minutes < 0 || minutes > 59 || seconds < 0 ||
+      seconds > 59) [[unlikely]] {
+    return InvalidArgument("Invalid time string: '{}'", str);
+  }
+
+  return hours * 3'600 * units_per_second + minutes * 60 * units_per_second +
+         seconds * units_per_second + frac_units;
+}
} // namespace
std::string TransformUtil::HumanYear(int32_t year_ordinal) {
@@ -79,7 +180,7 @@ std::string TransformUtil::HumanMonth(int32_t month_ordinal)
{
}
std::string TransformUtil::HumanDay(int32_t day_ordinal) {
- auto ymd = std::chrono::sys_days(kEpochDate) +
std::chrono::days{day_ordinal};
+ auto ymd = std::chrono::sys_days{kEpochDate} +
std::chrono::days{day_ordinal};
return std::format("{:%F}", ymd);
}
@@ -105,9 +206,14 @@ std::string TransformUtil::HumanTime(int64_t
micros_from_midnight) {
}
std::string TransformUtil::HumanTimestamp(int64_t timestamp_micros) {
+ const auto micros_since_epoch = std::chrono::microseconds{timestamp_micros};
+ const auto seconds_since_epoch =
+ std::chrono::floor<std::chrono::seconds>(micros_since_epoch);
auto tp = std::chrono::time_point<std::chrono::system_clock,
std::chrono::seconds>{
- std::chrono::seconds(timestamp_micros / kMicrosPerSecond)};
- auto micros = timestamp_micros % kMicrosPerSecond;
+ seconds_since_epoch};
+ auto micros =
std::chrono::duration_cast<std::chrono::microseconds>(micros_since_epoch -
+
seconds_since_epoch)
+ .count();
if (micros == 0) {
return std::format("{:%FT%T}", tp);
} else if (micros % kMicrosPerMillis == 0) {
@@ -117,10 +223,35 @@ std::string TransformUtil::HumanTimestamp(int64_t
timestamp_micros) {
}
}
+std::string TransformUtil::HumanTimestampNs(int64_t timestamp_nanos) {
+  // Nanoseconds per microsecond. kMicrosPerMillis has the same value but the
+  // wrong unit for a nanosecond count.
+  constexpr int64_t kNanosPerMicro = 1'000;
+
+  const auto nanos_since_epoch = std::chrono::nanoseconds{timestamp_nanos};
+  // Flooring to seconds keeps the sub-second remainder non-negative.
+  const auto seconds_since_epoch =
+      std::chrono::floor<std::chrono::seconds>(nanos_since_epoch);
+  auto tp = std::chrono::time_point<std::chrono::system_clock, std::chrono::seconds>{
+      seconds_since_epoch};
+  const auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                         nanos_since_epoch - seconds_since_epoch)
+                         .count();
+
+  // Print the shortest of 0, 3, 6 or 9 fractional digits that loses nothing.
+  if (nanos == 0) {
+    return std::format("{:%FT%T}", tp);
+  }
+  if (nanos % kNanosPerMillis == 0) {
+    return std::format("{:%FT%T}.{:03d}", tp, nanos / kNanosPerMillis);
+  }
+  if (nanos % kNanosPerMicro == 0) {
+    return std::format("{:%FT%T}.{:06d}", tp, nanos / kNanosPerMicro);
+  }
+  return std::format("{:%FT%T}.{:09d}", tp, nanos);
+}
+
std::string TransformUtil::HumanTimestampWithZone(int64_t timestamp_micros) {
+ const auto micros_since_epoch = std::chrono::microseconds{timestamp_micros};
+ const auto seconds_since_epoch =
+ std::chrono::floor<std::chrono::seconds>(micros_since_epoch);
auto tp = std::chrono::time_point<std::chrono::system_clock,
std::chrono::seconds>{
- std::chrono::seconds(timestamp_micros / kMicrosPerSecond)};
- auto micros = timestamp_micros % kMicrosPerSecond;
+ seconds_since_epoch};
+ auto micros =
std::chrono::duration_cast<std::chrono::microseconds>(micros_since_epoch -
+
seconds_since_epoch)
+ .count();
if (micros == 0) {
return std::format("{:%FT%T}+00:00", tp);
} else if (micros % kMicrosPerMillis == 0) {
@@ -130,6 +261,26 @@ std::string TransformUtil::HumanTimestampWithZone(int64_t
timestamp_micros) {
}
}
+std::string TransformUtil::HumanTimestampNsWithZone(int64_t timestamp_nanos) {
+  // Nanoseconds per microsecond. kMicrosPerMillis has the same value but the
+  // wrong unit for a nanosecond count.
+  constexpr int64_t kNanosPerMicro = 1'000;
+
+  const auto nanos_since_epoch = std::chrono::nanoseconds{timestamp_nanos};
+  // Flooring to seconds keeps the sub-second remainder non-negative.
+  const auto seconds_since_epoch =
+      std::chrono::floor<std::chrono::seconds>(nanos_since_epoch);
+  auto tp = std::chrono::time_point<std::chrono::system_clock, std::chrono::seconds>{
+      seconds_since_epoch};
+  const auto nanos = std::chrono::duration_cast<std::chrono::nanoseconds>(
+                         nanos_since_epoch - seconds_since_epoch)
+                         .count();
+
+  // Print the shortest of 0, 3, 6 or 9 fractional digits that loses nothing.
+  if (nanos == 0) {
+    return std::format("{:%FT%T}+00:00", tp);
+  }
+  if (nanos % kNanosPerMillis == 0) {
+    return std::format("{:%FT%T}.{:03d}+00:00", tp, nanos / kNanosPerMillis);
+  }
+  if (nanos % kNanosPerMicro == 0) {
+    return std::format("{:%FT%T}.{:06d}+00:00", tp, nanos / kNanosPerMicro);
+  }
+  return std::format("{:%FT%T}.{:09d}+00:00", tp, nanos);
+}
+
Result<int32_t> TransformUtil::ParseDay(std::string_view str) {
// Expected format: "[+-]yyyy-MM-dd"
// Parse year, month, day manually, skipping leading '+' or '-' to find
first date dash
@@ -156,82 +307,76 @@ Result<int32_t> TransformUtil::ParseDay(std::string_view
str) {
return InvalidArgument("Invalid date: '{}'", str);
}
- auto days = std::chrono::sys_days(ymd) - std::chrono::sys_days(kEpochDate);
+ auto days = std::chrono::sys_days{ymd} - std::chrono::sys_days{kEpochDate};
return static_cast<int32_t>(days.count());
}
Result<int64_t> TransformUtil::ParseTime(std::string_view str) {
- if (str.size() < 5 || str[2] != ':') [[unlikely]] {
- return InvalidArgument("Invalid time string: '{}'", str);
- }
+ return ParseTimeWithFraction(str, kMicrosPerSecond, ParseFractionalMicros);
+}
- ICEBERG_ASSIGN_OR_RAISE(auto hours,
- StringUtils::ParseNumber<int64_t>(str.substr(0, 2)));
- ICEBERG_ASSIGN_OR_RAISE(auto minutes,
- StringUtils::ParseNumber<int64_t>(str.substr(3, 2)));
- int64_t seconds = 0;
+Result<int64_t> TransformUtil::ParseTimeNs(std::string_view str) {
+  // Shared time parser, scaled to nanoseconds with a nanosecond fraction parser.
+  auto nanos_from_midnight =
+      ParseTimeWithFraction(str, kNanosPerSecond, ParseFractionalNanos);
+  return nanos_from_midnight;
+}
- int64_t frac_micros = 0;
- if (str.size() > 5) {
- if (str[5] != ':' || str.size() < 8) [[unlikely]] {
- return InvalidArgument("Invalid time string: '{}'", str);
- }
- ICEBERG_ASSIGN_OR_RAISE(seconds,
StringUtils::ParseNumber<int64_t>(str.substr(6, 2)));
- if (str.size() > 8) {
- if (str[8] != '.') [[unlikely]] {
- return InvalidArgument("Invalid time string: '{}'", str);
- }
- ICEBERG_ASSIGN_OR_RAISE(frac_micros,
ParseFractionalMicros(str.substr(9)));
- }
+Result<int64_t> TransformUtil::ParseTimestamp(std::string_view str) {
+ auto t_pos = str.find('T');
+ if (t_pos == std::string_view::npos) [[unlikely]] {
+ return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
str);
}
- // check that hours, minutes, seconds are in valid ranges
- if (hours < 0 || hours > 23 || minutes < 0 || minutes > 59 || seconds < 0 ||
- seconds > 59) [[unlikely]] {
- return InvalidArgument("Invalid time string: '{}'", str);
- }
+ ICEBERG_ASSIGN_OR_RAISE(auto days, ParseDay(str.substr(0, t_pos)));
+ ICEBERG_ASSIGN_OR_RAISE(auto time_micros, ParseTime(str.substr(t_pos + 1)));
- return hours * 3'600 * kMicrosPerSecond + minutes * 60 * kMicrosPerSecond +
- seconds * kMicrosPerSecond + frac_micros;
+ return TimestampFromDayTime(days, time_micros, kMicrosPerDay,
/*offset_micros=*/0,
+ /*units_per_micro=*/1);
}
-Result<int64_t> TransformUtil::ParseTimestamp(std::string_view str) {
- // Format: "yyyy-MM-ddTHH:mm:ss[.SSS[SSS]]"
+Result<int64_t> TransformUtil::ParseTimestampNs(std::string_view str) {
auto t_pos = str.find('T');
if (t_pos == std::string_view::npos) [[unlikely]] {
return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
str);
}
ICEBERG_ASSIGN_OR_RAISE(auto days, ParseDay(str.substr(0, t_pos)));
- ICEBERG_ASSIGN_OR_RAISE(auto time_micros, ParseTime(str.substr(t_pos + 1)));
+ ICEBERG_ASSIGN_OR_RAISE(auto time_nanos, ParseTimeNs(str.substr(t_pos + 1)));
- return static_cast<int64_t>(days) * kMicrosPerDay + time_micros;
+ return TimestampFromDayTime(days, time_nanos, kNanosPerDay,
/*offset_micros=*/0,
+ /*units_per_micro=*/1'000);
}
Result<int64_t> TransformUtil::ParseTimestampWithZone(std::string_view str) {
- if (str.empty()) [[unlikely]] {
- return InvalidArgument("Invalid timestamptz string: '{}'", str);
+ ICEBERG_ASSIGN_OR_RAISE(auto timestamp_with_offset,
ParseTimestampWithZoneSuffix(str));
+ const auto [timestamp_part, offset_micros] = timestamp_with_offset;
+
+ auto t_pos = timestamp_part.find('T');
+ if (t_pos == std::string_view::npos) [[unlikely]] {
+ return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
+ timestamp_part);
}
- int64_t offset_micros = 0;
- std::string_view timestamp_part;
+ ICEBERG_ASSIGN_OR_RAISE(auto days, ParseDay(timestamp_part.substr(0,
t_pos)));
+ ICEBERG_ASSIGN_OR_RAISE(auto time_micros,
ParseTime(timestamp_part.substr(t_pos + 1)));
- if (str.back() == 'Z') {
- // "Z" suffix means UTC (offset = 0)
- timestamp_part = str.substr(0, str.size() - 1);
- } else if (str.size() >= 6 &&
- (str[str.size() - 6] == '+' || str[str.size() - 6] == '-')) {
- // Parse "+HH:mm" or "-HH:mm" offset suffix
- ICEBERG_ASSIGN_OR_RAISE(offset_micros,
- ParseTimezoneOffset(str.substr(str.size() - 6)));
- timestamp_part = str.substr(0, str.size() - 6);
- } else {
- return InvalidArgument("Invalid timestamptz string (missing timezone
suffix): '{}'",
- str);
+ return TimestampFromDayTime(days, time_micros, kMicrosPerDay, offset_micros,
+ /*units_per_micro=*/1);
+}
+
+Result<int64_t> TransformUtil::ParseTimestampNsWithZone(std::string_view str) {
+ ICEBERG_ASSIGN_OR_RAISE(auto timestamp_with_offset,
ParseTimestampWithZoneSuffix(str));
+ const auto [timestamp_part, offset_micros] = timestamp_with_offset;
+
+ auto t_pos = timestamp_part.find('T');
+ if (t_pos == std::string_view::npos) [[unlikely]] {
+ return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
+ timestamp_part);
}
- ICEBERG_ASSIGN_OR_RAISE(auto local_micros, ParseTimestamp(timestamp_part));
- return local_micros - offset_micros;
+ ICEBERG_ASSIGN_OR_RAISE(auto days, ParseDay(timestamp_part.substr(0,
t_pos)));
+ ICEBERG_ASSIGN_OR_RAISE(auto time_nanos,
ParseTimeNs(timestamp_part.substr(t_pos + 1)));
+
+ return TimestampFromDayTime(days, time_nanos, kNanosPerDay, offset_micros,
+ /*units_per_micro=*/1'000);
}
std::string TransformUtil::Base64Encode(std::string_view str_to_encode) {
diff --git a/src/iceberg/util/transform_util.h
b/src/iceberg/util/transform_util.h
index c23d08c8..b9c69209 100644
--- a/src/iceberg/util/transform_util.h
+++ b/src/iceberg/util/transform_util.h
@@ -23,6 +23,7 @@
#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"
+#include "iceberg/type_fwd.h"
namespace iceberg {
@@ -86,6 +87,19 @@ class ICEBERG_EXPORT TransformUtil {
/// \return a string representation of this timestamp.
static std::string HumanTimestamp(int64_t timestamp_micros);
+ /// \brief Returns a string representation of a timestamp in nanoseconds.
+ ///
+ /// The output will be one of the following forms, according to the
precision of the
+ /// timestamp:
+ /// - yyyy-MM-ddTHH:mm:ss
+ /// - yyyy-MM-ddTHH:mm:ss.SSS
+ /// - yyyy-MM-ddTHH:mm:ss.SSSSSS
+ /// - yyyy-MM-ddTHH:mm:ss.SSSSSSSSS
+ ///
+ /// \param timestamp_nanos the timestamp in nanoseconds.
+ /// \return a string representation of this timestamp.
+ static std::string HumanTimestampNs(int64_t timestamp_nanos);
+
/// \brief Returns a human-readable string representation of a timestamp
with a time
/// zone.
///
@@ -99,6 +113,20 @@ class ICEBERG_EXPORT TransformUtil {
/// \return a string representation of this timestamp.
static std::string HumanTimestampWithZone(int64_t timestamp_micros);
+ /// \brief Returns a string representation of a timestamp in nanoseconds
with a time
+ /// zone.
+ ///
+ /// The output will be one of the following forms, according to the
precision of the
+ /// timestamp:
+ /// - yyyy-MM-ddTHH:mm:ss+00:00
+ /// - yyyy-MM-ddTHH:mm:ss.SSS+00:00
+ /// - yyyy-MM-ddTHH:mm:ss.SSSSSS+00:00
+ /// - yyyy-MM-ddTHH:mm:ss.SSSSSSSSS+00:00
+ ///
+ /// \param timestamp_nanos the timestamp in nanoseconds.
+ /// \return a string representation of this timestamp.
+ static std::string HumanTimestampNsWithZone(int64_t timestamp_nanos);
+
/// \brief Parses a date string in "[+-]yyyy-MM-dd" format into days since
epoch.
///
/// Supports an optional '+' or '-' prefix for extended years beyond 9999.
@@ -117,6 +145,16 @@ class ICEBERG_EXPORT TransformUtil {
/// \return The number of microseconds from midnight, or an error.
static Result<int64_t> ParseTime(std::string_view str);
+ /// \brief Parses a time string into nanoseconds from midnight.
+ ///
+ /// Accepts ISO-8601 local time formats: "HH:mm", "HH:mm:ss", or
+ /// "HH:mm:ss.f" where the fractional part can be 1-9 digits.
+ /// Fractions longer than 9 digits are rejected as invalid.
+ ///
+ /// \param str The time string to parse.
+ /// \return The number of nanoseconds from midnight, or an error.
+ static Result<int64_t> ParseTimeNs(std::string_view str);
+
/// \brief Parses a timestamp string into microseconds since epoch.
///
/// Accepts ISO-8601 local date-time formats: "yyyy-MM-ddTHH:mm",
@@ -127,6 +165,16 @@ class ICEBERG_EXPORT TransformUtil {
/// \return The number of microseconds since epoch, or an error.
static Result<int64_t> ParseTimestamp(std::string_view str);
+ /// \brief Parses a timestamp string into nanoseconds since epoch.
+ ///
+ /// Accepts ISO-8601 local date-time formats: "yyyy-MM-ddTHH:mm",
+ /// "yyyy-MM-ddTHH:mm:ss", or "yyyy-MM-ddTHH:mm:ss.f" where the
+ /// fractional part can be 1-9 digits.
+ ///
+ /// \param str The timestamp string to parse.
+ /// \return The number of nanoseconds since epoch, or an error.
+ static Result<int64_t> ParseTimestampNs(std::string_view str);
+
/// \brief Parses a timestamp-with-zone string into microseconds since epoch
(UTC).
///
/// Accepts the same formats as ParseTimestamp, with a timezone suffix:
@@ -137,6 +185,16 @@ class ICEBERG_EXPORT TransformUtil {
/// \return The number of microseconds since epoch (UTC), or an error.
static Result<int64_t> ParseTimestampWithZone(std::string_view str);
+ /// \brief Parses a timestamp-with-zone string into nanoseconds since epoch
(UTC).
+ ///
+ /// Accepts the same formats as ParseTimestampNs, with a timezone suffix:
+ /// "Z", "+HH:mm", or "-HH:mm". Non-UTC offsets are converted to UTC.
+ /// The seconds and fractional parts are optional (e.g.
"yyyy-MM-ddTHH:mm+00:00").
+ ///
+ /// \param str The timestamp string to parse.
+ /// \return The number of nanoseconds since epoch (UTC), or an error.
+ static Result<int64_t> ParseTimestampNsWithZone(std::string_view str);
+
/// \brief Base64 encode a string
static std::string Base64Encode(std::string_view str_to_encode);
};
diff --git a/src/iceberg/util/visitor_generate.h
b/src/iceberg/util/visitor_generate.h
index 053371d4..7a364854 100644
--- a/src/iceberg/util/visitor_generate.h
+++ b/src/iceberg/util/visitor_generate.h
@@ -32,6 +32,8 @@ namespace iceberg {
ACTION(Time); \
ACTION(Timestamp); \
ACTION(TimestampTz); \
+ ACTION(TimestampNs); \
+ ACTION(TimestampTzNs); \
ACTION(String); \
ACTION(Uuid); \
ACTION(Fixed); \