This is an automated email from the ASF dual-hosted git repository.
wgtmac pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-cpp.git
The following commit(s) were added to refs/heads/main by this push:
new 76fa723b refactor: move temporal utilities out of transform util (#675)
76fa723b is described below
commit 76fa723b4919d581fcf105e6c62fd09be35660b9
Author: Junwang Zhao <[email protected]>
AuthorDate: Mon May 25 16:55:41 2026 +0800
refactor: move temporal utilities out of transform util (#675)
Also add internal math helpers for floor division and checked
multiplication. Keep the Human* formatting helpers in TransformUtil to
stay consistent with the Java TransformUtil implementation.
---
src/iceberg/expression/json_serde.cc | 13 +-
src/iceberg/expression/literal.cc | 13 +-
src/iceberg/test/CMakeLists.txt | 2 +
...lusive_metrics_evaluator_with_transform_test.cc | 6 +-
src/iceberg/test/literal_test.cc | 10 +-
src/iceberg/test/math_util_internal_test.cc | 56 ++++
src/iceberg/test/meson.build | 2 +
src/iceberg/test/temporal_test_helper.h | 7 +-
src/iceberg/test/temporal_util_test.cc | 230 +++++++++++++++++
src/iceberg/test/transform_util_test.cc | 275 --------------------
src/iceberg/transform.cc | 1 +
.../util/{temporal_util.h => math_util_internal.h} | 40 ++-
src/iceberg/util/temporal_util.cc | 252 ++++++++++++++++--
src/iceberg/util/temporal_util.h | 90 +++++++
src/iceberg/util/transform_util.cc | 282 ++-------------------
src/iceberg/util/transform_util.h | 72 +-----
16 files changed, 679 insertions(+), 672 deletions(-)
diff --git a/src/iceberg/expression/json_serde.cc b/src/iceberg/expression/json_serde.cc
index 065f41cf..df8aba88 100644
--- a/src/iceberg/expression/json_serde.cc
+++ b/src/iceberg/expression/json_serde.cc
@@ -33,6 +33,7 @@
#include "iceberg/util/json_util_internal.h"
#include "iceberg/util/macros.h"
#include "iceberg/util/string_util.h"
+#include "iceberg/util/temporal_util.h"
#include "iceberg/util/transform_util.h"
namespace iceberg {
@@ -363,7 +364,7 @@ Result<Literal> LiteralFromJson(const nlohmann::json& json,
const Type* type) {
return JsonParseError("Cannot parse {} as a date value",
SafeDumpJson(json));
}
ICEBERG_ASSIGN_OR_RAISE(auto days,
-
TransformUtil::ParseDay(json.get<std::string>()));
+
TemporalUtils::ParseDay(json.get<std::string>()));
return Literal::Date(days);
}
@@ -372,7 +373,7 @@ Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* type) {
return JsonParseError("Cannot parse {} as a time value",
SafeDumpJson(json));
}
ICEBERG_ASSIGN_OR_RAISE(auto micros,
-
TransformUtil::ParseTime(json.get<std::string>()));
+
TemporalUtils::ParseTime(json.get<std::string>()));
return Literal::Time(micros);
}
@@ -381,7 +382,7 @@ Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* type) {
return JsonParseError("Cannot parse {} as a timestamp value",
SafeDumpJson(json));
}
ICEBERG_ASSIGN_OR_RAISE(auto micros,
-
TransformUtil::ParseTimestamp(json.get<std::string>()));
+
TemporalUtils::ParseTimestamp(json.get<std::string>()));
return Literal::Timestamp(micros);
}
@@ -391,7 +392,7 @@ Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* type) {
SafeDumpJson(json));
}
ICEBERG_ASSIGN_OR_RAISE(
- auto micros,
TransformUtil::ParseTimestampWithZone(json.get<std::string>()));
+ auto micros,
TemporalUtils::ParseTimestampWithZone(json.get<std::string>()));
return Literal::TimestampTz(micros);
}
@@ -401,7 +402,7 @@ Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* type) {
SafeDumpJson(json));
}
ICEBERG_ASSIGN_OR_RAISE(auto nanos,
-
TransformUtil::ParseTimestampNs(json.get<std::string>()));
+
TemporalUtils::ParseTimestampNs(json.get<std::string>()));
return Literal::TimestampNs(nanos);
}
@@ -411,7 +412,7 @@ Result<Literal> LiteralFromJson(const nlohmann::json& json, const Type* type) {
SafeDumpJson(json));
}
ICEBERG_ASSIGN_OR_RAISE(
- auto nanos,
TransformUtil::ParseTimestampNsWithZone(json.get<std::string>()));
+ auto nanos,
TemporalUtils::ParseTimestampNsWithZone(json.get<std::string>()));
return Literal::TimestampTzNs(nanos);
}
diff --git a/src/iceberg/expression/literal.cc b/src/iceberg/expression/literal.cc
index cbf0ab01..d11ab265 100644
--- a/src/iceberg/expression/literal.cc
+++ b/src/iceberg/expression/literal.cc
@@ -32,7 +32,6 @@
#include "iceberg/util/macros.h"
#include "iceberg/util/string_util.h"
#include "iceberg/util/temporal_util.h"
-#include "iceberg/util/transform_util.h"
namespace iceberg {
@@ -203,29 +202,29 @@ Result<Literal> LiteralCaster::CastFromString(
return Literal::UUID(uuid);
}
case TypeId::kDate: {
- ICEBERG_ASSIGN_OR_RAISE(auto days, TransformUtil::ParseDay(str_val));
+ ICEBERG_ASSIGN_OR_RAISE(auto days, TemporalUtils::ParseDay(str_val));
return Literal::Date(days);
}
case TypeId::kTime: {
- ICEBERG_ASSIGN_OR_RAISE(auto micros, TransformUtil::ParseTime(str_val));
+ ICEBERG_ASSIGN_OR_RAISE(auto micros, TemporalUtils::ParseTime(str_val));
return Literal::Time(micros);
}
case TypeId::kTimestamp: {
- ICEBERG_ASSIGN_OR_RAISE(auto micros,
TransformUtil::ParseTimestamp(str_val));
+ ICEBERG_ASSIGN_OR_RAISE(auto micros,
TemporalUtils::ParseTimestamp(str_val));
return Literal::Timestamp(micros);
}
case TypeId::kTimestampTz: {
ICEBERG_ASSIGN_OR_RAISE(auto micros,
- TransformUtil::ParseTimestampWithZone(str_val));
+ TemporalUtils::ParseTimestampWithZone(str_val));
return Literal::TimestampTz(micros);
}
case TypeId::kTimestampNs: {
- ICEBERG_ASSIGN_OR_RAISE(auto nanos,
TransformUtil::ParseTimestampNs(str_val));
+ ICEBERG_ASSIGN_OR_RAISE(auto nanos,
TemporalUtils::ParseTimestampNs(str_val));
return Literal::TimestampNs(nanos);
}
case TypeId::kTimestampTzNs: {
ICEBERG_ASSIGN_OR_RAISE(auto nanos,
-
TransformUtil::ParseTimestampNsWithZone(str_val));
+
TemporalUtils::ParseTimestampNsWithZone(str_val));
return Literal::TimestampTzNs(nanos);
}
case TypeId::kBinary: {
diff --git a/src/iceberg/test/CMakeLists.txt b/src/iceberg/test/CMakeLists.txt
index c632403c..7b546267 100644
--- a/src/iceberg/test/CMakeLists.txt
+++ b/src/iceberg/test/CMakeLists.txt
@@ -128,11 +128,13 @@ add_iceberg_test(util_test
formatter_test.cc
lazy_test.cc
location_util_test.cc
+ math_util_internal_test.cc
roaring_position_bitmap_test.cc
position_delete_index_test.cc
retry_util_test.cc
string_util_test.cc
struct_like_set_test.cc
+ temporal_util_test.cc
transform_util_test.cc
truncate_util_test.cc
url_encoder_test.cc
diff --git a/src/iceberg/test/inclusive_metrics_evaluator_with_transform_test.cc b/src/iceberg/test/inclusive_metrics_evaluator_with_transform_test.cc
index 935f3c3a..4502cda7 100644
--- a/src/iceberg/test/inclusive_metrics_evaluator_with_transform_test.cc
+++ b/src/iceberg/test/inclusive_metrics_evaluator_with_transform_test.cc
@@ -30,6 +30,7 @@
#include "iceberg/schema.h"
#include "iceberg/test/matchers.h"
#include "iceberg/type.h"
+#include "iceberg/util/temporal_util.h"
namespace iceberg {
@@ -38,9 +39,8 @@ constexpr bool kRowsMightMatch = true;
constexpr bool kRowCannotMatch = false;
constexpr int64_t kIntMinValue = 30;
constexpr int64_t kIntMaxValue = 79;
-constexpr int64_t kMicrosPerDay = 86'400'000'000LL;
-constexpr int64_t kTsMinValue = 30 * kMicrosPerDay;
-constexpr int64_t kTsMaxValue = 79 * kMicrosPerDay;
+constexpr int64_t kTsMinValue = 30 * internal::kMicrosPerDay;
+constexpr int64_t kTsMaxValue = 79 * internal::kMicrosPerDay;
std::shared_ptr<UnboundTerm<BoundTransform>> ToBoundTransform(
const std::shared_ptr<UnboundTransform>& transform) {
diff --git a/src/iceberg/test/literal_test.cc b/src/iceberg/test/literal_test.cc
index e7753382..433c4fbe 100644
--- a/src/iceberg/test/literal_test.cc
+++ b/src/iceberg/test/literal_test.cc
@@ -29,6 +29,7 @@
#include "iceberg/test/matchers.h"
#include "iceberg/test/temporal_test_helper.h"
#include "iceberg/type.h"
+#include "iceberg/util/temporal_util.h"
namespace iceberg {
@@ -678,10 +679,11 @@ INSTANTIATE_TEST_SUITE_P(
.small_literal = Literal::Date(100),
.large_literal = Literal::Date(200),
.equal_literal = Literal::Date(100)},
- ComparisonLiteralTestParam{.test_name = "Time",
- .small_literal =
Literal::Time(43200000000LL),
- .large_literal =
Literal::Time(86400000000LL),
- .equal_literal =
Literal::Time(43200000000LL)},
+ ComparisonLiteralTestParam{
+ .test_name = "Time",
+ .small_literal = Literal::Time(internal::kMicrosPerDay / 2),
+ .large_literal = Literal::Time(internal::kMicrosPerDay),
+ .equal_literal = Literal::Time(internal::kMicrosPerDay / 2)},
ComparisonLiteralTestParam{.test_name = "Timestamp",
.small_literal =
Literal::Timestamp(1000000LL),
.large_literal =
Literal::Timestamp(2000000LL),
diff --git a/src/iceberg/test/math_util_internal_test.cc b/src/iceberg/test/math_util_internal_test.cc
new file mode 100644
index 00000000..ac9ef36c
--- /dev/null
+++ b/src/iceberg/test/math_util_internal_test.cc
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/math_util_internal.h"
+
+#include <limits>
+
+#include <gtest/gtest.h>
+
+#include "iceberg/test/matchers.h"
+
+namespace iceberg {
+
+TEST(MathUtilInternalTest, FloorDiv) {
+ EXPECT_EQ(0, FloorDiv(0, 1000));
+ EXPECT_EQ(1, FloorDiv(1001, 1000));
+ EXPECT_EQ(-1, FloorDiv(-1, 1000));
+ EXPECT_EQ(-2, FloorDiv(-1001, 1000));
+ EXPECT_EQ(1, FloorDiv(-1001, -1000));
+ EXPECT_EQ(-2, FloorDiv(1001, -1000));
+}
+
+TEST(MathUtilInternalTest, MultiplyExact) {
+ ICEBERG_UNWRAP_OR_FAIL(auto positive, MultiplyExact(1000, 1000));
+ EXPECT_EQ(1000000, positive);
+
+ ICEBERG_UNWRAP_OR_FAIL(auto negative, MultiplyExact(-1000, 1000));
+ EXPECT_EQ(-1000000, negative);
+
+ ICEBERG_UNWRAP_OR_FAIL(auto min_value,
+ MultiplyExact(std::numeric_limits<int64_t>::min(),
1));
+ EXPECT_EQ(std::numeric_limits<int64_t>::min(), min_value);
+
+ EXPECT_THAT(MultiplyExact(std::numeric_limits<int64_t>::max(), 2),
+ IsError(ErrorKind::kInvalidArgument));
+ EXPECT_THAT(MultiplyExact(std::numeric_limits<int64_t>::min(), -1),
+ IsError(ErrorKind::kInvalidArgument));
+}
+
+} // namespace iceberg
diff --git a/src/iceberg/test/meson.build b/src/iceberg/test/meson.build
index 1acb46e9..6928ab82 100644
--- a/src/iceberg/test/meson.build
+++ b/src/iceberg/test/meson.build
@@ -92,11 +92,13 @@ iceberg_tests = {
'formatter_test.cc',
'lazy_test.cc',
'location_util_test.cc',
+ 'math_util_internal_test.cc',
'position_delete_index_test.cc',
'retry_util_test.cc',
'roaring_position_bitmap_test.cc',
'string_util_test.cc',
'struct_like_set_test.cc',
+ 'temporal_util_test.cc',
'transform_util_test.cc',
'truncate_util_test.cc',
'url_encoder_test.cc',
diff --git a/src/iceberg/test/temporal_test_helper.h b/src/iceberg/test/temporal_test_helper.h
index 0f290489..c4ba3a15 100644
--- a/src/iceberg/test/temporal_test_helper.h
+++ b/src/iceberg/test/temporal_test_helper.h
@@ -22,6 +22,8 @@
#include <chrono>
#include <cstdint>
+#include "iceberg/util/temporal_util.h"
+
namespace iceberg {
using namespace std::chrono; // NOLINT
@@ -64,13 +66,12 @@ struct TimestampNanosParts {
};
class TemporalTestHelper {
- static constexpr auto kEpochDays = sys_days(year{1970} / January / 1);
-
public:
/// \brief Construct a Calendar date without timezone or time
static int32_t CreateDate(const DateParts& parts) {
return static_cast<int32_t>(
- (sys_days(year{parts.year} / month{parts.month} / day{parts.day}) -
kEpochDays)
+ (sys_days(year{parts.year} / month{parts.month} / day{parts.day}) -
+ internal::kEpochDays)
.count());
}
diff --git a/src/iceberg/test/temporal_util_test.cc b/src/iceberg/test/temporal_util_test.cc
new file mode 100644
index 00000000..0d4426b0
--- /dev/null
+++ b/src/iceberg/test/temporal_util_test.cc
@@ -0,0 +1,230 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "iceberg/util/temporal_util.h"
+
+#include <limits>
+#include <string>
+
+#include <gtest/gtest.h>
+
+#include "iceberg/test/matchers.h"
+
+namespace iceberg {
+
+TEST(TemporalUtilTest, ParseTimestampNs) {
+ ICEBERG_UNWRAP_OR_FAIL(
+ auto nanos,
TemporalUtils::ParseTimestampNs("2026-01-01T00:00:01.000001001"));
+ EXPECT_EQ(nanos, 1767225601000001001L);
+
+ ICEBERG_UNWRAP_OR_FAIL(auto pre_epoch_nanos, TemporalUtils::ParseTimestampNs(
+
"1969-12-31T23:59:59.123456789"));
+ EXPECT_EQ(pre_epoch_nanos, -876543211);
+}
+
+TEST(TemporalUtilTest, ParseTimestampNsChecksInt64Bounds) {
+ ICEBERG_UNWRAP_OR_FAIL(
+ auto max_nanos,
TemporalUtils::ParseTimestampNs("2262-04-11T23:47:16.854775807"));
+ EXPECT_EQ(max_nanos, std::numeric_limits<int64_t>::max());
+
+ ICEBERG_UNWRAP_OR_FAIL(
+ auto min_nanos,
TemporalUtils::ParseTimestampNs("1677-09-21T00:12:43.145224192"));
+ EXPECT_EQ(min_nanos, std::numeric_limits<int64_t>::min());
+
+ EXPECT_THAT(TemporalUtils::ParseTimestampNs("2262-04-11T23:47:16.854775808"),
+ IsError(ErrorKind::kInvalidArgument));
+ EXPECT_THAT(TemporalUtils::ParseTimestampNs("1677-09-21T00:12:43.145224191"),
+ IsError(ErrorKind::kInvalidArgument));
+}
+
+TEST(TemporalUtilTest, ParseTimestampNsRejectsMoreThanNineFractionalDigits) {
+
EXPECT_THAT(TemporalUtils::ParseTimestampNs("2026-01-01T00:00:01.0000010011"),
+ IsError(ErrorKind::kInvalidArgument));
+}
+
+TEST(TemporalUtilTest, ParseTimestampNsWithZone) {
+ ICEBERG_UNWRAP_OR_FAIL(auto nanos, TemporalUtils::ParseTimestampNsWithZone(
+
"2026-01-01T00:00:01.000001001+00:00"));
+ EXPECT_EQ(nanos, 1767225601000001001L);
+}
+
+TEST(TemporalUtilTest, ParseTimestampNsWithZoneChecksInt64BoundsAfterOffset) {
+ ICEBERG_UNWRAP_OR_FAIL(auto max_nanos,
TemporalUtils::ParseTimestampNsWithZone(
+
"2262-04-12T00:47:16.854775807+01:00"));
+ EXPECT_EQ(max_nanos, std::numeric_limits<int64_t>::max());
+
+ ICEBERG_UNWRAP_OR_FAIL(auto min_nanos,
TemporalUtils::ParseTimestampNsWithZone(
+
"1677-09-20T23:12:43.145224192-01:00"));
+ EXPECT_EQ(min_nanos, std::numeric_limits<int64_t>::min());
+
+ EXPECT_THAT(
+
TemporalUtils::ParseTimestampNsWithZone("2262-04-11T23:47:16.854775807-00:01"),
+ IsError(ErrorKind::kInvalidArgument));
+ EXPECT_THAT(
+
TemporalUtils::ParseTimestampNsWithZone("1677-09-21T00:12:43.145224192+00:01"),
+ IsError(ErrorKind::kInvalidArgument));
+}
+
+TEST(TemporalUtilTest, ParseTimestampNsWithZoneRejectsOffsetPastPlusMinus1800)
{
+ EXPECT_THAT(
+
TemporalUtils::ParseTimestampNsWithZone("2026-01-01T00:00:01.000001001+18:01"),
+ IsError(ErrorKind::kInvalidArgument));
+ EXPECT_THAT(
+
TemporalUtils::ParseTimestampNsWithZone("2026-01-01T00:00:01.000001001-18:30"),
+ IsError(ErrorKind::kInvalidArgument));
+}
+
+struct ParseParam {
+ std::string name;
+ std::string str;
+ int64_t value;
+ enum Kind { kDay, kTime, kTimestamp, kTimestampTz } kind;
+};
+
+class TemporalParseTest : public ::testing::TestWithParam<ParseParam> {};
+
+TEST_P(TemporalParseTest, ParsesCorrectly) {
+ const auto& param = GetParam();
+ switch (param.kind) {
+ case ParseParam::kDay: {
+ ICEBERG_UNWRAP_OR_FAIL(auto parsed, TemporalUtils::ParseDay(param.str));
+ EXPECT_EQ(parsed, static_cast<int32_t>(param.value));
+ break;
+ }
+ case ParseParam::kTime: {
+ ICEBERG_UNWRAP_OR_FAIL(auto parsed, TemporalUtils::ParseTime(param.str));
+ EXPECT_EQ(parsed, param.value);
+ break;
+ }
+ case ParseParam::kTimestamp: {
+ ICEBERG_UNWRAP_OR_FAIL(auto parsed,
TemporalUtils::ParseTimestamp(param.str));
+ EXPECT_EQ(parsed, param.value);
+ break;
+ }
+ case ParseParam::kTimestampTz: {
+ ICEBERG_UNWRAP_OR_FAIL(auto parsed,
+ TemporalUtils::ParseTimestampWithZone(param.str));
+ EXPECT_EQ(parsed, param.value);
+ break;
+ }
+ }
+}
+
+struct ParseTimeErrorParam {
+ std::string name;
+ std::string str;
+};
+
+class ParseTimeErrorTest : public
::testing::TestWithParam<ParseTimeErrorParam> {};
+
+TEST_P(ParseTimeErrorTest, ReturnsError) {
+ EXPECT_THAT(TemporalUtils::ParseTime(GetParam().str),
+ IsError(ErrorKind::kInvalidArgument));
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ TemporalUtilTest, TemporalParseTest,
+ ::testing::Values(
+ ParseParam{"DayEpoch", "1970-01-01", 0, ParseParam::kDay},
+ ParseParam{"DayNext", "1970-01-02", 1, ParseParam::kDay},
+ ParseParam{"DayBeforeEpoch", "1969-12-31", -1, ParseParam::kDay},
+ ParseParam{"DayYear999", "0999-12-31", -354286, ParseParam::kDay},
+ ParseParam{"DayNonLeap", "1971-01-01", 365, ParseParam::kDay},
+ ParseParam{"DayY2K", "2000-01-01", 10957, ParseParam::kDay},
+ ParseParam{"Day2026", "2026-01-01", 20454, ParseParam::kDay},
+ ParseParam{"TimeMidnight", "00:00", 0, ParseParam::kTime},
+ ParseParam{"TimeOneSec", "00:00:01", 1000000, ParseParam::kTime},
+ ParseParam{"TimeMillis", "00:00:01.500", 1500000, ParseParam::kTime},
+ ParseParam{"TimeOneMillis", "00:00:01.001", 1001000,
ParseParam::kTime},
+ ParseParam{"TimeMicros", "00:00:01.000001", 1000001,
ParseParam::kTime},
+ ParseParam{"TimeHourMinSec", "01:02:03", 3723000000,
ParseParam::kTime},
+ ParseParam{"TimeEndOfDay", "23:59:59", 86399000000, ParseParam::kTime},
+ ParseParam{"TimestampEpoch", "1970-01-01T00:00:00", 0,
ParseParam::kTimestamp},
+ ParseParam{"TimestampOneSec", "1970-01-01T00:00:01", 1000000,
+ ParseParam::kTimestamp},
+ ParseParam{"TimestampMillis", "2026-01-01T00:00:01.500",
1767225601500000L,
+ ParseParam::kTimestamp},
+ ParseParam{"TimestampOneMillis", "2026-01-01T00:00:01.001",
1767225601001000L,
+ ParseParam::kTimestamp},
+ ParseParam{"TimestampMicros", "2026-01-01T00:00:01.000001",
1767225601000001L,
+ ParseParam::kTimestamp},
+ ParseParam{"TimestampTzEpoch", "1970-01-01T00:00:00+00:00", 0,
+ ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzOneSec", "1970-01-01T00:00:01+00:00", 1000000,
+ ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzMillis", "2026-01-01T00:00:01.500+00:00",
+ 1767225601500000L, ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzOneMillis", "2026-01-01T00:00:01.001+00:00",
+ 1767225601001000L, ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzMicros", "2026-01-01T00:00:01.000001+00:00",
+ 1767225601000001L, ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzSuffixZ_Epoch", "1970-01-01T00:00:00Z", 0,
+ ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzSuffixZ_Millis", "2026-01-01T00:00:01.500Z",
+ 1767225601500000L, ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzNegZero_Epoch", "1970-01-01T00:00:00-00:00", 0,
+ ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzNegZero_Millis",
"2026-01-01T00:00:01.500-00:00",
+ 1767225601500000L, ParseParam::kTimestampTz},
+ ParseParam{"TimeTruncatesNanos", "00:00:01.123456789", 1123456,
+ ParseParam::kTime},
+ ParseParam{"1Digit", "00:00:01.5", 1500000, ParseParam::kTime},
+ ParseParam{"2Digits", "00:00:01.50", 1500000, ParseParam::kTime},
+ ParseParam{"2DigitsNonZero", "00:00:01.12", 1120000,
ParseParam::kTime},
+ ParseParam{"4Digits", "00:00:01.0001", 1000100, ParseParam::kTime},
+ ParseParam{"TimestampNoSec_Zero", "1970-01-01T00:00", 0,
ParseParam::kTimestamp},
+ ParseParam{"TimestampNoSec_OneMin", "1970-01-01T00:01", 60000000,
+ ParseParam::kTimestamp},
+ ParseParam{"TimestampTzNoSec_Offset", "1970-01-01T00:00+00:00", 0,
+ ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzNoSec_OneMin", "1970-01-01T00:01+00:00",
60000000,
+ ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzNoSec_Z", "1970-01-01T00:00Z", 0,
+ ParseParam::kTimestampTz},
+ ParseParam{"ExtendedYearPlusEpoch", "+1970-01-01", 0,
ParseParam::kDay},
+ ParseParam{"ExtendedYearPlus2026", "+2026-01-01", 20454,
ParseParam::kDay},
+ ParseParam{"ExtendedYearMinus2026", "-2026-01-01", -1459509,
ParseParam::kDay},
+ ParseParam{"TimestampTzPositiveOffset", "1970-01-01T05:00:00+05:00", 0,
+ ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzNegativeOffset", "1970-01-01T00:00:00-05:00",
18000000000,
+ ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzOffsetWithMillis",
"2026-01-01T05:30:01.500+05:30",
+ 1767225601500000L, ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzNegOffsetToEpoch", "1969-12-31T19:00:00-05:00",
0,
+ ParseParam::kTimestampTz},
+ ParseParam{"TimestampTzNoSecWithOffset", "1970-01-01T05:30+05:30", 0,
+ ParseParam::kTimestampTz}),
+ [](const ::testing::TestParamInfo<ParseParam>& info) { return
info.param.name; });
+
+INSTANTIATE_TEST_SUITE_P(
+ TemporalUtilTest, ParseTimeErrorTest,
+ ::testing::Values(ParseTimeErrorParam{"EmptyString", ""},
+ ParseTimeErrorParam{"TooShort1Char", "1"},
+ ParseTimeErrorParam{"TooShort2Chars", "12"},
+ ParseTimeErrorParam{"TooShort4Chars", "12:3"},
+ ParseTimeErrorParam{"MissingColon", "1200:00"},
+ ParseTimeErrorParam{"OutofRangeHours", "24:00:00"},
+ ParseTimeErrorParam{"OutofRangeMinutes", "12:60:00"},
+ ParseTimeErrorParam{"OutofRangeSeconds", "12:30:61"},
+ ParseTimeErrorParam{"SpaceInsteadOfColon", "12 30"}),
+ [](const ::testing::TestParamInfo<ParseTimeErrorParam>& info) {
+ return info.param.name;
+ });
+
+} // namespace iceberg
diff --git a/src/iceberg/test/transform_util_test.cc b/src/iceberg/test/transform_util_test.cc
index 5b64bb33..f5a22233 100644
--- a/src/iceberg/test/transform_util_test.cc
+++ b/src/iceberg/test/transform_util_test.cc
@@ -19,12 +19,8 @@
#include "iceberg/util/transform_util.h"
-#include <limits>
-
#include <gtest/gtest.h>
-#include "iceberg/test/matchers.h"
-
namespace iceberg {
TEST(TransformUtilTest, HumanYear) {
@@ -157,69 +153,6 @@ TEST(TransformUtilTest, HumanTimestampNsWithZone) {
TransformUtil::HumanTimestampNsWithZone(-876543211));
}
-TEST(TransformUtilTest, ParseTimestampNs) {
- ICEBERG_UNWRAP_OR_FAIL(
- auto nanos,
TransformUtil::ParseTimestampNs("2026-01-01T00:00:01.000001001"));
- EXPECT_EQ(nanos, 1767225601000001001L);
- ICEBERG_UNWRAP_OR_FAIL(auto pre_epoch_nanos, TransformUtil::ParseTimestampNs(
-
"1969-12-31T23:59:59.123456789"));
- EXPECT_EQ(pre_epoch_nanos, -876543211);
- EXPECT_EQ(TransformUtil::HumanTimestampNs(pre_epoch_nanos),
- "1969-12-31T23:59:59.123456789");
-}
-
-TEST(TransformUtilTest, ParseTimestampNsChecksInt64Bounds) {
- ICEBERG_UNWRAP_OR_FAIL(
- auto max_nanos,
TransformUtil::ParseTimestampNs("2262-04-11T23:47:16.854775807"));
- EXPECT_EQ(max_nanos, std::numeric_limits<int64_t>::max());
-
- ICEBERG_UNWRAP_OR_FAIL(
- auto min_nanos,
TransformUtil::ParseTimestampNs("1677-09-21T00:12:43.145224192"));
- EXPECT_EQ(min_nanos, std::numeric_limits<int64_t>::min());
-
- EXPECT_THAT(TransformUtil::ParseTimestampNs("2262-04-11T23:47:16.854775808"),
- IsError(ErrorKind::kInvalidArgument));
- EXPECT_THAT(TransformUtil::ParseTimestampNs("1677-09-21T00:12:43.145224191"),
- IsError(ErrorKind::kInvalidArgument));
-}
-
-TEST(TransformUtilTest, ParseTimestampNsRejectsMoreThanNineFractionalDigits) {
-
EXPECT_THAT(TransformUtil::ParseTimestampNs("2026-01-01T00:00:01.0000010011"),
- IsError(ErrorKind::kInvalidArgument));
-}
-
-TEST(TransformUtilTest, ParseTimestampNsWithZone) {
- ICEBERG_UNWRAP_OR_FAIL(auto nanos, TransformUtil::ParseTimestampNsWithZone(
-
"2026-01-01T00:00:01.000001001+00:00"));
- EXPECT_EQ(nanos, 1767225601000001001L);
-}
-
-TEST(TransformUtilTest, ParseTimestampNsWithZoneChecksInt64BoundsAfterOffset) {
- ICEBERG_UNWRAP_OR_FAIL(auto max_nanos,
TransformUtil::ParseTimestampNsWithZone(
-
"2262-04-12T00:47:16.854775807+01:00"));
- EXPECT_EQ(max_nanos, std::numeric_limits<int64_t>::max());
-
- ICEBERG_UNWRAP_OR_FAIL(auto min_nanos,
TransformUtil::ParseTimestampNsWithZone(
-
"1677-09-20T23:12:43.145224192-01:00"));
- EXPECT_EQ(min_nanos, std::numeric_limits<int64_t>::min());
-
- EXPECT_THAT(
-
TransformUtil::ParseTimestampNsWithZone("2262-04-11T23:47:16.854775807-00:01"),
- IsError(ErrorKind::kInvalidArgument));
- EXPECT_THAT(
-
TransformUtil::ParseTimestampNsWithZone("1677-09-21T00:12:43.145224192+00:01"),
- IsError(ErrorKind::kInvalidArgument));
-}
-
-TEST(TransformUtilTest,
ParseTimestampNsWithZoneRejectsOffsetPastPlusMinus1800) {
- EXPECT_THAT(
-
TransformUtil::ParseTimestampNsWithZone("2026-01-01T00:00:01.000001001+18:01"),
- IsError(ErrorKind::kInvalidArgument));
- EXPECT_THAT(
-
TransformUtil::ParseTimestampNsWithZone("2026-01-01T00:00:01.000001001-18:30"),
- IsError(ErrorKind::kInvalidArgument));
-}
-
TEST(TransformUtilTest, Base64Encode) {
// Empty string
EXPECT_EQ("", TransformUtil::Base64Encode(""));
@@ -245,212 +178,4 @@ TEST(TransformUtilTest, Base64Encode) {
EXPECT_EQ("AA==", TransformUtil::Base64Encode({"\x00", 1}));
}
-struct ParseRoundTripParam {
- std::string name;
- std::string str;
- int64_t value;
- enum Kind { kDay, kTime, kTimestamp, kTimestampTz } kind;
-};
-
-class ParseRoundTripTest : public
::testing::TestWithParam<ParseRoundTripParam> {};
-
-TEST_P(ParseRoundTripTest, RoundTrip) {
- const auto& param = GetParam();
- switch (param.kind) {
- case ParseRoundTripParam::kDay: {
- EXPECT_EQ(TransformUtil::HumanDay(static_cast<int32_t>(param.value)),
param.str);
- ICEBERG_UNWRAP_OR_FAIL(auto parsed, TransformUtil::ParseDay(param.str));
- EXPECT_EQ(parsed, static_cast<int32_t>(param.value));
- break;
- }
- case ParseRoundTripParam::kTime: {
- EXPECT_EQ(TransformUtil::HumanTime(param.value), param.str);
- ICEBERG_UNWRAP_OR_FAIL(auto parsed, TransformUtil::ParseTime(param.str));
- EXPECT_EQ(parsed, param.value);
- break;
- }
- case ParseRoundTripParam::kTimestamp: {
- EXPECT_EQ(TransformUtil::HumanTimestamp(param.value), param.str);
- ICEBERG_UNWRAP_OR_FAIL(auto parsed,
TransformUtil::ParseTimestamp(param.str));
- EXPECT_EQ(parsed, param.value);
- break;
- }
- case ParseRoundTripParam::kTimestampTz: {
- EXPECT_EQ(TransformUtil::HumanTimestampWithZone(param.value), param.str);
- ICEBERG_UNWRAP_OR_FAIL(auto parsed,
- TransformUtil::ParseTimestampWithZone(param.str));
- EXPECT_EQ(parsed, param.value);
- break;
- }
- }
-}
-
-struct ParseOnlyParam {
- std::string name;
- std::string str;
- int64_t value;
- enum Kind { kDay, kTime, kTimestamp, kTimestampTz } kind;
-};
-
-class ParseOnlyTest : public ::testing::TestWithParam<ParseOnlyParam> {};
-
-TEST_P(ParseOnlyTest, ParsesCorrectly) {
- const auto& param = GetParam();
- switch (param.kind) {
- case ParseOnlyParam::kDay: {
- ICEBERG_UNWRAP_OR_FAIL(auto parsed, TransformUtil::ParseDay(param.str));
- EXPECT_EQ(parsed, static_cast<int32_t>(param.value));
- break;
- }
- case ParseOnlyParam::kTime: {
- ICEBERG_UNWRAP_OR_FAIL(auto parsed, TransformUtil::ParseTime(param.str));
- EXPECT_EQ(parsed, param.value);
- break;
- }
- case ParseOnlyParam::kTimestamp: {
- ICEBERG_UNWRAP_OR_FAIL(auto parsed,
TransformUtil::ParseTimestamp(param.str));
- EXPECT_EQ(parsed, param.value);
- break;
- }
- case ParseOnlyParam::kTimestampTz: {
- ICEBERG_UNWRAP_OR_FAIL(auto parsed,
- TransformUtil::ParseTimestampWithZone(param.str));
- EXPECT_EQ(parsed, param.value);
- break;
- }
- }
-}
-
-struct ParseTimeErrorParam {
- std::string name;
- std::string str;
-};
-
-class ParseTimeErrorTest : public
::testing::TestWithParam<ParseTimeErrorParam> {};
-
-TEST_P(ParseTimeErrorTest, ReturnsError) {
- EXPECT_THAT(TransformUtil::ParseTime(GetParam().str),
- IsError(ErrorKind::kInvalidArgument));
-}
-
-INSTANTIATE_TEST_SUITE_P(
- TransformUtilTest, ParseRoundTripTest,
- ::testing::Values(
- // Day round-trips
- ParseRoundTripParam{"DayEpoch", "1970-01-01", 0,
ParseRoundTripParam::kDay},
- ParseRoundTripParam{"DayNext", "1970-01-02", 1,
ParseRoundTripParam::kDay},
- ParseRoundTripParam{"DayBeforeEpoch", "1969-12-31", -1,
- ParseRoundTripParam::kDay},
- ParseRoundTripParam{"DayYear999", "0999-12-31", -354286,
- ParseRoundTripParam::kDay},
- ParseRoundTripParam{"DayNonLeap", "1971-01-01", 365,
ParseRoundTripParam::kDay},
- ParseRoundTripParam{"DayY2K", "2000-01-01", 10957,
ParseRoundTripParam::kDay},
- ParseRoundTripParam{"Day2026", "2026-01-01", 20454,
ParseRoundTripParam::kDay},
- // Time round-trips
- ParseRoundTripParam{"TimeMidnight", "00:00", 0,
ParseRoundTripParam::kTime},
- ParseRoundTripParam{"TimeOneSec", "00:00:01", 1000000,
- ParseRoundTripParam::kTime},
- ParseRoundTripParam{"TimeMillis", "00:00:01.500", 1500000,
- ParseRoundTripParam::kTime},
- ParseRoundTripParam{"TimeOneMillis", "00:00:01.001", 1001000,
- ParseRoundTripParam::kTime},
- ParseRoundTripParam{"TimeMicros", "00:00:01.000001", 1000001,
- ParseRoundTripParam::kTime},
- ParseRoundTripParam{"TimeHourMinSec", "01:02:03", 3723000000,
- ParseRoundTripParam::kTime},
- ParseRoundTripParam{"TimeEndOfDay", "23:59:59", 86399000000,
- ParseRoundTripParam::kTime},
- // Timestamp round-trips
- ParseRoundTripParam{"TimestampEpoch", "1970-01-01T00:00:00", 0,
- ParseRoundTripParam::kTimestamp},
- ParseRoundTripParam{"TimestampOneSec", "1970-01-01T00:00:01", 1000000,
- ParseRoundTripParam::kTimestamp},
- ParseRoundTripParam{"TimestampMillis", "2026-01-01T00:00:01.500",
- 1767225601500000L,
ParseRoundTripParam::kTimestamp},
- ParseRoundTripParam{"TimestampOneMillis", "2026-01-01T00:00:01.001",
- 1767225601001000L,
ParseRoundTripParam::kTimestamp},
- ParseRoundTripParam{"TimestampMicros", "2026-01-01T00:00:01.000001",
- 1767225601000001L,
ParseRoundTripParam::kTimestamp},
- // TimestampTz round-trips
- ParseRoundTripParam{"TimestampTzEpoch", "1970-01-01T00:00:00+00:00", 0,
- ParseRoundTripParam::kTimestampTz},
- ParseRoundTripParam{"TimestampTzOneSec", "1970-01-01T00:00:01+00:00",
1000000,
- ParseRoundTripParam::kTimestampTz},
- ParseRoundTripParam{"TimestampTzMillis",
"2026-01-01T00:00:01.500+00:00",
- 1767225601500000L,
ParseRoundTripParam::kTimestampTz},
- ParseRoundTripParam{"TimestampTzOneMillis",
"2026-01-01T00:00:01.001+00:00",
- 1767225601001000L,
ParseRoundTripParam::kTimestampTz},
- ParseRoundTripParam{"TimestampTzMicros",
"2026-01-01T00:00:01.000001+00:00",
- 1767225601000001L,
ParseRoundTripParam::kTimestampTz}),
- [](const ::testing::TestParamInfo<ParseRoundTripParam>& info) {
- return info.param.name;
- });
-
-INSTANTIATE_TEST_SUITE_P(
- TransformUtilTest, ParseOnlyTest,
- ::testing::Values(
- // TimestampTz with "Z" suffix
- ParseOnlyParam{"TimestampTzSuffixZ_Epoch", "1970-01-01T00:00:00Z", 0,
- ParseOnlyParam::kTimestampTz},
- ParseOnlyParam{"TimestampTzSuffixZ_Millis", "2026-01-01T00:00:01.500Z",
- 1767225601500000L, ParseOnlyParam::kTimestampTz},
- // TimestampTz with "-00:00" suffix
- ParseOnlyParam{"TimestampTzNegZero_Epoch",
"1970-01-01T00:00:00-00:00", 0,
- ParseOnlyParam::kTimestampTz},
- ParseOnlyParam{"TimestampTzNegZero_Millis",
"2026-01-01T00:00:01.500-00:00",
- 1767225601500000L, ParseOnlyParam::kTimestampTz},
- // Fractional micros truncates nanos
- ParseOnlyParam{"TimeTruncatesNanos", "00:00:01.123456789", 1123456,
- ParseOnlyParam::kTime},
- // Fractional seconds (trimmed trailing zeros)
- ParseOnlyParam{"1Digit", "00:00:01.5", 1500000, ParseOnlyParam::kTime},
- ParseOnlyParam{"2Digits", "00:00:01.50", 1500000,
ParseOnlyParam::kTime},
- ParseOnlyParam{"2DigitsNonZero", "00:00:01.12", 1120000,
ParseOnlyParam::kTime},
- ParseOnlyParam{"4Digits", "00:00:01.0001", 1000100,
ParseOnlyParam::kTime},
- // Timestamp without seconds
- ParseOnlyParam{"TimestampNoSec_Zero", "1970-01-01T00:00", 0,
- ParseOnlyParam::kTimestamp},
- ParseOnlyParam{"TimestampNoSec_OneMin", "1970-01-01T00:01", 60000000,
- ParseOnlyParam::kTimestamp},
- // TimestampTz without seconds
- ParseOnlyParam{"TimestampTzNoSec_Offset", "1970-01-01T00:00+00:00", 0,
- ParseOnlyParam::kTimestampTz},
- ParseOnlyParam{"TimestampTzNoSec_OneMin", "1970-01-01T00:01+00:00",
60000000,
- ParseOnlyParam::kTimestampTz},
- ParseOnlyParam{"TimestampTzNoSec_Z", "1970-01-01T00:00Z", 0,
- ParseOnlyParam::kTimestampTz},
- // Extended year with '+' prefix
- ParseOnlyParam{"ExtendedYearPlusEpoch", "+1970-01-01", 0,
ParseOnlyParam::kDay},
- ParseOnlyParam{"ExtendedYearPlus2026", "+2026-01-01", 20454,
- ParseOnlyParam::kDay},
- ParseOnlyParam{"ExtendedYearMinus2026", "-2026-01-01", -1459509,
- ParseOnlyParam::kDay},
- // Non-UTC timezone offsets
- ParseOnlyParam{"TimestampTzPositiveOffset",
"1970-01-01T05:00:00+05:00", 0,
- ParseOnlyParam::kTimestampTz},
- ParseOnlyParam{"TimestampTzNegativeOffset",
"1970-01-01T00:00:00-05:00",
- 18000000000, ParseOnlyParam::kTimestampTz},
- ParseOnlyParam{"TimestampTzOffsetWithMillis",
"2026-01-01T05:30:01.500+05:30",
- 1767225601500000L, ParseOnlyParam::kTimestampTz},
- ParseOnlyParam{"TimestampTzNegOffsetToEpoch",
"1969-12-31T19:00:00-05:00", 0,
- ParseOnlyParam::kTimestampTz},
- ParseOnlyParam{"TimestampTzNoSecWithOffset", "1970-01-01T05:30+05:30",
0,
- ParseOnlyParam::kTimestampTz}),
- [](const ::testing::TestParamInfo<ParseOnlyParam>& info) { return
info.param.name; });
-
-INSTANTIATE_TEST_SUITE_P(
- TransformUtilTest, ParseTimeErrorTest,
- ::testing::Values(ParseTimeErrorParam{"EmptyString", ""},
- ParseTimeErrorParam{"TooShort1Char", "1"},
- ParseTimeErrorParam{"TooShort2Chars", "12"},
- ParseTimeErrorParam{"TooShort4Chars", "12:3"},
- ParseTimeErrorParam{"MissingColon", "1200:00"},
- ParseTimeErrorParam{"OutofRangeHours", "24:00:00"},
- ParseTimeErrorParam{"OutofRangeMinutes", "12:60:00"},
- ParseTimeErrorParam{"OutofRangeSeconds", "12:30:61"},
- ParseTimeErrorParam{"SpaceInsteadOfColon", "12 30"}),
- [](const ::testing::TestParamInfo<ParseTimeErrorParam>& info) {
- return info.param.name;
- });
-
} // namespace iceberg
diff --git a/src/iceberg/transform.cc b/src/iceberg/transform.cc
index 8a7d4b3e..c019c7ea 100644
--- a/src/iceberg/transform.cc
+++ b/src/iceberg/transform.cc
@@ -32,6 +32,7 @@
#include "iceberg/util/macros.h"
#include "iceberg/util/projection_util_internal.h"
#include "iceberg/util/string_util.h"
+#include "iceberg/util/temporal_util.h"
#include "iceberg/util/transform_util.h"
namespace iceberg {
diff --git a/src/iceberg/util/temporal_util.h
b/src/iceberg/util/math_util_internal.h
similarity index 50%
copy from src/iceberg/util/temporal_util.h
copy to src/iceberg/util/math_util_internal.h
index 414e4fd2..20d8ef89 100644
--- a/src/iceberg/util/temporal_util.h
+++ b/src/iceberg/util/math_util_internal.h
@@ -20,32 +20,28 @@
#pragma once
#include <cstdint>
+#include <limits>
-#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"
-#include "iceberg/type_fwd.h"
+#include "iceberg/util/int128.h"
namespace iceberg {
-class ICEBERG_EXPORT TemporalUtils {
- public:
- /// \brief Convert nanoseconds since epoch to microseconds using floor
division.
- static int64_t NanosToMicros(int64_t nanos);
-
- /// \brief Convert microseconds since epoch to nanoseconds, failing on
overflow.
- static Result<int64_t> MicrosToNanos(int64_t micros);
-
- /// \brief Extract a date or timestamp year, as years from 1970
- static Result<Literal> ExtractYear(const Literal& literal);
-
- /// \brief Extract a date or timestamp month, as months from 1970-01-01
- static Result<Literal> ExtractMonth(const Literal& literal);
-
- /// \brief Extract a date or timestamp day, as days from 1970-01-01
- static Result<Literal> ExtractDay(const Literal& literal);
-
- /// \brief Extract a timestamp hour, as hours from 1970-01-01 00:00:00
- static Result<Literal> ExtractHour(const Literal& literal);
-};
+inline constexpr int64_t FloorDiv(int64_t dividend, int64_t divisor) {
+ const auto quotient = dividend / divisor;
+ if ((dividend ^ divisor) < 0 && quotient * divisor != dividend) {
+ return quotient - 1;
+ }
+ return quotient;
+}
+
+inline Result<int64_t> MultiplyExact(int64_t lhs, int64_t rhs) {
+ const auto result = static_cast<int128_t>(lhs) * static_cast<int128_t>(rhs);
+ if (result > std::numeric_limits<int64_t>::max() ||
+ result < std::numeric_limits<int64_t>::min()) [[unlikely]] {
+ return InvalidArgument("Long overflow when multiplying {} by {}", lhs,
rhs);
+ }
+ return static_cast<int64_t>(result);
+}
} // namespace iceberg
diff --git a/src/iceberg/util/temporal_util.cc
b/src/iceberg/util/temporal_util.cc
index b91fcec7..e00ee7cf 100644
--- a/src/iceberg/util/temporal_util.cc
+++ b/src/iceberg/util/temporal_util.cc
@@ -26,6 +26,9 @@
#include "iceberg/expression/literal.h"
#include "iceberg/util/int128.h"
+#include "iceberg/util/macros.h"
+#include "iceberg/util/math_util_internal.h"
+#include "iceberg/util/string_util.h"
namespace iceberg {
@@ -33,30 +36,136 @@ namespace {
using namespace std::chrono; // NOLINT
-constexpr int64_t kNanosPerMicro = 1000;
+/// Parse a timezone offset of the form "+HH:mm" or "-HH:mm" and return the
+/// offset in microseconds (positive for east of UTC, negative for west).
+Result<int64_t> ParseTimezoneOffset(std::string_view offset) {
+ if (offset.size() != 6 || (offset[0] != '+' && offset[0] != '-') ||
offset[3] != ':') {
+ return InvalidArgument("Invalid timezone offset: '{}'", offset);
+ }
+ bool negative = offset[0] == '-';
+ ICEBERG_ASSIGN_OR_RAISE(auto hours,
+ StringUtils::ParseNumber<int64_t>(offset.substr(1,
2)));
+ ICEBERG_ASSIGN_OR_RAISE(auto minutes,
+ StringUtils::ParseNumber<int64_t>(offset.substr(4,
2)));
+ if (hours > 18 || minutes > 59) [[unlikely]] {
+ return InvalidArgument("Invalid timezone offset: '{}'", offset);
+ }
+
+ if (hours == 18 && minutes != 0) [[unlikely]] {
+ return InvalidArgument("Timezone offset '{}' not in range [-18:00,
+18:00]", offset);
+ }
-constexpr auto kEpochYmd = year{1970} / January / 1;
-constexpr auto kEpochDays = sys_days(kEpochYmd);
+ auto micros = hours * internal::kSecondsPerHour * internal::kMicrosPerSecond
+
+ minutes * internal::kSecondsPerMinute *
internal::kMicrosPerSecond;
+ return negative ? -micros : micros;
+}
+
+Result<std::pair<std::string_view, int64_t>> ParseTimestampWithZoneSuffix(
+ std::string_view str) {
+ if (str.empty()) [[unlikely]] {
+ return InvalidArgument("Invalid timestamptz string: '{}'", str);
+ }
-inline constexpr int64_t FloorDiv(int64_t dividend, int64_t divisor) {
- const auto quotient = dividend / divisor;
- if ((dividend ^ divisor) < 0 && quotient * divisor != dividend) {
- return quotient - 1;
+ int64_t offset_micros = 0;
+ std::string_view timestamp_part;
+
+ if (str.back() == 'Z') {
+ timestamp_part = str.substr(0, str.size() - 1);
+ } else if (str.size() >= 6 &&
+ (str[str.size() - 6] == '+' || str[str.size() - 6] == '-')) {
+ ICEBERG_ASSIGN_OR_RAISE(offset_micros,
+ ParseTimezoneOffset(str.substr(str.size() - 6)));
+ timestamp_part = str.substr(0, str.size() - 6);
+ } else {
+ return InvalidArgument("Invalid timestamptz string (missing timezone
suffix): '{}'",
+ str);
}
- return quotient;
+
+ return std::make_pair(timestamp_part, offset_micros);
}
-Result<int64_t> MultiplyExact(int64_t lhs, int64_t rhs) {
- const auto result = static_cast<int128_t>(lhs) * static_cast<int128_t>(rhs);
- if (result > std::numeric_limits<int64_t>::max() ||
- result < std::numeric_limits<int64_t>::min()) [[unlikely]] {
- return InvalidArgument("Long overflow when multiplying {} by {}", lhs,
rhs);
+Result<int64_t> TimestampFromDayTime(int32_t days, int64_t time_units,
+ int64_t units_per_day, int64_t
offset_micros,
+ int64_t units_per_micro) {
+ const auto offset_units =
+ static_cast<int128_t>(offset_micros) *
static_cast<int128_t>(units_per_micro);
+ const auto timestamp =
+ static_cast<int128_t>(days) * static_cast<int128_t>(units_per_day) +
+ static_cast<int128_t>(time_units) - offset_units;
+
+ if (timestamp > std::numeric_limits<int64_t>::max() ||
+ timestamp < std::numeric_limits<int64_t>::min()) [[unlikely]] {
+ return InvalidArgument("Timestamp value is out of int64 range");
}
- return static_cast<int64_t>(result);
+
+ return static_cast<int64_t>(timestamp);
+}
+
+/// Parse fractional seconds (after '.') and return micros.
+/// Digits beyond 6 are truncated.
+Result<int64_t> ParseFractionalMicros(std::string_view frac) {
+ if (frac.empty() || frac.size() > 9) [[unlikely]] {
+ return InvalidArgument("Invalid fractional seconds: '{}'", frac);
+ }
+ if (frac.size() > 6) frac = frac.substr(0, 6);
+ ICEBERG_ASSIGN_OR_RAISE(auto val, StringUtils::ParseNumber<int32_t>(frac));
+ for (size_t i = frac.size(); i < 6; ++i) {
+ val *= 10;
+ }
+ return static_cast<int64_t>(val);
+}
+
+/// Parse fractional seconds (after '.') and return nanos.
+Result<int64_t> ParseFractionalNanos(std::string_view frac) {
+ if (frac.empty() || frac.size() > 9) [[unlikely]] {
+ return InvalidArgument("Invalid fractional seconds: '{}'", frac);
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto val, StringUtils::ParseNumber<int32_t>(frac));
+ for (size_t i = frac.size(); i < 9; ++i) {
+ val *= 10;
+ }
+ return static_cast<int64_t>(val);
+}
+
+template <typename TimeScaleParser>
+Result<int64_t> ParseTimeWithFraction(std::string_view str, int64_t
units_per_second,
+ TimeScaleParser&& parse_fraction) {
+ if (str.size() < 5 || str[2] != ':') [[unlikely]] {
+ return InvalidArgument("Invalid time string: '{}'", str);
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(auto hours,
+ StringUtils::ParseNumber<int64_t>(str.substr(0, 2)));
+ ICEBERG_ASSIGN_OR_RAISE(auto minutes,
+ StringUtils::ParseNumber<int64_t>(str.substr(3, 2)));
+ int64_t seconds = 0;
+
+ int64_t frac_units = 0;
+ if (str.size() > 5) {
+ if (str[5] != ':' || str.size() < 8) [[unlikely]] {
+ return InvalidArgument("Invalid time string: '{}'", str);
+ }
+ ICEBERG_ASSIGN_OR_RAISE(seconds,
StringUtils::ParseNumber<int64_t>(str.substr(6, 2)));
+ if (str.size() > 8) {
+ if (str[8] != '.') [[unlikely]] {
+ return InvalidArgument("Invalid time string: '{}'", str);
+ }
+ ICEBERG_ASSIGN_OR_RAISE(frac_units, parse_fraction(str.substr(9)));
+ }
+ }
+
+ if (hours < 0 || hours > 23 || minutes < 0 || minutes > 59 || seconds < 0 ||
+ seconds > 59) [[unlikely]] {
+ return InvalidArgument("Invalid time string: '{}'", str);
+ }
+
+ return hours * internal::kSecondsPerHour * units_per_second +
+ minutes * internal::kSecondsPerMinute * units_per_second +
+ seconds * units_per_second + frac_units;
}
inline constexpr year_month_day DateToYmd(int32_t days_since_epoch) {
- return {kEpochDays + days{days_since_epoch}};
+ return {internal::kEpochDays + days{days_since_epoch}};
}
inline constexpr year_month_day TimestampToYmd(int64_t micros_since_epoch) {
@@ -86,7 +195,7 @@ inline constexpr int32_t TimestampNsToDuration(int64_t
nanos_since_epoch) {
}
inline constexpr int32_t MonthsSinceEpoch(const year_month_day& ymd) {
- auto delta = ymd.year() - kEpochYmd.year();
+ auto delta = ymd.year() - internal::kEpochYmd.year();
// Calculate the month as months from 1970-01
// Note: January is month 1, so we subtract 1 to get zero-based month count.
return static_cast<int32_t>(delta.count() * 12 +
static_cast<unsigned>(ymd.month()) -
@@ -102,21 +211,21 @@ template <>
Result<Literal> ExtractYearImpl<TypeId::kDate>(const Literal& literal) {
auto value = std::get<int32_t>(literal.value());
auto ymd = DateToYmd(value);
- return Literal::Int((ymd.year() - kEpochYmd.year()).count());
+ return Literal::Int((ymd.year() - internal::kEpochYmd.year()).count());
}
template <>
Result<Literal> ExtractYearImpl<TypeId::kTimestamp>(const Literal& literal) {
auto value = std::get<int64_t>(literal.value());
auto ymd = TimestampToYmd(value);
- return Literal::Int((ymd.year() - kEpochYmd.year()).count());
+ return Literal::Int((ymd.year() - internal::kEpochYmd.year()).count());
}
template <>
Result<Literal> ExtractYearImpl<TypeId::kTimestampNs>(const Literal& literal) {
auto value = std::get<int64_t>(literal.value());
auto ymd = TimestampNsToYmd(value);
- return Literal::Int((ymd.year() - kEpochYmd.year()).count());
+ return Literal::Int((ymd.year() - internal::kEpochYmd.year()).count());
}
template <>
@@ -227,11 +336,112 @@ Result<Literal>
ExtractHourImpl<TypeId::kTimestampTzNs>(const Literal& literal)
} // namespace
int64_t TemporalUtils::NanosToMicros(int64_t nanos) {
- return FloorDiv(nanos, kNanosPerMicro);
+ return FloorDiv(nanos, internal::kNanosPerMicro);
}
Result<int64_t> TemporalUtils::MicrosToNanos(int64_t micros) {
- return MultiplyExact(micros, kNanosPerMicro);
+ return MultiplyExact(micros, internal::kNanosPerMicro);
+}
+
+Result<int32_t> TemporalUtils::ParseDay(std::string_view str) {
+ auto dash1 = str.find('-', (!str.empty() && (str[0] == '-' || str[0] ==
'+')) ? 1 : 0);
+ auto dash2 = str.find('-', dash1 + 1);
+ if (str.size() < 10 || dash1 == std::string_view::npos ||
+ dash2 == std::string_view::npos) [[unlikely]] {
+ return InvalidArgument("Invalid date string: '{}'", str);
+ }
+ auto year_str = str.substr(0, dash1);
+ if (!year_str.empty() && year_str[0] == '+') {
+ year_str = year_str.substr(1);
+ }
+ ICEBERG_ASSIGN_OR_RAISE(auto year_value,
StringUtils::ParseNumber<int32_t>(year_str));
+ ICEBERG_ASSIGN_OR_RAISE(auto month_value,
StringUtils::ParseNumber<int32_t>(str.substr(
+ dash1 + 1, dash2 - dash1 -
1)));
+ ICEBERG_ASSIGN_OR_RAISE(auto day_value,
+ StringUtils::ParseNumber<int32_t>(str.substr(dash2 +
1)));
+
+ auto ymd = std::chrono::year{year_value} /
+ std::chrono::month{static_cast<unsigned>(month_value)} /
+ std::chrono::day{static_cast<unsigned>(day_value)};
+ if (!ymd.ok()) [[unlikely]] {
+ return InvalidArgument("Invalid date: '{}'", str);
+ }
+
+ auto days_since_epoch = std::chrono::sys_days{ymd} - internal::kEpochDays;
+ return static_cast<int32_t>(days_since_epoch.count());
+}
+
+Result<int64_t> TemporalUtils::ParseTime(std::string_view str) {
+ return ParseTimeWithFraction(str, internal::kMicrosPerSecond,
ParseFractionalMicros);
+}
+
+Result<int64_t> TemporalUtils::ParseTimeNs(std::string_view str) {
+ return ParseTimeWithFraction(str, internal::kNanosPerSecond,
ParseFractionalNanos);
+}
+
+Result<int64_t> TemporalUtils::ParseTimestamp(std::string_view str) {
+ auto t_pos = str.find('T');
+ if (t_pos == std::string_view::npos) [[unlikely]] {
+ return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
str);
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(auto days_since_epoch, ParseDay(str.substr(0,
t_pos)));
+ ICEBERG_ASSIGN_OR_RAISE(auto time_micros, ParseTime(str.substr(t_pos + 1)));
+
+ return TimestampFromDayTime(days_since_epoch, time_micros,
internal::kMicrosPerDay,
+ /*offset_micros=*/0, /*units_per_micro=*/1);
+}
+
+Result<int64_t> TemporalUtils::ParseTimestampNs(std::string_view str) {
+ auto t_pos = str.find('T');
+ if (t_pos == std::string_view::npos) [[unlikely]] {
+ return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
str);
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(auto days_since_epoch, ParseDay(str.substr(0,
t_pos)));
+ ICEBERG_ASSIGN_OR_RAISE(auto time_nanos, ParseTimeNs(str.substr(t_pos + 1)));
+
+ return TimestampFromDayTime(days_since_epoch, time_nanos,
internal::kNanosPerDay,
+ /*offset_micros=*/0,
+ /*units_per_micro=*/internal::kNanosPerMicro);
+}
+
+Result<int64_t> TemporalUtils::ParseTimestampWithZone(std::string_view str) {
+ ICEBERG_ASSIGN_OR_RAISE(auto timestamp_with_offset,
ParseTimestampWithZoneSuffix(str));
+ const auto [timestamp_part, offset_micros] = timestamp_with_offset;
+
+ auto t_pos = timestamp_part.find('T');
+ if (t_pos == std::string_view::npos) [[unlikely]] {
+ return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
+ timestamp_part);
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(auto days_since_epoch,
+ ParseDay(timestamp_part.substr(0, t_pos)));
+ ICEBERG_ASSIGN_OR_RAISE(auto time_micros,
ParseTime(timestamp_part.substr(t_pos + 1)));
+
+ return TimestampFromDayTime(days_since_epoch, time_micros,
internal::kMicrosPerDay,
+ offset_micros,
+ /*units_per_micro=*/1);
+}
+
+Result<int64_t> TemporalUtils::ParseTimestampNsWithZone(std::string_view str) {
+ ICEBERG_ASSIGN_OR_RAISE(auto timestamp_with_offset,
ParseTimestampWithZoneSuffix(str));
+ const auto [timestamp_part, offset_micros] = timestamp_with_offset;
+
+ auto t_pos = timestamp_part.find('T');
+ if (t_pos == std::string_view::npos) [[unlikely]] {
+ return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
+ timestamp_part);
+ }
+
+ ICEBERG_ASSIGN_OR_RAISE(auto days_since_epoch,
+ ParseDay(timestamp_part.substr(0, t_pos)));
+ ICEBERG_ASSIGN_OR_RAISE(auto time_nanos,
ParseTimeNs(timestamp_part.substr(t_pos + 1)));
+
+ return TimestampFromDayTime(days_since_epoch, time_nanos,
internal::kNanosPerDay,
+ offset_micros,
+ /*units_per_micro=*/internal::kNanosPerMicro);
}
#define DISPATCH_EXTRACT_YEAR(type_id) \
diff --git a/src/iceberg/util/temporal_util.h b/src/iceberg/util/temporal_util.h
index 414e4fd2..2121f565 100644
--- a/src/iceberg/util/temporal_util.h
+++ b/src/iceberg/util/temporal_util.h
@@ -19,12 +19,34 @@
#pragma once
+#include <chrono>
#include <cstdint>
+#include <string_view>
#include "iceberg/iceberg_export.h"
#include "iceberg/result.h"
#include "iceberg/type_fwd.h"
+namespace iceberg::internal {
+
+inline constexpr int64_t kNanosPerMicro = 1000;
+inline constexpr int64_t kMicrosPerMilli = 1000;
+inline constexpr int64_t kMicrosPerSecond = 1000 * kMicrosPerMilli;
+inline constexpr int64_t kSecondsPerMinute = 60;
+inline constexpr int64_t kMinutesPerHour = 60;
+inline constexpr int64_t kHoursPerDay = 24;
+inline constexpr int64_t kSecondsPerHour = kMinutesPerHour * kSecondsPerMinute;
+inline constexpr int64_t kSecondsPerDay = kHoursPerDay * kSecondsPerHour;
+inline constexpr int64_t kMicrosPerDay = kSecondsPerDay * kMicrosPerSecond;
+inline constexpr int64_t kNanosPerMilli = kMicrosPerMilli * kNanosPerMicro;
+inline constexpr int64_t kNanosPerSecond = kMicrosPerSecond * kNanosPerMicro;
+inline constexpr int64_t kNanosPerDay = kMicrosPerDay * kNanosPerMicro;
+
+inline constexpr auto kEpochYmd = std::chrono::year{1970} /
std::chrono::January / 1;
+inline constexpr auto kEpochDays = std::chrono::sys_days{kEpochYmd};
+
+} // namespace iceberg::internal
+
namespace iceberg {
class ICEBERG_EXPORT TemporalUtils {
@@ -35,6 +57,74 @@ class ICEBERG_EXPORT TemporalUtils {
/// \brief Convert microseconds since epoch to nanoseconds, failing on
overflow.
static Result<int64_t> MicrosToNanos(int64_t micros);
+ /// \brief Parses a date string in "[+-]yyyy-MM-dd" format into days since
epoch.
+ ///
+ /// Supports an optional '+' or '-' sign prefix on the year (extended years beyond 9999, or negative years).
+ ///
+ /// \param str The date string to parse.
+ /// \return The number of days since 1970-01-01, or an error.
+ static Result<int32_t> ParseDay(std::string_view str);
+
+ /// \brief Parses a time string into microseconds from midnight.
+ ///
+ /// Accepts ISO-8601 local time formats: "HH:mm", "HH:mm:ss", or
+ /// "HH:mm:ss.f" where the fractional part can be 1-9 digits.
+ /// Digits beyond 6 (microsecond precision) are truncated.
+ ///
+ /// \param str The time string to parse.
+ /// \return The number of microseconds from midnight, or an error.
+ static Result<int64_t> ParseTime(std::string_view str);
+
+ /// \brief Parses a time string into nanoseconds from midnight.
+ ///
+ /// Accepts ISO-8601 local time formats: "HH:mm", "HH:mm:ss", or
+ /// "HH:mm:ss.f" where the fractional part can be 1-9 digits.
+ /// Fractional parts longer than 9 digits (nanosecond precision) are rejected.
+ ///
+ /// \param str The time string to parse.
+ /// \return The number of nanoseconds from midnight, or an error.
+ static Result<int64_t> ParseTimeNs(std::string_view str);
+
+ /// \brief Parses a timestamp string into microseconds since epoch.
+ ///
+ /// Accepts ISO-8601 local date-time formats: "yyyy-MM-ddTHH:mm",
+ /// "yyyy-MM-ddTHH:mm:ss", or "yyyy-MM-ddTHH:mm:ss.f" where the
+ /// fractional part can be 1-9 digits (truncated to microseconds).
+ ///
+ /// \param str The timestamp string to parse.
+ /// \return The number of microseconds since epoch, or an error.
+ static Result<int64_t> ParseTimestamp(std::string_view str);
+
+ /// \brief Parses a timestamp string into nanoseconds since epoch.
+ ///
+ /// Accepts ISO-8601 local date-time formats: "yyyy-MM-ddTHH:mm",
+ /// "yyyy-MM-ddTHH:mm:ss", or "yyyy-MM-ddTHH:mm:ss.f" where the
+ /// fractional part can be 1-9 digits.
+ ///
+ /// \param str The timestamp string to parse.
+ /// \return The number of nanoseconds since epoch, or an error.
+ static Result<int64_t> ParseTimestampNs(std::string_view str);
+
+ /// \brief Parses a timestamp-with-zone string into microseconds since epoch
(UTC).
+ ///
+ /// Accepts the same formats as ParseTimestamp, with a timezone suffix:
+ /// "Z", "+HH:mm", or "-HH:mm". Non-UTC offsets are converted to UTC.
+ /// The seconds and fractional parts are optional (e.g.
"yyyy-MM-ddTHH:mm+00:00").
+ ///
+ /// \param str The timestamp string to parse.
+ /// \return The number of microseconds since epoch (UTC), or an error.
+ static Result<int64_t> ParseTimestampWithZone(std::string_view str);
+
+ /// \brief Parses a timestamp-with-zone string into nanoseconds since epoch
(UTC).
+ ///
+ /// Accepts the same formats as ParseTimestampNs, with a timezone suffix:
+ /// "Z", "+HH:mm", or "-HH:mm". Non-UTC offsets are converted to UTC.
+ /// The seconds and fractional parts are optional (e.g.
"yyyy-MM-ddTHH:mm+00:00").
+ ///
+ /// \param str The timestamp string to parse.
+ /// \return The number of nanoseconds since epoch (UTC), or an error.
+ static Result<int64_t> ParseTimestampNsWithZone(std::string_view str);
+
/// \brief Extract a date or timestamp year, as years from 1970
static Result<Literal> ExtractYear(const Literal& literal);
diff --git a/src/iceberg/util/transform_util.cc
b/src/iceberg/util/transform_util.cc
index fc1b104e..d1244938 100644
--- a/src/iceberg/util/transform_util.cc
+++ b/src/iceberg/util/transform_util.cc
@@ -21,166 +21,24 @@
#include <array>
#include <chrono>
-#include <limits>
+#include <format>
-#include "iceberg/util/int128.h"
-#include "iceberg/util/macros.h"
-#include "iceberg/util/string_util.h"
+#include "iceberg/util/temporal_util.h"
namespace iceberg {
-namespace {
-constexpr auto kEpochDate = std::chrono::year{1970} / std::chrono::January / 1;
-constexpr int64_t kMicrosPerMillis = 1'000;
-constexpr int64_t kMicrosPerSecond = 1'000'000;
-constexpr int64_t kMicrosPerDay = 86'400'000'000LL;
-constexpr int64_t kNanosPerMillis = 1'000'000;
-constexpr int64_t kNanosPerSecond = 1'000'000'000;
-constexpr int64_t kNanosPerDay = 86'400'000'000'000LL;
-
-/// Parse a timezone offset of the form "+HH:mm" or "-HH:mm" and return the
-/// offset in microseconds (positive for east of UTC, negative for west).
-Result<int64_t> ParseTimezoneOffset(std::string_view offset) {
- if (offset.size() != 6 || (offset[0] != '+' && offset[0] != '-') ||
offset[3] != ':') {
- return InvalidArgument("Invalid timezone offset: '{}'", offset);
- }
- bool negative = offset[0] == '-';
- ICEBERG_ASSIGN_OR_RAISE(auto hours,
- StringUtils::ParseNumber<int64_t>(offset.substr(1,
2)));
- ICEBERG_ASSIGN_OR_RAISE(auto minutes,
- StringUtils::ParseNumber<int64_t>(offset.substr(4,
2)));
- if (hours > 18 || minutes > 59) [[unlikely]] {
- return InvalidArgument("Invalid timezone offset: '{}'", offset);
- }
-
- if (hours == 18 && minutes != 0) [[unlikely]] {
- return InvalidArgument("Timezone offset '{}' not in range [-18:00,
+18:00]", offset);
- }
-
- auto micros = hours * 3'600 * kMicrosPerSecond + minutes * 60 *
kMicrosPerSecond;
- return negative ? -micros : micros;
-}
-
-Result<std::pair<std::string_view, int64_t>> ParseTimestampWithZoneSuffix(
- std::string_view str) {
- if (str.empty()) [[unlikely]] {
- return InvalidArgument("Invalid timestamptz string: '{}'", str);
- }
-
- int64_t offset_micros = 0;
- std::string_view timestamp_part;
-
- if (str.back() == 'Z') {
- timestamp_part = str.substr(0, str.size() - 1);
- } else if (str.size() >= 6 &&
- (str[str.size() - 6] == '+' || str[str.size() - 6] == '-')) {
- // Parse "+HH:mm" or "-HH:mm" offset suffix
- ICEBERG_ASSIGN_OR_RAISE(offset_micros,
- ParseTimezoneOffset(str.substr(str.size() - 6)));
- timestamp_part = str.substr(0, str.size() - 6);
- } else {
- return InvalidArgument("Invalid timestamptz string (missing timezone
suffix): '{}'",
- str);
- }
-
- return std::make_pair(timestamp_part, offset_micros);
-}
-
-Result<int64_t> TimestampFromDayTime(int32_t days, int64_t time_units,
- int64_t units_per_day, int64_t
offset_micros,
- int64_t units_per_micro) {
- const auto offset_units =
- static_cast<int128_t>(offset_micros) *
static_cast<int128_t>(units_per_micro);
- const auto timestamp =
- static_cast<int128_t>(days) * static_cast<int128_t>(units_per_day) +
- static_cast<int128_t>(time_units) - offset_units;
-
- if (timestamp > std::numeric_limits<int64_t>::max() ||
- timestamp < std::numeric_limits<int64_t>::min()) [[unlikely]] {
- return InvalidArgument("Timestamp value is out of int64 range");
- }
-
- return static_cast<int64_t>(timestamp);
-}
-
-/// Parse fractional seconds (after '.') and return micros.
-/// Digits beyond 6 are truncated (nanosecond precision).
-Result<int64_t> ParseFractionalMicros(std::string_view frac) {
- if (frac.empty() || frac.size() > 9) [[unlikely]] {
- return InvalidArgument("Invalid fractional seconds: '{}'", frac);
- }
- // Truncate to microsecond precision (6 digits), matching Java
ISO_LOCAL_TIME behavior
- if (frac.size() > 6) frac = frac.substr(0, 6);
- ICEBERG_ASSIGN_OR_RAISE(auto val, StringUtils::ParseNumber<int32_t>(frac));
- // Right-pad to 6 digits: "500" -> 500000, "001" -> 1000, "000001" -> 1000
- for (size_t i = frac.size(); i < 6; ++i) {
- val *= 10;
- }
- return static_cast<int64_t>(val);
-}
-
-/// Parse fractional seconds (after '.') and return nanos.
-Result<int64_t> ParseFractionalNanos(std::string_view frac) {
- if (frac.empty() || frac.size() > 9) [[unlikely]] {
- return InvalidArgument("Invalid fractional seconds: '{}'", frac);
- }
- ICEBERG_ASSIGN_OR_RAISE(auto val, StringUtils::ParseNumber<int32_t>(frac));
- // Right-pad to 9 digits: "500" -> 500000000, "001" -> 1000000, "000001" ->
1000
- for (size_t i = frac.size(); i < 9; ++i) {
- val *= 10;
- }
- return static_cast<int64_t>(val);
-}
-
-template <typename TimeScaleParser>
-Result<int64_t> ParseTimeWithFraction(std::string_view str, int64_t
units_per_second,
- TimeScaleParser&& parse_fraction) {
- if (str.size() < 5 || str[2] != ':') [[unlikely]] {
- return InvalidArgument("Invalid time string: '{}'", str);
- }
-
- ICEBERG_ASSIGN_OR_RAISE(auto hours,
- StringUtils::ParseNumber<int64_t>(str.substr(0, 2)));
- ICEBERG_ASSIGN_OR_RAISE(auto minutes,
- StringUtils::ParseNumber<int64_t>(str.substr(3, 2)));
- int64_t seconds = 0;
-
- int64_t frac_units = 0;
- if (str.size() > 5) {
- if (str[5] != ':' || str.size() < 8) [[unlikely]] {
- return InvalidArgument("Invalid time string: '{}'", str);
- }
- ICEBERG_ASSIGN_OR_RAISE(seconds,
StringUtils::ParseNumber<int64_t>(str.substr(6, 2)));
- if (str.size() > 8) {
- if (str[8] != '.') [[unlikely]] {
- return InvalidArgument("Invalid time string: '{}'", str);
- }
- ICEBERG_ASSIGN_OR_RAISE(frac_units, parse_fraction(str.substr(9)));
- }
- }
-
- if (hours < 0 || hours > 23 || minutes < 0 || minutes > 59 || seconds < 0 ||
- seconds > 59) [[unlikely]] {
- return InvalidArgument("Invalid time string: '{}'", str);
- }
-
- return hours * 3'600 * units_per_second + minutes * 60 * units_per_second +
- seconds * units_per_second + frac_units;
-}
-} // namespace
-
std::string TransformUtil::HumanYear(int32_t year_ordinal) {
- auto y = kEpochDate + std::chrono::years{year_ordinal};
+ auto y = internal::kEpochYmd + std::chrono::years{year_ordinal};
return std::format("{:%Y}", y);
}
std::string TransformUtil::HumanMonth(int32_t month_ordinal) {
- auto ym = kEpochDate + std::chrono::months(month_ordinal);
+ auto ym = internal::kEpochYmd + std::chrono::months(month_ordinal);
return std::format("{:%Y-%m}", ym);
}
std::string TransformUtil::HumanDay(int32_t day_ordinal) {
- auto ymd = std::chrono::sys_days{kEpochDate} +
std::chrono::days{day_ordinal};
+ auto ymd = internal::kEpochDays + std::chrono::days{day_ordinal};
return std::format("{:%F}", ymd);
}
@@ -192,14 +50,14 @@ std::string TransformUtil::HumanHour(int32_t hour_ordinal)
{
std::string TransformUtil::HumanTime(int64_t micros_from_midnight) {
std::chrono::hh_mm_ss<std::chrono::seconds> hms{
- std::chrono::seconds{micros_from_midnight / kMicrosPerSecond}};
- auto micros = micros_from_midnight % kMicrosPerSecond;
+ std::chrono::seconds{micros_from_midnight / internal::kMicrosPerSecond}};
+ auto micros = micros_from_midnight % internal::kMicrosPerSecond;
if (micros == 0 && hms.seconds().count() == 0) {
return std::format("{:%R}", hms);
} else if (micros == 0) {
return std::format("{:%T}", hms);
- } else if (micros % kMicrosPerMillis == 0) {
- return std::format("{:%T}.{:03d}", hms, micros / kMicrosPerMillis);
+ } else if (micros % internal::kMicrosPerMilli == 0) {
+ return std::format("{:%T}.{:03d}", hms, micros /
internal::kMicrosPerMilli);
} else {
return std::format("{:%T}.{:06d}", hms, micros);
}
@@ -216,8 +74,8 @@ std::string TransformUtil::HumanTimestamp(int64_t
timestamp_micros) {
.count();
if (micros == 0) {
return std::format("{:%FT%T}", tp);
- } else if (micros % kMicrosPerMillis == 0) {
- return std::format("{:%FT%T}.{:03d}", tp, micros / kMicrosPerMillis);
+ } else if (micros % internal::kMicrosPerMilli == 0) {
+ return std::format("{:%FT%T}.{:03d}", tp, micros /
internal::kMicrosPerMilli);
} else {
return std::format("{:%FT%T}.{:06d}", tp, micros);
}
@@ -234,10 +92,10 @@ std::string TransformUtil::HumanTimestampNs(int64_t
timestamp_nanos) {
.count();
if (nanos == 0) {
return std::format("{:%FT%T}", tp);
- } else if (nanos % kNanosPerMillis == 0) {
- return std::format("{:%FT%T}.{:03d}", tp, nanos / kNanosPerMillis);
- } else if (nanos % kMicrosPerMillis == 0) {
- return std::format("{:%FT%T}.{:06d}", tp, nanos / kMicrosPerMillis);
+ } else if (nanos % internal::kNanosPerMilli == 0) {
+ return std::format("{:%FT%T}.{:03d}", tp, nanos /
internal::kNanosPerMilli);
+ } else if (nanos % internal::kNanosPerMicro == 0) {
+ return std::format("{:%FT%T}.{:06d}", tp, nanos /
internal::kNanosPerMicro);
} else {
return std::format("{:%FT%T}.{:09d}", tp, nanos);
}
@@ -254,8 +112,8 @@ std::string TransformUtil::HumanTimestampWithZone(int64_t
timestamp_micros) {
.count();
if (micros == 0) {
return std::format("{:%FT%T}+00:00", tp);
- } else if (micros % kMicrosPerMillis == 0) {
- return std::format("{:%FT%T}.{:03d}+00:00", tp, micros / kMicrosPerMillis);
+ } else if (micros % internal::kMicrosPerMilli == 0) {
+ return std::format("{:%FT%T}.{:03d}+00:00", tp, micros /
internal::kMicrosPerMilli);
} else {
return std::format("{:%FT%T}.{:06d}+00:00", tp, micros);
}
@@ -272,113 +130,15 @@ std::string
TransformUtil::HumanTimestampNsWithZone(int64_t timestamp_nanos) {
.count();
if (nanos == 0) {
return std::format("{:%FT%T}+00:00", tp);
- } else if (nanos % kNanosPerMillis == 0) {
- return std::format("{:%FT%T}.{:03d}+00:00", tp, nanos / kNanosPerMillis);
- } else if (nanos % kMicrosPerMillis == 0) {
- return std::format("{:%FT%T}.{:06d}+00:00", tp, nanos / kMicrosPerMillis);
+ } else if (nanos % internal::kNanosPerMilli == 0) {
+ return std::format("{:%FT%T}.{:03d}+00:00", tp, nanos /
internal::kNanosPerMilli);
+ } else if (nanos % internal::kNanosPerMicro == 0) {
+ return std::format("{:%FT%T}.{:06d}+00:00", tp, nanos /
internal::kNanosPerMicro);
} else {
return std::format("{:%FT%T}.{:09d}+00:00", tp, nanos);
}
}
-Result<int32_t> TransformUtil::ParseDay(std::string_view str) {
- // Expected format: "[+-]yyyy-MM-dd"
- // Parse year, month, day manually, skipping leading '+' or '-' to find
first date dash
- auto dash1 = str.find('-', (!str.empty() && (str[0] == '-' || str[0] ==
'+')) ? 1 : 0);
- auto dash2 = str.find('-', dash1 + 1);
- if (str.size() < 10 || dash1 == std::string_view::npos ||
- dash2 == std::string_view::npos) [[unlikely]] {
- return InvalidArgument("Invalid date string: '{}'", str);
- }
- auto year_str = str.substr(0, dash1);
- // std::from_chars does not accept '+' prefix, strip it for positive
extended years
- if (!year_str.empty() && year_str[0] == '+') {
- year_str = year_str.substr(1);
- }
- ICEBERG_ASSIGN_OR_RAISE(auto year,
StringUtils::ParseNumber<int32_t>(year_str));
- ICEBERG_ASSIGN_OR_RAISE(auto month, StringUtils::ParseNumber<int32_t>(
- str.substr(dash1 + 1, dash2 - dash1
- 1)));
- ICEBERG_ASSIGN_OR_RAISE(auto day,
- StringUtils::ParseNumber<int32_t>(str.substr(dash2 +
1)));
-
- auto ymd = std::chrono::year{year} /
std::chrono::month{static_cast<unsigned>(month)} /
- std::chrono::day{static_cast<unsigned>(day)};
- if (!ymd.ok()) [[unlikely]] {
- return InvalidArgument("Invalid date: '{}'", str);
- }
-
- auto days = std::chrono::sys_days{ymd} - std::chrono::sys_days{kEpochDate};
- return static_cast<int32_t>(days.count());
-}
-
-Result<int64_t> TransformUtil::ParseTime(std::string_view str) {
- return ParseTimeWithFraction(str, kMicrosPerSecond, ParseFractionalMicros);
-}
-
-Result<int64_t> TransformUtil::ParseTimeNs(std::string_view str) {
- return ParseTimeWithFraction(str, kNanosPerSecond, ParseFractionalNanos);
-}
-
-Result<int64_t> TransformUtil::ParseTimestamp(std::string_view str) {
- auto t_pos = str.find('T');
- if (t_pos == std::string_view::npos) [[unlikely]] {
- return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
str);
- }
-
- ICEBERG_ASSIGN_OR_RAISE(auto days, ParseDay(str.substr(0, t_pos)));
- ICEBERG_ASSIGN_OR_RAISE(auto time_micros, ParseTime(str.substr(t_pos + 1)));
-
- return TimestampFromDayTime(days, time_micros, kMicrosPerDay,
/*offset_micros=*/0,
- /*units_per_micro=*/1);
-}
-
-Result<int64_t> TransformUtil::ParseTimestampNs(std::string_view str) {
- auto t_pos = str.find('T');
- if (t_pos == std::string_view::npos) [[unlikely]] {
- return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
str);
- }
-
- ICEBERG_ASSIGN_OR_RAISE(auto days, ParseDay(str.substr(0, t_pos)));
- ICEBERG_ASSIGN_OR_RAISE(auto time_nanos, ParseTimeNs(str.substr(t_pos + 1)));
-
- return TimestampFromDayTime(days, time_nanos, kNanosPerDay,
/*offset_micros=*/0,
- /*units_per_micro=*/1'000);
-}
-
-Result<int64_t> TransformUtil::ParseTimestampWithZone(std::string_view str) {
- ICEBERG_ASSIGN_OR_RAISE(auto timestamp_with_offset,
ParseTimestampWithZoneSuffix(str));
- const auto [timestamp_part, offset_micros] = timestamp_with_offset;
-
- auto t_pos = timestamp_part.find('T');
- if (t_pos == std::string_view::npos) [[unlikely]] {
- return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
- timestamp_part);
- }
-
- ICEBERG_ASSIGN_OR_RAISE(auto days, ParseDay(timestamp_part.substr(0,
t_pos)));
- ICEBERG_ASSIGN_OR_RAISE(auto time_micros,
ParseTime(timestamp_part.substr(t_pos + 1)));
-
- return TimestampFromDayTime(days, time_micros, kMicrosPerDay, offset_micros,
- /*units_per_micro=*/1);
-}
-
-Result<int64_t> TransformUtil::ParseTimestampNsWithZone(std::string_view str) {
- ICEBERG_ASSIGN_OR_RAISE(auto timestamp_with_offset,
ParseTimestampWithZoneSuffix(str));
- const auto [timestamp_part, offset_micros] = timestamp_with_offset;
-
- auto t_pos = timestamp_part.find('T');
- if (t_pos == std::string_view::npos) [[unlikely]] {
- return InvalidArgument("Invalid timestamp string (missing 'T'): '{}'",
- timestamp_part);
- }
-
- ICEBERG_ASSIGN_OR_RAISE(auto days, ParseDay(timestamp_part.substr(0,
t_pos)));
- ICEBERG_ASSIGN_OR_RAISE(auto time_nanos,
ParseTimeNs(timestamp_part.substr(t_pos + 1)));
-
- return TimestampFromDayTime(days, time_nanos, kNanosPerDay, offset_micros,
- /*units_per_micro=*/1'000);
-}
-
std::string TransformUtil::Base64Encode(std::string_view str_to_encode) {
static constexpr std::string_view kBase64Chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
diff --git a/src/iceberg/util/transform_util.h b/src/iceberg/util/transform_util.h
index b9c69209..2fbcb3e5 100644
--- a/src/iceberg/util/transform_util.h
+++ b/src/iceberg/util/transform_util.h
@@ -19,11 +19,11 @@
#pragma once
+#include <cstdint>
#include <string>
+#include <string_view>
#include "iceberg/iceberg_export.h"
-#include "iceberg/result.h"
-#include "iceberg/type_fwd.h"
namespace iceberg {
@@ -127,74 +127,6 @@ class ICEBERG_EXPORT TransformUtil {
/// \return a string representation of this timestamp.
static std::string HumanTimestampNsWithZone(int64_t timestamp_nanos);
- /// \brief Parses a date string in "[+-]yyyy-MM-dd" format into days since epoch.
- ///
- /// Supports an optional '+' or '-' prefix for extended years beyond 9999.
- ///
- /// \param str The date string to parse.
- /// \return The number of days since 1970-01-01, or an error.
- static Result<int32_t> ParseDay(std::string_view str);
-
- /// \brief Parses a time string into microseconds from midnight.
- ///
- /// Accepts ISO-8601 local time formats: "HH:mm", "HH:mm:ss", or
- /// "HH:mm:ss.f" where the fractional part can be 1-9 digits.
- /// Digits beyond 6 (microsecond precision) are truncated.
- ///
- /// \param str The time string to parse.
- /// \return The number of microseconds from midnight, or an error.
- static Result<int64_t> ParseTime(std::string_view str);
-
- /// \brief Parses a time string into nanoseconds from midnight.
- ///
- /// Accepts ISO-8601 local time formats: "HH:mm", "HH:mm:ss", or
- /// "HH:mm:ss.f" where the fractional part can be 1-9 digits.
- /// Digits beyond 9 (nanosecond precision) are truncated.
- ///
- /// \param str The time string to parse.
- /// \return The number of nanoseconds from midnight, or an error.
- static Result<int64_t> ParseTimeNs(std::string_view str);
-
- /// \brief Parses a timestamp string into microseconds since epoch.
- ///
- /// Accepts ISO-8601 local date-time formats: "yyyy-MM-ddTHH:mm",
- /// "yyyy-MM-ddTHH:mm:ss", or "yyyy-MM-ddTHH:mm:ss.f" where the
- /// fractional part can be 1-9 digits (truncated to microseconds).
- ///
- /// \param str The timestamp string to parse.
- /// \return The number of microseconds since epoch, or an error.
- static Result<int64_t> ParseTimestamp(std::string_view str);
-
- /// \brief Parses a timestamp string into nanoseconds since epoch.
- ///
- /// Accepts ISO-8601 local date-time formats: "yyyy-MM-ddTHH:mm",
- /// "yyyy-MM-ddTHH:mm:ss", or "yyyy-MM-ddTHH:mm:ss.f" where the
- /// fractional part can be 1-9 digits.
- ///
- /// \param str The timestamp string to parse.
- /// \return The number of nanoseconds since epoch, or an error.
- static Result<int64_t> ParseTimestampNs(std::string_view str);
-
- /// \brief Parses a timestamp-with-zone string into microseconds since epoch (UTC).
- ///
- /// Accepts the same formats as ParseTimestamp, with a timezone suffix:
- /// "Z", "+HH:mm", or "-HH:mm". Non-UTC offsets are converted to UTC.
- /// The seconds and fractional parts are optional (e.g. "yyyy-MM-ddTHH:mm+00:00").
- ///
- /// \param str The timestamp string to parse.
- /// \return The number of microseconds since epoch (UTC), or an error.
- static Result<int64_t> ParseTimestampWithZone(std::string_view str);
-
- /// \brief Parses a timestamp-with-zone string into nanoseconds since epoch (UTC).
- ///
- /// Accepts the same formats as ParseTimestampNs, with a timezone suffix:
- /// "Z", "+HH:mm", or "-HH:mm". Non-UTC offsets are converted to UTC.
- /// The seconds and fractional parts are optional (e.g. "yyyy-MM-ddTHH:mm+00:00").
- ///
- /// \param str The timestamp string to parse.
- /// \return The number of nanoseconds since epoch (UTC), or an error.
- static Result<int64_t> ParseTimestampNsWithZone(std::string_view str);
-
/// \brief Base64 encode a string
static std::string Base64Encode(std::string_view str_to_encode);
};