wgtmac commented on code in PR #653:
URL: https://github.com/apache/iceberg-cpp/pull/653#discussion_r3291979217
##########
src/iceberg/util/transform_util.cc:
##########
@@ -44,28 +49,124 @@ Result<int64_t> ParseTimezoneOffset(std::string_view
offset) {
StringUtils::ParseNumber<int64_t>(offset.substr(1,
2)));
ICEBERG_ASSIGN_OR_RAISE(auto minutes,
StringUtils::ParseNumber<int64_t>(offset.substr(4,
2)));
- if (hours > 18 || minutes > 59) {
+ if (hours > 18 || minutes > 59) [[unlikely]] {
return InvalidArgument("Invalid timezone offset: '{}'", offset);
}
+
+ if (hours == 18 && minutes != 0) [[unlikely]] {
+ return InvalidArgument("Timezone offset '{}' not in range [-18:00,
+18:00]", offset);
+ }
+
auto micros = hours * 3'600 * kMicrosPerSecond + minutes * 60 *
kMicrosPerSecond;
return negative ? -micros : micros;
}
+Result<std::pair<std::string_view, int64_t>> ParseTimestampWithZoneSuffix(
+ std::string_view str) {
+ if (str.empty()) [[unlikely]] {
+ return InvalidArgument("Invalid timestamptz string: '{}'", str);
+ }
+
+ int64_t offset_micros = 0;
+ std::string_view timestamp_part;
+
+ if (str.back() == 'Z') {
+ timestamp_part = str.substr(0, str.size() - 1);
+ } else if (str.size() >= 6 &&
+ (str[str.size() - 6] == '+' || str[str.size() - 6] == '-')) {
+ // Parse "+HH:mm" or "-HH:mm" offset suffix
+ ICEBERG_ASSIGN_OR_RAISE(offset_micros,
+ ParseTimezoneOffset(str.substr(str.size() - 6)));
+ timestamp_part = str.substr(0, str.size() - 6);
+ } else {
+ return InvalidArgument("Invalid timestamptz string (missing timezone
suffix): '{}'",
+ str);
+ }
+
+ return std::make_pair(timestamp_part, offset_micros);
+}
+
+Result<int64_t> TimestampFromDayTime(int32_t days, int64_t time_units,
+ int64_t units_per_day, int64_t
offset_micros,
+ int64_t units_per_micro) {
+ const auto offset_units =
+ static_cast<int128_t>(offset_micros) *
static_cast<int128_t>(units_per_micro);
Review Comment:
Should we use the new `MultiplyExact` instead of int128_t trick here?
Depending on int128_t may occupy more cycles. If the next plan is to add a
dedicate arithmetic utility for overflow check, I'm fine with that.
##########
src/iceberg/expression/literal.cc:
##########
@@ -388,18 +451,19 @@ std::strong_ordering CompareFloat(T lhs, T rhs) {
namespace {
bool Comparable(TypeId lhs, TypeId rhs) {
- switch (lhs) {
- case TypeId::kInt:
- case TypeId::kDate:
- return rhs == TypeId::kInt || rhs == TypeId::kDate;
- case TypeId::kLong:
- case TypeId::kTimestamp:
- case TypeId::kTimestampTz:
- return rhs == TypeId::kLong || rhs == TypeId::kTimestamp ||
- rhs == TypeId::kTimestampTz;
- default:
- return lhs == rhs;
+ if (lhs == rhs) {
+ return true;
+ }
+ if ((lhs == TypeId::kInt || lhs == TypeId::kDate) &&
+ (rhs == TypeId::kInt || rhs == TypeId::kDate)) {
+ return true;
}
+ if ((lhs == TypeId::kTimestamp || lhs == TypeId::kTimestampTz) &&
Review Comment:
I checked that this aligns with the Java impl but I think that is because of
Java's deficiency that timestamp type does not carry zone info. Not a strong
opinion but I think it is much safer if we do not allow comparison between
zoned and non-zoned.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]