pitrou commented on code in PR #12865:
URL: https://github.com/apache/arrow/pull/12865#discussion_r2207590411


##########
cpp/src/arrow/compute/kernels/temporal_internal.h:
##########
@@ -40,15 +39,42 @@ using arrow_vendored::date::year_month_day;
 using arrow_vendored::date::zoned_time;
 using std::chrono::duration_cast;
 
+using ArrowTimeZone = std::variant<const time_zone*, const OffsetZone>;
+
+template <class... Ts>

Review Comment:
   You should add a comment pointing where this comes from.



##########
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc:
##########
@@ -663,15 +661,19 @@ struct Nanosecond {
 
 template <typename Duration>
 struct IsDaylightSavings {
-  explicit IsDaylightSavings(const FunctionOptions* options, const time_zone* 
tz)
-      : tz_(tz) {}
+  explicit IsDaylightSavings(const FunctionOptions* options, const 
ArrowTimeZone* tz)

Review Comment:
   Why pass `tz` as a pointer if we're copying it anyway?



##########
cpp/src/arrow/compute/kernels/temporal_internal.h:
##########
@@ -40,15 +39,42 @@ using arrow_vendored::date::year_month_day;
 using arrow_vendored::date::zoned_time;
 using std::chrono::duration_cast;
 
+using ArrowTimeZone = std::variant<const time_zone*, const OffsetZone>;
+
+template <class... Ts>
+struct overloads : Ts... {
+  using Ts::operator()...;
+};
+template <class... Ts>
+overloads(Ts...) -> overloads<Ts...>;
+
 inline int64_t GetQuarter(const year_month_day& ymd) {
   return static_cast<int64_t>((static_cast<uint32_t>(ymd.month()) - 1) / 3);
 }
 
-static inline Result<const time_zone*> LocateZone(const std::string& timezone) 
{
+static inline Result<const ArrowTimeZone> LocateZone(const std::string& 
timezone) {
   try {
-    return locate_zone(timezone);
+    const char* timezone_string = timezone.c_str();
+    if ((timezone_string[0] == '+' || timezone_string[0] == '-') &&
+        (timezone.length() == 5 || timezone.length() == 6)) {
+      // Valid offset strings have to have 4 digits and a sign prefix.
+      // Valid examples: +01:23 and -0123, invalid examples: 1:23, 123, 0123, 
01:23.
+      std::chrono::minutes zone_offset;
+      if (timezone.length() == 6) {
+        arrow::internal::detail::ParseHH_MM(timezone_string + 1, &zone_offset);
+      } else {
+        arrow::internal::detail::ParseHHMM(timezone_string + 1, &zone_offset);

Review Comment:
   Not handling errors here?



##########
cpp/src/arrow/compute/kernels/temporal_internal.h:
##########
@@ -40,15 +39,42 @@ using arrow_vendored::date::year_month_day;
 using arrow_vendored::date::zoned_time;
 using std::chrono::duration_cast;
 
+using ArrowTimeZone = std::variant<const time_zone*, const OffsetZone>;
+
+template <class... Ts>
+struct overloads : Ts... {
+  using Ts::operator()...;
+};
+template <class... Ts>
+overloads(Ts...) -> overloads<Ts...>;
+
 inline int64_t GetQuarter(const year_month_day& ymd) {
   return static_cast<int64_t>((static_cast<uint32_t>(ymd.month()) - 1) / 3);
 }
 
-static inline Result<const time_zone*> LocateZone(const std::string& timezone) 
{
+static inline Result<const ArrowTimeZone> LocateZone(const std::string& 
timezone) {
   try {
-    return locate_zone(timezone);
+    const char* timezone_string = timezone.c_str();

Review Comment:
   You probably don't need this? The string can be indexed directly, assuming 
you check the length first rather than last.



##########
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc:
##########
@@ -1353,31 +1353,44 @@ Result<TypeHolder> 
ResolveLocalTimestampOutput(KernelContext* ctx,
 
 template <typename Duration>
 struct AssumeTimezone {
-  explicit AssumeTimezone(const AssumeTimezoneOptions* options, const 
time_zone* tz)
-      : options(*options), tz_(tz) {}
+  explicit AssumeTimezone(const AssumeTimezoneOptions* options, const 
ArrowTimeZone* tz)
+      : options(*options), tz_(*tz) {}
 
   template <typename T, typename Arg0>
-  T get_local_time(Arg0 arg, const time_zone* tz) const {
-    return static_cast<T>(zoned_time<Duration>(tz, 
local_time<Duration>(Duration{arg}))
-                              .get_sys_time()
-                              .time_since_epoch()
-                              .count());
+  T get_local_time(Arg0 arg, const ArrowTimeZone* tz) const {
+    const auto visitor =
+        overloads{[arg](const time_zone* tz) {
+                    return zoned_time<Duration>{tz, 
local_time<Duration>(Duration{arg})}
+                        .get_sys_time();
+                  },
+                  [arg](const OffsetZone tz) {
+                    return zoned_time<Duration, const OffsetZone*>{
+                        &tz, local_time<Duration>(Duration{arg})}
+                        .get_sys_time();
+                  }};
+    return std::visit(visitor, tz_).time_since_epoch().count();
   }
 
   template <typename T, typename Arg0>
   T get_local_time(Arg0 arg, const arrow_vendored::date::choose choose,
-                   const time_zone* tz) const {
-    return static_cast<T>(
-        zoned_time<Duration>(tz, local_time<Duration>(Duration{arg}), choose)
-            .get_sys_time()
-            .time_since_epoch()
-            .count());
+                   const ArrowTimeZone* tz) const {
+    const auto visitor = overloads{[arg, choose](const time_zone* tz) {
+                                     return zoned_time<Duration>{
+                                         tz, 
local_time<Duration>(Duration{arg}), choose}
+                                         .get_sys_time();
+                                   },
+                                   [arg, choose](const OffsetZone tz) {
+                                     return zoned_time<Duration, const 
OffsetZone*>{
+                                         &tz, 
local_time<Duration>(Duration{arg}), choose}
+                                         .get_sys_time();
+                                   }};
+    return static_cast<T>(std::visit(visitor, tz_).time_since_epoch().count());

Review Comment:
   Can the `.get_sys_time()` call be moved here?



##########
cpp/src/arrow/util/date_internal.h:
##########
@@ -0,0 +1,81 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "arrow/vendored/datetime.h"
+
+namespace arrow::internal {
+
+using arrow_vendored::date::choose;
+using arrow_vendored::date::days;
+using arrow_vendored::date::floor;
+using arrow_vendored::date::format;
+using arrow_vendored::date::local_days;
+using arrow_vendored::date::local_time;
+using arrow_vendored::date::locate_zone;
+using arrow_vendored::date::sys_days;
+using arrow_vendored::date::sys_info;
+using arrow_vendored::date::sys_seconds;
+using arrow_vendored::date::sys_time;
+using arrow_vendored::date::time_zone;
+using arrow_vendored::date::year_month_day;
+using arrow_vendored::date::zoned_time;
+using arrow_vendored::date::zoned_traits;
+using std::chrono::minutes;
+
+class OffsetZone {
+  std::chrono::minutes offset_;
+
+ public:
+  explicit OffsetZone(std::chrono::minutes offset) : offset_{offset} {}
+
+  template <class Duration>
+  local_time<Duration> to_local(sys_time<Duration> tp) const {
+    return local_time<Duration>{(tp + offset_).time_since_epoch()};
+  }
+
+  template <class Duration>
+  sys_time<Duration> to_sys(local_time<Duration> tp, choose = 
choose::earliest) const {
+    return sys_time<Duration>{(tp - offset_).time_since_epoch()};
+  }
+
+  template <class Duration>
+  sys_info get_info(sys_time<Duration> st) const {
+    return {sys_seconds::min(), sys_seconds::max(), offset_, minutes(0),
+            offset_ >= minutes(0) ? "+" + format("%H%M", offset_)
+                                  : "-" + format("%H%M", -offset_)};
+  }
+
+  const OffsetZone* operator->() const { return this; }
+};
+
+}  // namespace arrow::internal
+
+namespace arrow_vendored::date {
+using arrow::internal::OffsetZone;
+
+template <>
+struct zoned_traits<OffsetZone> {
+  static OffsetZone default_zone() { return 
OffsetZone{std::chrono::minutes{0}}; }
+
+  static OffsetZone locate_zone(const std::string& name) {
+    if (name == "UTC") return OffsetZone{std::chrono::minutes{0}};

Review Comment:
   Is it useful to implement this at all or do we always construct a 
`OffsetZone` directly from its minutes?



##########
cpp/src/arrow/compute/kernels/temporal_internal.h:
##########
@@ -128,22 +162,31 @@ struct ZonedLocalizer {
 template <typename Duration>
 struct TimestampFormatter {
   const char* format;
-  const time_zone* tz;
+  const ArrowTimeZone tz_;
   std::ostringstream bufstream;
 
-  explicit TimestampFormatter(const std::string& format, const time_zone* tz,
+  explicit TimestampFormatter(const std::string& format, const ArrowTimeZone 
tz,
                               const std::locale& locale)
-      : format(format.c_str()), tz(tz) {
+      : format(format.c_str()), tz_(tz) {
     bufstream.imbue(locale);
     // Propagate errors as C++ exceptions (to get an actual error message)
     bufstream.exceptions(std::ios::failbit | std::ios::badbit);
   }
 
   Result<std::string> operator()(int64_t arg) {
     bufstream.str("");
-    const auto zt = zoned_time<Duration>{tz, 
sys_time<Duration>(Duration{arg})};
+    const auto visitor = overloads{

Review Comment:
   This `std::visit` construct that creates a `zoned_time` from the given 
`ArrowTimeZone` seems to come time and time (!) again. Ideally we'd be able to 
write something like:
   ```c++
       const auto timepoint = sys_time<Duration>(Duration{arg});
       auto format_zoned_time = [&](auto&& zt) {
         try {
           arrow_vendored::date::to_stream(bufstream, format, zt);
           return Status::OK();
         } catch (const std::runtime_error& ex) {
           bufstream.clear();
           return Status::Invalid("Failed formatting timestamp: ", ex.what());
         }
       };
       RETURN_NOT_OK(ApplyTimeZone(tz_, timepoint, format_zoned_time));
       return std::move(bufstream).str();
   ```
   
   You could have several overloads:
   ```c++
   template <class Duration, class Func>
   auto ApplyTimeZone(const ArrowTimeZone&, sys_time<Duration>, Func&& func)
     -> decltype(func(zoned_time<Duration>{}));
   
   template <class Duration, class Func>
   auto ApplyTimeZone(const ArrowTimeZone&, local_time<Duration>, 
std::optional<choose>, Func&& func)
     -> decltype(func(zoned_time<Duration>{}));
   ```
   



##########
cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc:
##########
@@ -1353,31 +1353,44 @@ Result<TypeHolder> 
ResolveLocalTimestampOutput(KernelContext* ctx,
 
 template <typename Duration>
 struct AssumeTimezone {
-  explicit AssumeTimezone(const AssumeTimezoneOptions* options, const 
time_zone* tz)
-      : options(*options), tz_(tz) {}
+  explicit AssumeTimezone(const AssumeTimezoneOptions* options, const 
ArrowTimeZone* tz)
+      : options(*options), tz_(*tz) {}
 
   template <typename T, typename Arg0>
-  T get_local_time(Arg0 arg, const time_zone* tz) const {
-    return static_cast<T>(zoned_time<Duration>(tz, 
local_time<Duration>(Duration{arg}))
-                              .get_sys_time()
-                              .time_since_epoch()
-                              .count());
+  T get_local_time(Arg0 arg, const ArrowTimeZone* tz) const {
+    const auto visitor =
+        overloads{[arg](const time_zone* tz) {
+                    return zoned_time<Duration>{tz, 
local_time<Duration>(Duration{arg})}
+                        .get_sys_time();
+                  },
+                  [arg](const OffsetZone tz) {
+                    return zoned_time<Duration, const OffsetZone*>{
+                        &tz, local_time<Duration>(Duration{arg})}
+                        .get_sys_time();
+                  }};
+    return std::visit(visitor, tz_).time_since_epoch().count();
   }
 
   template <typename T, typename Arg0>
   T get_local_time(Arg0 arg, const arrow_vendored::date::choose choose,
-                   const time_zone* tz) const {
-    return static_cast<T>(
-        zoned_time<Duration>(tz, local_time<Duration>(Duration{arg}), choose)
-            .get_sys_time()
-            .time_since_epoch()
-            .count());
+                   const ArrowTimeZone* tz) const {
+    const auto visitor = overloads{[arg, choose](const time_zone* tz) {

Review Comment:
   `local_time<Duration>(Duration{arg})` can be factored out of this. Same for 
the other `get_local_time` overload.



##########
cpp/src/arrow/compute/kernels/temporal_internal.h:
##########
@@ -40,15 +39,42 @@ using arrow_vendored::date::year_month_day;
 using arrow_vendored::date::zoned_time;
 using std::chrono::duration_cast;
 
+using ArrowTimeZone = std::variant<const time_zone*, const OffsetZone>;

Review Comment:
   Besides, I think the second `const` isn't useful (the first one is about the 
value pointed to, not the pointer).
   ```suggestion
   using ArrowTimeZone = std::variant<const time_zone*, OffsetZone>;
   ```



##########
cpp/src/arrow/compute/kernels/temporal_internal.h:
##########
@@ -40,15 +39,42 @@ using arrow_vendored::date::year_month_day;
 using arrow_vendored::date::zoned_time;
 using std::chrono::duration_cast;
 
+using ArrowTimeZone = std::variant<const time_zone*, const OffsetZone>;
+
+template <class... Ts>
+struct overloads : Ts... {
+  using Ts::operator()...;
+};
+template <class... Ts>
+overloads(Ts...) -> overloads<Ts...>;
+
 inline int64_t GetQuarter(const year_month_day& ymd) {
   return static_cast<int64_t>((static_cast<uint32_t>(ymd.month()) - 1) / 3);
 }
 
-static inline Result<const time_zone*> LocateZone(const std::string& timezone) 
{
+static inline Result<const ArrowTimeZone> LocateZone(const std::string& 
timezone) {
   try {
-    return locate_zone(timezone);
+    const char* timezone_string = timezone.c_str();
+    if ((timezone_string[0] == '+' || timezone_string[0] == '-') &&
+        (timezone.length() == 5 || timezone.length() == 6)) {
+      // Valid offset strings have to have 4 digits and a sign prefix.
+      // Valid examples: +01:23 and -0123, invalid examples: 1:23, 123, 0123, 
01:23.
+      std::chrono::minutes zone_offset;
+      if (timezone.length() == 6) {
+        arrow::internal::detail::ParseHH_MM(timezone_string + 1, &zone_offset);
+      } else {
+        arrow::internal::detail::ParseHHMM(timezone_string + 1, &zone_offset);
+      }
+
+      zone_offset = timezone_string[0] == '-' ? -zone_offset : zone_offset;
+      return ArrowTimeZone{OffsetZone(zone_offset)};
+    } else {
+      const time_zone* offset_zone = locate_zone(timezone_string);
+      return ArrowTimeZone{offset_zone};
+    }
   } catch (const std::runtime_error& ex) {

Review Comment:
   This `catch` statement should probably only apply to `locate_zone`, not the 
entire piece of code.



##########
cpp/src/arrow/compute/kernels/temporal_internal.h:
##########
@@ -40,15 +39,42 @@ using arrow_vendored::date::year_month_day;
 using arrow_vendored::date::zoned_time;
 using std::chrono::duration_cast;
 
+using ArrowTimeZone = std::variant<const time_zone*, const OffsetZone>;
+
+template <class... Ts>
+struct overloads : Ts... {
+  using Ts::operator()...;
+};
+template <class... Ts>
+overloads(Ts...) -> overloads<Ts...>;
+
 inline int64_t GetQuarter(const year_month_day& ymd) {
   return static_cast<int64_t>((static_cast<uint32_t>(ymd.month()) - 1) / 3);
 }
 
-static inline Result<const time_zone*> LocateZone(const std::string& timezone) 
{
+static inline Result<const ArrowTimeZone> LocateZone(const std::string& 
timezone) {

Review Comment:
   You don't need a return value to be `const`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to