pitrou commented on a change in pull request #12464:
URL: https://github.com/apache/arrow/pull/12464#discussion_r829168784



##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -267,12 +267,17 @@ class ARROW_EXPORT StructFieldOptions : public FunctionOptions {
 
 class ARROW_EXPORT StrptimeOptions : public FunctionOptions {
  public:
-  explicit StrptimeOptions(std::string format, TimeUnit::type unit);
+  explicit StrptimeOptions(std::string format, TimeUnit::type unit,
+                           bool raise_errors = true);
   StrptimeOptions();
   static constexpr char const kTypeName[] = "StrptimeOptions";
 
+  /// The desired format string.
   std::string format;
+  /// The desired time resolution
   TimeUnit::type unit;
+  /// Raise on parsing errors

Review comment:
       Indeed, we may want to make this more descriptive, e.g.
       `bool error_is_null` or something. Opinions?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc
##########
@@ -1143,6 +1145,117 @@ struct Strftime {
 };
 #endif
 
+// ----------------------------------------------------------------------
+// Convert string representations of timestamps in arbitrary format to timestamps
+
+static std::string GetZone(std::string format) {
+  // Returns "UTC" if `format` contains the `%z` directive, otherwise "" (only lowercase `%z` is tested, not `%Z`).
+  size_t cur = 0;  // index of the character currently being examined
+  std::string zone = "";
+  while (cur < format.size() - 1) {  // NOTE(review): size() - 1 is unsigned and wraps for an empty format, so the loop would then index past the end
+    if (format[cur] == '%') {
+      if (format[cur + 1] == 'z') {
+        zone = "UTC";
+        break;
+      }
+      cur++;  // skip the character following '%' so that an escaped "%%" is not re-read as the start of a directive
+    }
+    cur++;
+  }
+  return zone;
+}
+
+template <typename Duration, typename InType>
+struct Strptime {
+  const std::shared_ptr<TimestampParser> parser;
+  const TimeUnit::type unit;
+  const std::string zone;
+  const bool raise_errors;
+
+  static Result<Strptime> Make(KernelContext* ctx, const DataType& type) {
+    const StrptimeOptions& options = StrptimeState::Get(ctx);
+
+    return Strptime{TimestampParser::MakeStrptime(options.format),
+                    std::move(options.unit), GetZone(options.format),
+                    options.raise_errors};
+  }
+
+  static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) {
+    ARROW_ASSIGN_OR_RAISE(auto self, Make(ctx, *in.type));
+
+    if (in.is_valid) {
+      auto s = internal::UnboxScalar<InType>::Unbox(in);
+      int64_t result;
+      if ((*self.parser)(s.data(), s.size(), self.unit, &result)) {
+        *checked_cast<TimestampScalar*>(out) =
+            TimestampScalar(result, timestamp(self.unit, self.zone));
+      } else {
+        if (self.raise_errors) {
+          return Status::Invalid("Failed to parse string: '", s.data(),
+                                 "' as a scalar of type ",
+                                 TimestampType(self.unit).ToString());
+        } else {
+          out->is_valid = false;
+        }
+      }
+    } else {
+      out->is_valid = false;
+    }
+    return Status::OK();
+  }
+
+  static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) {
+    ARROW_ASSIGN_OR_RAISE(auto self, Make(ctx, *in.type));
+
+    std::unique_ptr<ArrayBuilder> array_builder;

Review comment:
       (probably `GetMutableValues`, but yes)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to