pitrou commented on a change in pull request #12464: URL: https://github.com/apache/arrow/pull/12464#discussion_r829168784
########## File path: cpp/src/arrow/compute/api_scalar.h ########## @@ -267,12 +267,17 @@ class ARROW_EXPORT StructFieldOptions : public FunctionOptions { class ARROW_EXPORT StrptimeOptions : public FunctionOptions { public: - explicit StrptimeOptions(std::string format, TimeUnit::type unit); + explicit StrptimeOptions(std::string format, TimeUnit::type unit, + bool raise_errors = true); StrptimeOptions(); static constexpr char const kTypeName[] = "StrptimeOptions"; + /// The desired format string. std::string format; + /// The desired time resolution TimeUnit::type unit; + /// Raise on parsing errors Review comment: Indeed, we may want to make this more descriptive, e.g. `bool error_is_null` or something. Opinions? ########## File path: cpp/src/arrow/compute/kernels/scalar_temporal_unary.cc ########## @@ -1143,6 +1145,117 @@ struct Strftime { }; #endif +// ---------------------------------------------------------------------- +// Convert string representations of timestamps in arbitrary format to timestamps + +static std::string GetZone(std::string format) { + // Check for use of %z or %Z + size_t cur = 0; + std::string zone = ""; + while (cur < format.size() - 1) { + if (format[cur] == '%') { + if (format[cur + 1] == 'z') { + zone = "UTC"; + break; + } + cur++; + } + cur++; + } + return zone; +} + +template <typename Duration, typename InType> +struct Strptime { + const std::shared_ptr<TimestampParser> parser; + const TimeUnit::type unit; + const std::string zone; + const bool raise_errors; + + static Result<Strptime> Make(KernelContext* ctx, const DataType& type) { + const StrptimeOptions& options = StrptimeState::Get(ctx); + + return Strptime{TimestampParser::MakeStrptime(options.format), + std::move(options.unit), GetZone(options.format), + options.raise_errors}; + } + + static Status Call(KernelContext* ctx, const Scalar& in, Scalar* out) { + ARROW_ASSIGN_OR_RAISE(auto self, Make(ctx, *in.type)); + + if (in.is_valid) { + auto s = internal::UnboxScalar<InType>::Unbox(in); + int64_t result; + if ((*self.parser)(s.data(), s.size(), self.unit, &result)) { + *checked_cast<TimestampScalar*>(out) = + TimestampScalar(result, timestamp(self.unit, self.zone)); + } else { + if (self.raise_errors) { + return Status::Invalid("Failed to parse string: '", s.data(), + "' as a scalar of type ", + TimestampType(self.unit).ToString()); + } else { + out->is_valid = false; + } + } + } else { + out->is_valid = false; + } + return Status::OK(); + } + + static Status Call(KernelContext* ctx, const ArrayData& in, ArrayData* out) { + ARROW_ASSIGN_OR_RAISE(auto self, Make(ctx, *in.type)); + + std::unique_ptr<ArrayBuilder> array_builder; Review comment: (probably `GetMutableValues`, but yes) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org