jorisvandenbossche commented on a change in pull request #11358: URL: https://github.com/apache/arrow/pull/11358#discussion_r744856366
########## File path: cpp/src/arrow/compute/kernels/scalar_string_test.cc ########## @@ -1757,6 +1758,24 @@ TYPED_TEST(TestStringKernels, Strptime) { std::string output1 = R"(["2020-05-01", null, "1900-12-11"])"; StrptimeOptions options("%m/%d/%Y", TimeUnit::MICRO); this->CheckUnary("strptime", input1, timestamp(TimeUnit::MICRO), output1, &options); + + input1 = R"(["5/1/2020 %z", null, "12/11/1900 %z"])"; + options.format = "%m/%d/%Y %%z"; + this->CheckUnary("strptime", input1, timestamp(TimeUnit::MICRO), output1, &options); +} + +TYPED_TEST(TestStringKernels, StrptimeZoneOffset) { + if (!arrow::internal::kStrptimeSupportsZone) { + GTEST_SKIP() << "strptime does not support %z on this platform"; + } + // N.B. BSD strptime only supports (+/-)HHMM and not the wider range + // of values GNU strptime supports. + std::string input1 = R"(["5/1/2020 +0100", null, "12/11/1900 -0130"])"; Review comment: Also add the case of parsing a string with trailing `Z`? That now seems to work with this PR: ``` In [26]: pc.strptime(["2012-01-01 09:00:00Z"], format="%Y-%m-%d %H:%M:%S%z", unit="s") Out[26]: <pyarrow.lib.TimestampArray object at 0x7fb709a65760> [ 2012-01-01 09:00:00 ] ``` ########## File path: cpp/src/arrow/compute/kernels/scalar_cast_test.cc ########## @@ -1870,7 +1870,32 @@ TEST(Cast, StringToTimestamp) { } } - // NOTE: timestamp parsing is tested comprehensively in parsing-util-test.cc + auto zoned = ArrayFromJSON(string_type, + R"(["2020-02-29T00:00:00Z", "2020-03-02T10:11:12+0102"])"); + auto mixed = ArrayFromJSON(string_type, + R"(["2020-03-02T10:11:12+0102", "2020-02-29T00:00:00"])"); + + // Timestamp with zone offset should not parse as naive + CheckCastFails(zoned, CastOptions::Safe(timestamp(TimeUnit::SECOND))); + + // Mixed zoned/unzoned should not parse as naive + CheckCastFails(mixed, CastOptions::Safe(timestamp(TimeUnit::SECOND))); + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::HasSubstr("expected no zone offset"), + Cast(mixed, CastOptions::Safe(timestamp(TimeUnit::SECOND)))); + + // ...or as timestamp with timezone + EXPECT_RAISES_WITH_MESSAGE_THAT( + Invalid, ::testing::HasSubstr("expected a zone offset"), + Cast(mixed, CastOptions::Safe(timestamp(TimeUnit::SECOND, "UTC")))); Review comment: Also test `strings` here for the case of fully unzoned? (although in code that probably triggers the same check as the mixed case?) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org