jonkeane commented on a change in pull request #12154:
URL: https://github.com/apache/arrow/pull/12154#discussion_r786804593



##########
File path: r/tests/testthat/test-dplyr-funcs-datetime.R
##########
@@ -616,3 +616,292 @@ test_that("extract yday from date", {
     test_df
   )
 })
+
+test_that("round/floor/ceiling on datetime (to nearest second)", {
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        out_1 = round_date(datetime),
+        out_2 = floor_date(datetime),
+        out_3 = ceiling_date(datetime),
+      ) %>%
+      collect(),
+    test_df
+  )
+})
+
+test_that("period unit abbreviation", {
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        out_1 = round_date(datetime, "minute"),
+        out_2 = round_date(datetime, "minutes"),
+        out_3 = round_date(datetime, "mins"),
+      ) %>%
+      collect(),
+    test_df
+  )
+})
+
+test_that("period unit extracts integer multiples", {
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        out_1 = round_date(datetime, "1 minute"),
+        out_2 = round_date(datetime, "2 minutes"),
+        out_3 = round_date(datetime, "10 minutes")
+      ) %>%
+      collect(),
+    test_df
+  )
+})
+
+# lubridate errors when 60 sec/60 min/24 hour thresholds exceeded.
+# this test checks that arrow does too.
+test_that("period unit maxima are enforced", {
+
+  expect_error(suppressWarnings( # <- hack
+    test_df %>%
+      arrow_table() %>%
+      mutate(out = round_date(datetime, "61 seconds")) %>%
+      collect()
+  ))
+
+  expect_error(suppressWarnings(
+    test_df %>%
+      arrow_table() %>%
+      mutate(out = round_date(datetime, "61 minutes")) %>%
+      collect()
+  ))
+
+  expect_error(suppressWarnings(
+    test_df %>%
+      arrow_table() %>%
+      mutate(out = round_date(datetime, "25 hours")) %>%
+      collect()
+  ))
+
+})
+
+test_that("datetime rounding between 1sec and 1day", {
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        out_1 = round_date(datetime, "second"),
+        out_2 = round_date(datetime, "minute"),
+        out_3 = round_date(datetime, "hour"),
+        out_4 = round_date(datetime, "day")
+      ) %>%
+      collect(),
+    test_df
+  )
+})
+
+# lubridate doesn't accept millisecond, microsecond or nanosecond descriptors:
+# instead it supports corresponding fractions of 1 second. these tests added to
+# that arrow verify that fractional second inputs to arrow mirror lubridate
+
+test_that("datetime rounding below 1sec", {
+
+  expect_equal(
+    test_df %>%
+      arrow_table() %>%
+      mutate(out = round_date(datetime, ".001 second")) %>%
+      collect(),
+
+    test_df %>%
+      arrow_table() %>%
+      mutate(out = round_date(datetime, "1 millisecond")) %>%
+      collect()
+  )
+
+  expect_equal(
+    test_df %>%
+      arrow_table() %>%
+      mutate(out = round_date(datetime, ".000001 second")) %>%
+      collect(),
+
+    test_df %>%
+      arrow_table() %>%
+      mutate(out = round_date(datetime, "1 microsecond")) %>%
+      collect()
+  )
+
+  expect_equal(
+    test_df %>%
+      arrow_table() %>%
+      mutate(out = round_date(datetime, ".000000001 second")) %>%
+      collect(),
+
+    test_df %>%
+      arrow_table() %>%
+      mutate(out = round_date(datetime, "1 nanosecond")) %>%
+      collect()
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        out_1 = round_date(datetime, ".01 second"),
+        out_2 = round_date(datetime, ".001 second"),
+        out_3 = round_date(datetime, ".00001 second")
+      ) %>%
+      collect(),
+    test_df
+  )
+})
+
+
+# a simplified test case using UTC timezone only
+test_df_v2 <- tibble::tibble(
+  datetime = c(as.POSIXct("2017-01-01 00:00:11.3456789", tz = "UTC"), NA),
+  date = c(as.Date("2021-09-09"), NA),
+  integer = 1:2
+)
+
+test_that("datetime round/floor/ceil to month/quarter/year", {
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        out_1 = round_date(datetime, "month"),
+        out_2 = round_date(datetime, "quarter"),
+        out_3 = round_date(datetime, "year"),
+      ) %>%
+      collect(),
+    test_df_v2
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        out_1 = floor_date(datetime, "month"),
+        out_2 = floor_date(datetime, "quarter"),
+        out_3 = floor_date(datetime, "year"),
+      ) %>%
+      collect(),
+    test_df_v2
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        out_1 = ceiling_date(datetime, "month"),
+        out_2 = ceiling_date(datetime, "quarter"),
+        out_3 = ceiling_date(datetime, "year"),
+      ) %>%
+      collect(),
+    test_df_v2
+  )
+})
+
+# NOTE: the hard coding of week_starts = 4 needs to be fixed. round_temporal()
+# treats 1970-01-01 as the beginning of week 1, i.e., week_starts on a Thursday
+
+# NOTE: arrow dplyr binding for ceiling_date() does not force dates up to the
+# next date. the logic mirrors lubridate prior to v1.6.0 (change_on_boundary = 
FALSE).
+# I'm not 100% sold on this implementation, but it's not obviously terrible
+
+test_that("datetime round/floor/ceil to week", {
+
+  expect_equal(
+    test_df_v2 %>%
+      arrow_table() %>%
+      mutate(out = round_date(datetime, "1 week")) %>%
+      collect(),
+    test_df_v2 %>%
+      mutate(out = round_date(datetime, "1 week", week_start = 4))
+  )
+
+  expect_equal(
+    test_df_v2 %>%
+      arrow_table() %>%
+      mutate(out = ceiling_date(datetime, "1 week")) %>%
+      collect(),
+    test_df_v2 %>%
+      mutate(out = ceiling_date(datetime, "1 week", week_start = 4, 
change_on_boundary = FALSE))
+  )
+
+  expect_equal(
+    test_df_v2 %>%
+      arrow_table() %>%
+      mutate(out = floor_date(datetime, "1 week")) %>%
+      collect(),
+    test_df_v2 %>%
+      mutate(out = floor_date(datetime, "1 week", week_start = 4))
+  )
+})
+
+# NOTE: lubridate::round_date() sometimes coerces output from Date to POSIXct.
+# this is not the default for the round_temporal() function in libarrow, which
+# is type stable: timestamps stay timestamps, and date32 stays date32. the
+# current implementation preserves the type stability property. consequently
+# there are edge cases where the arrow dplyr binding will not precisely mirror
+# the lubridate original. with that in mind, all tests for date32 rounding 
coerce
+# the lubridate equivalent back to Date

Review comment:
       Yup, I agree here. It might be nice to add a small comment in those test 
cases about this (so future-us remembers why we have `as.Date()` at the end 
there




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to