djnavarro commented on code in PR #12154:
URL: https://github.com/apache/arrow/pull/12154#discussion_r912591451


##########
r/tests/testthat/test-dplyr-funcs-datetime.R:
##########
@@ -2309,3 +2308,535 @@ test_that("build_formats() and build_format_from_order()", {
       "%y%b%d%H%M%S", "%Y%b%d%H%M%S")
   )
 })
+
+
+
+# tests for datetime rounding ---------------------------------------------
+
+# an easy date to avoid conflating tests of different things
+easy_date <- as.POSIXct("2022-10-11 12:00:00", tz = "UTC")
+easy_df <- tibble::tibble(datetime = easy_date)
+
+# dates near month boundaries over the course of 1 year
+month_boundaries <- c(
+  "2021-01-01 00:01:00", "2021-02-01 00:01:00", "2021-03-01 00:01:00",
+  "2021-04-01 00:01:00", "2021-05-01 00:01:00", "2021-06-01 00:01:00",
+  "2021-07-01 00:01:00", "2021-08-01 00:01:00", "2021-09-01 00:01:00",
+  "2021-10-01 00:01:00", "2021-11-01 00:01:00", "2021-12-01 00:01:00",
+  "2021-01-31 23:59:00", "2021-02-28 23:59:00", "2021-03-31 23:59:00",
+  "2021-04-30 23:59:00", "2021-05-31 23:59:00", "2021-06-30 23:59:00",
+  "2021-07-31 23:59:00", "2021-08-31 23:59:00", "2021-09-30 23:59:00",
+  "2021-10-31 23:59:00", "2021-11-30 23:59:00", "2021-12-31 23:59:00"
+)
+year_of_dates <- tibble::tibble(
+  datetime = as.POSIXct(month_boundaries, tz = "UTC"),
+  date = as.Date(datetime)
+)
+
+# test case used to check we catch week boundaries for all week_start values
+fortnight <- tibble::tibble(
+  date = as.Date(c(
+    "2022-04-04", # Monday
+    "2022-04-05", # Tuesday
+    "2022-04-06", # Wednesday
+    "2022-04-07", # Thursday
+    "2022-04-08", # Friday
+    "2022-04-09", # Saturday
+    "2022-04-10", # Sunday
+    "2022-04-11", # Monday
+    "2022-04-12", # Tuesday
+    "2022-04-13", # Wednesday
+    "2022-04-14", # Thursday
+    "2022-04-15", # Friday
+    "2022-04-16", # Saturday
+    "2022-04-17"  # Sunday
+  )),
+  datetime = as.POSIXct(date)
+)
+
+# test case to check we catch interval lower boundaries for ceiling_date
+boundary_times <- tibble::tibble(
+  datetime = as.POSIXct(strptime(c(
+    "2022-05-10 00:00:00", # boundary for week when week_start = 7 (Sunday)
+    "2022-05-11 00:00:00", # boundary for week when week_start = 1 (Monday)
+    "2022-05-12 00:00:00", # boundary for week when week_start = 2 (Tuesday)
+    "2022-03-10 00:00:00", # boundary for day, hour, minute, second, millisecond
+    "2022-03-10 00:00:01", # boundary for second, millisecond
+    "2022-03-10 00:01:00", # boundary for second, millisecond, minute
+    "2022-03-10 01:00:00", # boundary for second, millisecond, minute, hour
+    "2022-01-01 00:00:00"  # boundary for year
+  ), tz = "UTC", format = "%F %T")),
+  date = as.Date(datetime)
+)
+
+# test case to check rounding takes place in local time
+datestrings <- c(
+  "1970-01-01T00:00:59.123456789", "2000-02-29T23:23:23.999999999",
+  "1899-01-01T00:59:20.001001001", "2033-05-18T03:33:20.000000000",
+  "2020-01-01T01:05:05.001", "2019-12-31T02:10:10.002",
+  "2019-12-30T03:15:15.003", "2009-12-31T04:20:20.004132",
+  "2010-01-01T05:25:25.005321", "2010-01-03T06:30:30.006163",
+  "2010-01-04T07:35:35", "2006-01-01T08:40:40", "2005-12-31T09:45:45",
+  "2008-12-28T00:00:00", "2008-12-29T00:00:00", "2012-01-01 01:02:03"
+)
+tz_times <- tibble::tibble(
+  utc_time = as.POSIXct(datestrings, tz = "UTC"),
+  syd_time = as.POSIXct(datestrings, tz = "Australia/Sydney"),   # UTC +10   (UTC +11 with DST)
+  adl_time = as.POSIXct(datestrings, tz = "Australia/Adelaide"), # UTC +9:30 (UTC +10:30 with DST)
+  mar_time = as.POSIXct(datestrings, tz = "Pacific/Marquesas"),  # UTC -9:30 (no DST)
+  kat_time = as.POSIXct(datestrings, tz = "Asia/Kathmandu")      # UTC +5:45 (no DST)
+)
+
+
+test_that("timestamp round/floor/ceiling works for a minimal test", {
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        round_datetime = round_date(datetime),
+        floor_datetime = floor_date(datetime),
+        ceiling_datetime = ceiling_date(datetime, change_on_boundary = FALSE)
+      ) %>%
+      collect(),
+    test_df
+  )
+})
+
+test_that("timestamp round/floor/ceiling accepts period unit abbreviation", {
+
+  # test helper to ensure standard abbreviations of period names
+  # are understood by arrow and mirror the lubridate behaviour
+  check_period_abbreviation <- function(unit, synonyms) {
+
+    # check arrow against lubridate
+    compare_dplyr_binding(
+      .input %>%
+        mutate(out_1 = round_date(datetime, unit)) %>%
+        collect(),
+      easy_df
+    )
+
+    # check synonyms
+    base <- call_binding("round_date", Expression$scalar(easy_date), unit)
+    for (syn in synonyms) {
+      expect_equal(
+        call_binding("round_date", Expression$scalar(easy_date), syn),
+        base
+      )
+    }
+  }
+
+  check_period_abbreviation("minute", synonyms = c("minutes", "min", "mins"))
+  check_period_abbreviation("second", synonyms = c("seconds", "sec", "secs"))
+  check_period_abbreviation("month", synonyms = c("months", "mon", "mons"))
+})
+
+
+test_that("temporal round/floor/ceiling accepts periods with multiple units", {
+
+  check_multiple_unit_period <- function(unit, multiplier) {
+    unit_string <- paste(multiplier, unit)
+    compare_dplyr_binding(
+      .input %>%
+        mutate(
+          round_datetime = round_date(datetime, unit_string),
+          floor_datetime = floor_date(datetime, unit_string),
+          ceiling_datetime = ceiling_date(datetime, unit_string)
+        ) %>%
+        collect(),
+      easy_df
+    )

Review Comment:
   Done! There are now tests for 7 seconds, 13 seconds, 7 minutes, 13 minutes, etc.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to