This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 3c7a0cad0e ARROW-16395: [R] Implement lubridate's parsers with year, 
month, and day, hour, minute, and second components (#13627)
3c7a0cad0e is described below

commit 3c7a0cad0e25ed66e4c555d9da49f320f803573c
Author: Rok Mihevc <[email protected]>
AuthorDate: Fri Jul 22 21:02:53 2022 +0200

    ARROW-16395: [R] Implement lubridate's parsers with year, month, and day, 
hour, minute, and second components (#13627)
    
    This is to resolve 
[ARROW-16395](https://issues.apache.org/jira/browse/ARROW-16395).
    
    Authored-by: Rok <[email protected]>
    Signed-off-by: Dewey Dunnington <[email protected]>
---
 r/R/dplyr-funcs-datetime.R                   |  25 ++--
 r/tests/testthat/test-dplyr-funcs-datetime.R | 172 +++++++++++++++++++++++++++
 2 files changed, 190 insertions(+), 7 deletions(-)

diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index fd705e4578..9a010452b8 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -580,12 +580,23 @@ register_bindings_datetime_parsers <- function() {
     }
   })
 
-  ymd_parser_vec <- c("ymd", "ydm", "mdy", "myd", "dmy", "dym", "ym", "my", 
"yq")
+  parser_vec <- c(
+    "ymd", "ydm", "mdy", "myd", "dmy", "dym", "ym", "my", "yq",
+    "ymd_HMS", "ymd_HM", "ymd_H", "dmy_HMS", "dmy_HM", "dmy_H",
+    "mdy_HMS", "mdy_HM", "mdy_H", "ydm_HMS", "ydm_HM", "ydm_H"
+  )
 
-  ymd_parser_map_factory <- function(order) {
+  parser_map_factory <- function(order) {
     force(order)
-    function(x, tz = NULL) {
-      parse_x <- call_binding("parse_date_time", x, order, tz)
+    function(x, quiet = TRUE, tz = NULL, locale = NULL, truncated = 0) {
+      if (!is.null(locale)) {
+        arrow_not_supported("`locale`")
+      }
+      # Parsers returning datetimes return UTC by default and never return 
dates.
+      if (is.null(tz) && nchar(order) > 3) {
+        tz <- "UTC"
+      }
+      parse_x <- call_binding("parse_date_time", x, order, tz, truncated, 
quiet)
       if (is.null(tz)) {
         # we cast so we can mimic the behaviour of the `tz` argument in 
lubridate
         # "If NULL (default), a Date object is returned. Otherwise a POSIXct 
with
@@ -596,10 +607,10 @@ register_bindings_datetime_parsers <- function() {
     }
   }
 
-  for (ymd_order in ymd_parser_vec) {
+  for (order in parser_vec) {
     register_binding(
-      paste0("lubridate::", ymd_order),
-      ymd_parser_map_factory(ymd_order)
+      paste0("lubridate::", tolower(order)),
+      parser_map_factory(order)
     )
   }
 
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R 
b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 1c6b73d7af..0c655e48bb 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -2222,6 +2222,26 @@ test_that("parse_date_time with hours, minutes and 
seconds components", {
     test_dates_times
   )
 
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ymd_hms_dttm = ymd_hms(ymd_hms_string),
+        ymd_hm_dttm = ymd_hm(ymd_hm_string),
+        ymd_h_dttm = ymd_h(ymd_h_string),
+        dmy_hms_dttm = dmy_hms(dmy_hms_string),
+        dmy_hm_dttm = dmy_hm(dmy_hm_string),
+        dmy_h_dttm = dmy_h(dmy_h_string),
+        mdy_hms_dttm = mdy_hms(mdy_hms_string),
+        mdy_hm_dttm = mdy_hm(mdy_hm_string),
+        mdy_h_dttm = mdy_h(mdy_h_string),
+        ydm_hms_dttm = ydm_hms(ydm_hms_string),
+        ydm_hm_dttm = ydm_hm(ydm_hm_string),
+        ydm_h_dttm = ydm_h(ydm_h_string)
+      ) %>%
+      collect(),
+    test_dates_times
+  )
+
   # parse_date_time with timezone
   pm_tz <- "Pacific/Marquesas"
   compare_dplyr_binding(
@@ -2244,6 +2264,46 @@ test_that("parse_date_time with hours, minutes and 
seconds components", {
     test_dates_times
   )
 
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ymd_hms_dttm = ymd_hms(ymd_hms_string, tz = pm_tz),
+        ymd_hm_dttm = ymd_hm(ymd_hm_string, tz = pm_tz),
+        ymd_h_dttm = ymd_h(ymd_h_string, tz = pm_tz),
+        dmy_hms_dttm = dmy_hms(dmy_hms_string, tz = pm_tz),
+        dmy_hm_dttm = dmy_hm(dmy_hm_string, tz = pm_tz),
+        dmy_h_dttm = dmy_h(dmy_h_string, tz = pm_tz),
+        mdy_hms_dttm = mdy_hms(mdy_hms_string, tz = pm_tz),
+        mdy_hm_dttm = mdy_hm(mdy_hm_string, tz = pm_tz),
+        mdy_h_dttm = mdy_h(mdy_h_string, tz = pm_tz),
+        ydm_hms_dttm = ydm_hms(ydm_hms_string, tz = pm_tz),
+        ydm_hm_dttm = ydm_hm(ydm_hm_string, tz = pm_tz),
+        ydm_h_dttm = ydm_h(ydm_h_string, tz = pm_tz),
+      ) %>%
+      collect(),
+    test_dates_times
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ymd_hms_dttm = ymd_hms("2022-07-19 20:24:43"),
+        ymd_hm_dttm = ymd_hm("2022-07-19 20:24"),
+        ymd_h_dttm = ymd_h("2022-07-19 20"),
+        dmy_hms_dttm = dmy_hms("19-07-2022 20:24:43"),
+        dmy_hm_dttm = dmy_hm("19-07-2022 20:24"),
+        dmy_h_dttm = dmy_h("19-07-2022 20"),
+        mdy_hms_dttm = mdy_hms("07-19-2022 20:24:43"),
+        mdy_hm_dttm = mdy_hm("07-19-2022 20:24"),
+        mdy_h_dttm = mdy_h("07-19-2022 20"),
+        ydm_hms_dttm = ydm_hms("2022-19-07 20:24:43"),
+        ydm_hm_dttm = ydm_hm("2022-19-07 20:24"),
+        ydm_h_dttm = ydm_h("2022-19-07 20")
+      ) %>%
+      collect(),
+    test_dates_times
+  )
+
   # test ymd_ims
   compare_dplyr_binding(
     .input %>%
@@ -2319,12 +2379,58 @@ test_that("parse_date_time with month names and HMS", {
       collect(),
     test_dates_times2
   )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ymd_hms_dttm = ymd_hms(ymd_hms_string),
+        ymd_hm_dttm  = ymd_hm(ymd_hm_string),
+        ymd_h_dttm   = ymd_h(ymd_h_string),
+        dmy_hms_dttm = dmy_hms(dmy_hms_string),
+        dmy_hm_dttm  = dmy_hm(dmy_hm_string),
+        dmy_h_dttm   = dmy_h(dmy_h_string),
+        mdy_hms_dttm = mdy_hms(mdy_hms_string),
+        mdy_hm_dttm  = mdy_hm(mdy_hm_string),
+        mdy_h_dttm   = mdy_h(mdy_h_string),
+        ydm_hms_dttm = ydm_hms(ydm_hms_string),
+        ydm_hm_dttm  = ydm_hm(ydm_hm_string),
+        ydm_h_dttm   = ydm_h(ydm_h_string)
+      ) %>%
+      collect(),
+    test_dates_times2
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        ymd_hms_dttm = ymd_hms("2022-June-19 20:24:43"),
+        ymd_hm_dttm = ymd_hm("2022-June-19 20:24"),
+        ymd_h_dttm = ymd_h("2022-June-19 20"),
+        dmy_hms_dttm = dmy_hms("19-June-2022 20:24:43"),
+        dmy_hm_dttm = dmy_hm("19-June-2022 20:24"),
+        dmy_h_dttm = dmy_h("19-June-2022 20"),
+        mdy_hms_dttm = mdy_hms("June-19-2022 20:24:43"),
+        mdy_hm_dttm = mdy_hm("June-19-2022 20:24"),
+        mdy_h_dttm = mdy_h("June-19-2022 20"),
+        ydm_hms_dttm = ydm_hms("2022-19-June 20:24:43"),
+        ydm_hm_dttm = ydm_hm("2022-19-June 20:24"),
+        ydm_h_dttm = ydm_h("2022-19-June 20")
+      ) %>%
+      collect(),
+    test_dates_times2
+  )
 })
 
 test_that("parse_date_time with `quiet = FALSE` not supported", {
   # we need expect_warning twice as both the arrow pipeline (because quiet =
   # FALSE is not supported) and the fallback dplyr/lubridate one throw
   # warnings (the lubridate one because quiet is FALSE)
+  # https://issues.apache.org/jira/browse/ARROW-17146
+
+  # these functions' internals use some string processing which requires the
+  # RE2 library (not available on Windows with R 3.6 & the minimal nightly 
builds)
+  skip_if_not_available("re2")
+
   expect_warning(
     expect_warning(
       tibble(x = c("2022-05-19 13:46:51")) %>%
@@ -2337,6 +2443,16 @@ test_that("parse_date_time with `quiet = FALSE` not 
supported", {
     ),
     "All formats failed to parse"
   )
+
+  expect_warning(
+    tibble(x = c("2022-05-19 13:46:51")) %>%
+      arrow_table() %>%
+      mutate(
+        x_dttm = ymd_hms(x, quiet = FALSE)
+      ) %>%
+      collect(),
+    "`quiet = FALSE` not supported in Arrow"
+  )
 })
 
 test_that("parse_date_time with truncated formats", {
@@ -2362,6 +2478,11 @@ test_that("parse_date_time with truncated formats", {
             truncated_ymd_string,
             orders = "ymd_HMS",
             truncated = 3
+          ),
+        dttm2 =
+          ymd_hms(
+            truncated_ymd_string,
+            truncated = 3
           )
       ) %>%
       collect(),
@@ -2383,6 +2504,37 @@ test_that("parse_date_time with truncated formats", {
     test_truncation_df,
     warning = "a value for `truncated` > 4 not supported in Arrow"
   )
+
+  # values for truncated greater than nchar(orders) - 3 not supported in Arrow
+  compare_dplyr_binding(
+    .input %>%
+      mutate(
+        dttm =
+          ymd_hms(
+            truncated_ymd_string,
+            truncated = 5
+          )
+      ) %>%
+      collect(),
+    test_truncation_df,
+    warning = "a value for `truncated` > 4 not supported in Arrow"
+  )
+})
+
+test_that("parse_date_time with `locale != NULL` not supported", {
+  # parse_date_time currently doesn't take locale paramete which will be
+  # addressed in https://issues.apache.org/jira/browse/ARROW-17147
+  skip_if_not_available("re2")
+
+  expect_warning(
+    tibble(x = c("2022-05-19 13:46:51")) %>%
+      arrow_table() %>%
+      mutate(
+        x_dttm = ymd_hms(x, locale = "C")
+      ) %>%
+      collect(),
+    "`locale` not supported in Arrow"
+  )
 })
 
 test_that("parse_date_time with `exact = TRUE`, and with regular R objects", {
@@ -2514,6 +2666,26 @@ test_that("build_formats() and 
build_format_from_order()", {
       "%y%b%d%H%M%S", "%Y%b%d%H%M%S"
     )
   )
+
+  expect_equal(
+    build_format_from_order("ymdHM"),
+    c(
+      "%y-%m-%d-%H-%M", "%Y-%m-%d-%H-%M", "%y-%B-%d-%H-%M",
+      "%Y-%B-%d-%H-%M", "%y-%b-%d-%H-%M", "%Y-%b-%d-%H-%M",
+      "%y%m%d%H%M", "%Y%m%d%H%M", "%y%B%d%H%M", "%Y%B%d%H%M",
+      "%y%b%d%H%M", "%Y%b%d%H%M"
+    )
+  )
+
+  expect_equal(
+    build_format_from_order("ymdH"),
+    c(
+      "%y-%m-%d-%H", "%Y-%m-%d-%H", "%y-%B-%d-%H",
+      "%Y-%B-%d-%H", "%y-%b-%d-%H", "%Y-%b-%d-%H",
+      "%y%m%d%H", "%Y%m%d%H", "%y%B%d%H", "%Y%B%d%H",
+      "%y%b%d%H", "%Y%b%d%H"
+    )
+  )
 })
 
 

Reply via email to