jonkeane commented on a change in pull request #12319:
URL: https://github.com/apache/arrow/pull/12319#discussion_r819969875



##########
File path: r/tests/testthat/test-dplyr-funcs-type.R
##########
@@ -843,3 +843,90 @@ test_that("as.Date() converts successfully from date, 
timestamp, integer, char a
     test_df
   )
 })
+
+test_that("format date/time", {
+  skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168
+
+  times <- tibble(
+    datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = 
"Pacific/Marquesas"), NA),
+    date = c(as.Date("2021-01-01"), NA)
+  )
+  formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% 
%G %V %u"
+  formats_date <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %j %U %W %x %X %% %G 
%V %u"
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(x = format(datetime, format = formats)) %>%
+      collect(),
+    times
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(x = format(date, format = formats_date)) %>%
+      collect(),
+    times
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(x = format(datetime, format = formats, tz = "Europe/Bucharest")) 
%>%
+      collect(),
+    times
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(x = format(datetime, format = formats, tz = "EST", usetz = TRUE)) 
%>%
+      collect(),
+    times
+  )
+
+  withr::with_timezone(
+    "Pacific/Marquesas",
+    {
+      compare_dplyr_binding(
+        .input %>%
+          mutate(
+            x = format(datetime, format = formats, tz = "EST"),
+            x_date = format(date, format = formats_date, tz = "EST")
+          ) %>%
+          collect(),
+        times
+      )
+
+      compare_dplyr_binding(
+        .input %>%
+          mutate(
+            x = format(datetime, format = formats),
+            x_date = format(date, format = formats_date)
+          ) %>%
+          collect(),
+        times
+      )
+    }
+  )
+})

Review comment:
       It might be nice to also test `format(1)` or the like for if someone 
provides an R object in the pipeline.

##########
File path: r/tests/testthat/test-dplyr-funcs-type.R
##########
@@ -843,3 +843,90 @@ test_that("as.Date() converts successfully from date, 
timestamp, integer, char a
     test_df
   )
 })
+
+test_that("format date/time", {
+  skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168
+
+  times <- tibble(
+    datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = 
"Pacific/Marquesas"), NA),
+    date = c(as.Date("2021-01-01"), NA)
+  )
+  formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% 
%G %V %u"
+  formats_date <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %j %U %W %x %X %% %G 
%V %u"
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(x = format(datetime, format = formats)) %>%
+      collect(),
+    times
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(x = format(date, format = formats_date)) %>%
+      collect(),
+    times
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(x = format(datetime, format = formats, tz = "Europe/Bucharest")) 
%>%
+      collect(),
+    times
+  )
+
+  compare_dplyr_binding(
+    .input %>%
+      mutate(x = format(datetime, format = formats, tz = "EST", usetz = TRUE)) 
%>%
+      collect(),
+    times
+  )
+
+  withr::with_timezone(
+    "Pacific/Marquesas",
+    {
+      compare_dplyr_binding(
+        .input %>%
+          mutate(
+            x = format(datetime, format = formats, tz = "EST"),
+            x_date = format(date, format = formats_date, tz = "EST")
+          ) %>%
+          collect(),
+        times
+      )
+
+      compare_dplyr_binding(
+        .input %>%
+          mutate(
+            x = format(datetime, format = formats),
+            x_date = format(date, format = formats_date)
+          ) %>%
+          collect(),
+        times
+      )
+    }
+  )
+})
+
+test_that("format() for unsupported types returns the input as string", {
+  expect_equal(
+    example_data %>%
+      record_batch() %>%
+      mutate(x = format(int, trim = TRUE)) %>%
+      collect(),
+    example_data %>%
+      record_batch() %>%
+      mutate(x = as.character(int)) %>%
+      collect()
+  )
+  expect_equal(
+    example_data %>%
+      record_batch() %>%
+      mutate(y = format(dbl, nsmall = 3)) %>%
+      collect(),
+    example_data %>%
+      record_batch() %>%
+      mutate(y = as.character(dbl)) %>%
+      collect()
+  )
+})

Review comment:
       Could we have one of these two use `arrow_table()` instead of 
`record_batch()` just to confirm that path way works too?

##########
File path: r/R/dplyr-funcs-type.R
##########
@@ -292,3 +293,18 @@ register_bindings_type_elementwise <- function() {
     is_inf & !call_binding("is.na", is_inf)
   })
 }
+
+register_bindings_type_format <- function() {
+  register_binding("format", function(x, ...) {
+    if (!inherits(x, "Expression")) {
+      return(format(x, ...))
+    }
+
+    if (inherits(x, "Expression") &&
+        x$type_id() %in% Type[c("TIMESTAMP", "DATE32", "DATE64")]) {
+      binding_format_datetime(x, ...)
+    } else {
+      x$cast(string())

Review comment:
       Should we use `build_expr("cast", x, ...)` here (and then we can likely 
get rid of the bit about that returns early if x isn't an expression, I 
think...)?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to