jonkeane commented on a change in pull request #12319: URL: https://github.com/apache/arrow/pull/12319#discussion_r819969875
########## File path: r/tests/testthat/test-dplyr-funcs-type.R ########## @@ -843,3 +843,90 @@ test_that("as.Date() converts successfully from date, timestamp, integer, char a test_df ) }) + +test_that("format date/time", { + skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 + + times <- tibble( + datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Pacific/Marquesas"), NA), + date = c(as.Date("2021-01-01"), NA) + ) + formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u" + formats_date <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %j %U %W %x %X %% %G %V %u" + + compare_dplyr_binding( + .input %>% + mutate(x = format(datetime, format = formats)) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate(x = format(date, format = formats_date)) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate(x = format(datetime, format = formats, tz = "Europe/Bucharest")) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate(x = format(datetime, format = formats, tz = "EST", usetz = TRUE)) %>% + collect(), + times + ) + + withr::with_timezone( + "Pacific/Marquesas", + { + compare_dplyr_binding( + .input %>% + mutate( + x = format(datetime, format = formats, tz = "EST"), + x_date = format(date, format = formats_date, tz = "EST") + ) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate( + x = format(datetime, format = formats), + x_date = format(date, format = formats_date) + ) %>% + collect(), + times + ) + } + ) +}) Review comment: It might be nice to also test `format(1)` or the like for if someone provides an R object in the pipeline. ########## File path: r/tests/testthat/test-dplyr-funcs-type.R ########## @@ -843,3 +843,90 @@ test_that("as.Date() converts successfully from date, timestamp, integer, char a test_df ) }) + +test_that("format date/time", { + skip_on_os("windows") # https://issues.apache.org/jira/browse/ARROW-13168 + + times <- tibble( + datetime = c(lubridate::ymd_hms("2018-10-07 19:04:05", tz = "Pacific/Marquesas"), NA), + date = c(as.Date("2021-01-01"), NA) + ) + formats <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %z %Z %j %U %W %x %X %% %G %V %u" + formats_date <- "%a %A %w %d %b %B %m %y %Y %H %I %p %M %j %U %W %x %X %% %G %V %u" + + compare_dplyr_binding( + .input %>% + mutate(x = format(datetime, format = formats)) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate(x = format(date, format = formats_date)) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate(x = format(datetime, format = formats, tz = "Europe/Bucharest")) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate(x = format(datetime, format = formats, tz = "EST", usetz = TRUE)) %>% + collect(), + times + ) + + withr::with_timezone( + "Pacific/Marquesas", + { + compare_dplyr_binding( + .input %>% + mutate( + x = format(datetime, format = formats, tz = "EST"), + x_date = format(date, format = formats_date, tz = "EST") + ) %>% + collect(), + times + ) + + compare_dplyr_binding( + .input %>% + mutate( + x = format(datetime, format = formats), + x_date = format(date, format = formats_date) + ) %>% + collect(), + times + ) + } + ) +}) + +test_that("format() for unsupported types returns the input as string", { + expect_equal( + example_data %>% + record_batch() %>% + mutate(x = format(int, trim = TRUE)) %>% + collect(), + example_data %>% + record_batch() %>% + mutate(x = as.character(int)) %>% + collect() + ) + expect_equal( + example_data %>% + record_batch() %>% + mutate(y = format(dbl, nsmall = 3)) %>% + collect(), + example_data %>% + record_batch() %>% + mutate(y = as.character(dbl)) %>% + collect() + ) +}) Review comment: Could we have one of these two use `arrow_table()` instead of `record_batch()` just to confirm that path way works too? ########## File path: r/R/dplyr-funcs-type.R ########## @@ -292,3 +293,18 @@ register_bindings_type_elementwise <- function() { is_inf & !call_binding("is.na", is_inf) }) } + +register_bindings_type_format <- function() { + register_binding("format", function(x, ...) { + if (!inherits(x, "Expression")) { + return(format(x, ...)) + } + + if (inherits(x, "Expression") && + x$type_id() %in% Type[c("TIMESTAMP", "DATE32", "DATE64")]) { + binding_format_datetime(x, ...) + } else { + x$cast(string()) Review comment: Should we use `build_expr("cast", x, ...)` here (and then we can likely get rid of the bit about that returns early if x isn't an expression, I think...)? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org