thisisnic commented on a change in pull request #10191: URL: https://github.com/apache/arrow/pull/10191#discussion_r631305122
########## File path: r/tests/testthat/test-dplyr-string-functions.R ########## @@ -342,99 +342,73 @@ test_that("arrow_*_split_whitespace functions", { collect(), tibble(x = list(c("Foo\u00A0and", "bar"), c("baz\u2006and\u1680qux\u3000and", "quux"))) ) - }) test_that("errors and warnings in string splitting", { - df <- tibble(x = c("Foo and bar", "baz and qux and quux")) - # These conditions generate an error, but abandon_ship() catches the error, - # issues a warning, and pulls the data into R - expect_warning( - df %>% - Table$create() %>% - mutate(x = strsplit(x, "and.*", fixed = FALSE)) %>% - collect(), - regexp = "not supported" + # issues a warning, and pulls the data into R (if computing on InMemoryDataset) + # Elsewhere we test that abandon_ship() works, + # so here we can just call the functions directly + + x <- Expression$field_ref("x") + expect_error( + nse_funcs$strsplit(x, "and.*", fixed = FALSE), + 'Regular expression matching in strsplit() not supported by Arrow', + fixed = TRUE ) - expect_warning( - df %>% - Table$create() %>% - mutate(x = str_split(x, "and.?")) %>% - collect() + expect_error( + nse_funcs$str_split(x, "and.?"), + 'Regular expression matching in str_split() not supported by Arrow', + fixed = TRUE ) - expect_warning( - df %>% - Table$create() %>% - mutate(x = str_split(x, regex("and.?"), n = 2)) %>% - collect(), - regexp = "not supported" + expect_error( + nse_funcs$str_split(x, regex("and.*")), + 'Regular expression matching in str_split() not supported by Arrow', + fixed = TRUE ) - expect_warning( - df %>% - Table$create() %>% - mutate(x = str_split(x, fixed("and", ignore_case = TRUE))) %>% - collect(), - "not supported" + expect_error( + nse_funcs$str_split(x, fixed("and", ignore_case = TRUE)), + "Case-insensitive string splitting not supported by Arrow" ) - expect_warning( - df %>% - Table$create() %>% - mutate(x = str_split(x, coll("and.?"))) %>% - collect(), - regexp = "not supported" + expect_error( + nse_funcs$str_split(x, 
coll("and.?")), + "Pattern modifier `coll()` not supported by Arrow", + fixed = TRUE ) - expect_warning( - df %>% - Table$create() %>% - mutate(x = str_split(x, boundary(type = "word"))) %>% - collect(), - regexp = "not supported" + expect_error( + nse_funcs$str_split(x, boundary(type = "word")), + "Pattern modifier `boundary()` not supported by Arrow", + fixed = TRUE Review comment: The new approach to defining these functions makes it much clearer what each test is checking, without needing all the extra dplyr code; nice. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org