jonkeane commented on a change in pull request #10724:
URL: https://github.com/apache/arrow/pull/10724#discussion_r671549980
##########
File path: r/tests/testthat/test-dplyr.R
##########
@@ -1013,44 +1013,158 @@ test_that("log functions", {
)
})
-
+
test_that("trig functions", {
-
+
df <- tibble(x = c(seq(from = 0, to = 1, by = 0.1), NA))
-
+
expect_dplyr_equal(
input %>%
mutate(y = sin(x)) %>%
collect(),
df
)
-
+
expect_dplyr_equal(
input %>%
mutate(y = cos(x)) %>%
collect(),
df
)
-
+
expect_dplyr_equal(
input %>%
mutate(y = tan(x)) %>%
collect(),
df
)
-
+
expect_dplyr_equal(
input %>%
mutate(y = asin(x)) %>%
collect(),
df
)
-
+
expect_dplyr_equal(
input %>%
mutate(y = acos(x)) %>%
collect(),
df
)
-})
\ No newline at end of file
+})
+
+test_that("if_else and ifelse", {
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ y = if_else(int > 5, 1, 0)
+ ) %>% collect(),
+ example_data
+ )
+
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ y = if_else(int > 5, int, 0L)
+ ) %>% collect(),
+ example_data
+ )
+
+ expect_error(
+ Table$create(example_data) %>%
+ mutate(
+ y = if_else(int > 5, 1, FALSE)
+ ) %>% collect(),
+ 'NotImplemented: Function if_else has no kernel matching input types'
+ )
+
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ y = if_else(int > 5, 1, NA_real_)
+ ) %>% collect(),
+ example_data
+ )
+
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ y = ifelse(int > 5, 1, 0)
+ ) %>% collect(),
+ example_data
+ )
+
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ y = if_else(dbl > 5, TRUE, FALSE)
+ ) %>% collect(),
+ example_data
+ )
+
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ y = if_else(chr %in% letters[1:3], 1L, 3L)
+ ) %>% collect(),
+ example_data
+ )
+
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ y = if_else(int > 5, "one", "zero")
+ ) %>% collect(),
+ example_data
+ )
+
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ y = if_else(int > 5, chr, chr)
+ ) %>% collect(),
+ example_data
+ )
+
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ y = if_else(int > 5, chr, chr, missing = "MISSING")
+ ) %>% collect(),
+ example_data
+ )
+
+ # TODO: remove the mutate + warning after ARROW-13358 is merged and Arrow
+ # supports factors in if(_)else
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ y = if_else(int > 5, fct, factor("a"))
+ ) %>% collect() %>%
+ # This is a no-op on the Arrow side, but necessary to make the results equal
+ mutate(y = as.character(y)),
+ example_data,
+ warning = "Factors are currently converted to characters in if_else and
ifelse"
+ )
+
+ skip("ARROW-12055 for better NaN support")
+ # currently NaNs are not NAs and so the missing argument is not correctly
+ # applied
+ expect_dplyr_equal(
+ input %>%
+ mutate(
+ y = if_else(dbl > 5, chr, chr, missing = "MISSING")
+ ) %>% collect(),
+ example_data_for_sorting
+ )
Review comment:
~Unfortunately~ _as it turns out_, no. I've created
https://issues.apache.org/jira/browse/ARROW-13364 to track this, but Arrow's
comparison with `NaN`s results in `false` and not an `NA`(-like) value:
```
> example_data_for_sorting %>% mutate(
+ y = if_else(dbl > 5, chr, chr, missing = "MISSING")
+ ) %>% collect()
# A tibble: 10 x 7
int dbl chr lgl dttm grp y
<int> <dbl> <chr> <lgl> <dttm> <chr> <chr>
1 -2147483647 -Inf "" FALSE 0000-01-01 00:00:00 A ""
2 -101 -1.80e+308 "" FALSE 1919-05-29 13:08:55 A ""
3 -100 -2.23e-308 "\"" FALSE 1955-06-20 04:10:42 A "\""
4 0 0 "&" FALSE 1973-06-30 11:38:41 A "&"
5 0 2.23e-308 "ABC" TRUE 1987-03-29 12:49:47 A "ABC"
6 1 3.14e+ 0 "NULL" TRUE 1991-06-11 19:07:01 B "NULL"
7 100 1.80e+308 "a" TRUE NA B "a"
8 1000 Inf "abc" TRUE 2017-08-21 18:26:40 B "abc"
9 2147483647 NaN "zzz" TRUE 2017-08-21 18:26:40 B "MISSING"
10 NA NA NA NA 9999-12-31 23:59:59 B "MISSING"
> Table$create(example_data_for_sorting) %>% mutate(
+ y = if_else(dbl > 5, chr, chr, missing = "MISSING")
+ ) %>% collect()
# A tibble: 10 x 7
int dbl chr lgl dttm grp y
<int> <dbl> <chr> <lgl> <dttm> <chr> <chr>
1 -2147483647 -Inf "" FALSE 0000-01-01 00:00:00 A ""
2 -101 -1.80e+308 "" FALSE 1919-05-29 13:08:55 A ""
3 -100 -2.23e-308 "\"" FALSE 1955-06-20 04:10:42 A "\""
4 0 0 "&" FALSE 1973-06-30 11:38:41 A "&"
5 0 2.23e-308 "ABC" TRUE 1987-03-29 12:49:47 A "ABC"
6 1 3.14e+ 0 "NULL" TRUE 1991-06-11 19:07:01 B "NULL"
7 100 1.80e+308 "a" TRUE NA B "a"
8 1000 Inf "abc" TRUE 2017-08-21 18:26:40 B "abc"
9 2147483647 NaN "zzz" TRUE 2017-08-21 18:26:40 B "zzz"
10 NA NA NA NA 9999-12-31 23:59:59 B "MISSING"
```
In the 9th row, `NaN > 5` evaluates to `NA` in R and therefore gets the
`missing` value (`"MISSING"`), whereas in Arrow `NaN > 5` evaluates to `false`,
so we get the `"zzz"` from the `chr` column.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]