jonkeane commented on a change in pull request #10724:
URL: https://github.com/apache/arrow/pull/10724#discussion_r671549980



##########
File path: r/tests/testthat/test-dplyr.R
##########
@@ -1013,44 +1013,158 @@ test_that("log functions", {
   )
 
 })
-  
+
 test_that("trig functions", {
-  
+
   df <- tibble(x = c(seq(from = 0, to = 1, by = 0.1), NA))
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = sin(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = cos(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = tan(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = asin(x)) %>%
       collect(),
     df
   )
-  
+
   expect_dplyr_equal(
     input %>%
       mutate(y = acos(x)) %>%
       collect(),
     df
   )
 
-})
\ No newline at end of file
+})
+
+test_that("if_else and ifelse", {
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, 1, 0)
+      ) %>% collect(),
+    example_data
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, int, 0L)
+      ) %>% collect(),
+    example_data
+  )
+
+  expect_error(
+    Table$create(example_data) %>%
+      mutate(
+        y = if_else(int > 5, 1, FALSE)
+      ) %>% collect(),
+    'NotImplemented: Function if_else has no kernel matching input types'
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, 1, NA_real_)
+      ) %>% collect(),
+    example_data
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = ifelse(int > 5, 1, 0)
+      ) %>% collect(),
+    example_data
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(dbl > 5, TRUE, FALSE)
+      ) %>% collect(),
+    example_data
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(chr %in% letters[1:3], 1L, 3L)
+      ) %>% collect(),
+    example_data
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, "one", "zero")
+      ) %>% collect(),
+    example_data
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, chr, chr)
+      ) %>% collect(),
+    example_data
+  )
+
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, chr, chr, missing = "MISSING")
+      ) %>% collect(),
+    example_data
+  )
+
+  # TODO: remove the mutate + warning after ARROW-13358 is merged and Arrow
+  # supports factors in if(_)else
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(int > 5, fct, factor("a"))
+      ) %>% collect() %>%
+      # This is a no-op on the Arrow side, but necessary to make the results equal
+      mutate(y = as.character(y)),
+    example_data,
+    warning = "Factors are currently converted to characters in if_else and ifelse"
+  )
+
+  skip("ARROW-12055 for better NaN support")
+  # currently NaNs are not NAs and so the missing argument is not correctly
+  # applied
+  expect_dplyr_equal(
+    input %>%
+      mutate(
+        y = if_else(dbl > 5, chr, chr, missing = "MISSING")
+      ) %>% collect(),
+    example_data_for_sorting
+  )

Review comment:
       ~Unfortunately~ _as it turns out_, no. I've created 
https://issues.apache.org/jira/browse/ARROW-13364 to track this, but Arrow's 
comparison with `NaN`s results in `false` and not an `NA`(-like) value: 
   
   ```
   > example_data_for_sorting %>% mutate(
   +     y = if_else(dbl > 5, chr, chr, missing = "MISSING")
   + ) %>% collect()
   # A tibble: 10 x 7
              int          dbl chr    lgl   dttm                grp   y        
            <int>        <dbl> <chr>  <lgl> <dttm>              <chr> <chr>    
    1 -2147483647 -Inf         ""     FALSE 0000-01-01 00:00:00 A     ""       
    2        -101   -1.80e+308 ""     FALSE 1919-05-29 13:08:55 A     ""       
    3        -100   -2.23e-308 "\""   FALSE 1955-06-20 04:10:42 A     "\""     
    4           0    0         "&"    FALSE 1973-06-30 11:38:41 A     "&"      
    5           0    2.23e-308 "ABC"  TRUE  1987-03-29 12:49:47 A     "ABC"    
    6           1    3.14e+  0 "NULL" TRUE  1991-06-11 19:07:01 B     "NULL"   
    7         100    1.80e+308 "a"    TRUE  NA                  B     "a"      
    8        1000  Inf         "abc"  TRUE  2017-08-21 18:26:40 B     "abc"    
    9  2147483647  NaN         "zzz"  TRUE  2017-08-21 18:26:40 B     "MISSING"
   10          NA   NA          NA    NA    9999-12-31 23:59:59 B     "MISSING"
   > Table$create(example_data_for_sorting) %>% mutate(
   +     y = if_else(dbl > 5, chr, chr, missing = "MISSING")
   + ) %>% collect()
   # A tibble: 10 x 7
              int          dbl chr    lgl   dttm                grp   y        
            <int>        <dbl> <chr>  <lgl> <dttm>              <chr> <chr>    
    1 -2147483647 -Inf         ""     FALSE 0000-01-01 00:00:00 A     ""       
    2        -101   -1.80e+308 ""     FALSE 1919-05-29 13:08:55 A     ""       
    3        -100   -2.23e-308 "\""   FALSE 1955-06-20 04:10:42 A     "\""     
    4           0    0         "&"    FALSE 1973-06-30 11:38:41 A     "&"      
    5           0    2.23e-308 "ABC"  TRUE  1987-03-29 12:49:47 A     "ABC"    
    6           1    3.14e+  0 "NULL" TRUE  1991-06-11 19:07:01 B     "NULL"   
    7         100    1.80e+308 "a"    TRUE  NA                  B     "a"      
    8        1000  Inf         "abc"  TRUE  2017-08-21 18:26:40 B     "abc"    
    9  2147483647  NaN         "zzz"  TRUE  2017-08-21 18:26:40 B     "zzz"    
   10          NA   NA          NA    NA    9999-12-31 23:59:59 B     "MISSING"
   ```
   
   In that 9th row, `NaN > 5` evaluates to `NA` in R, so the row gets the 
`missing` value (`"MISSING"`), whereas in Arrow `NaN > 5` evaluates to `false`, 
so we get the `"zzz"` from the `chr` column.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to