This is an automated email from the ASF dual-hosted git repository.

thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new bb4e492872 GH-49534: [R] Implement dplyr recode_values(), 
replace_values(), and replace_when() (#49536)
bb4e492872 is described below

commit bb4e492872c3112f68897e0046eae4088f15eff9
Author: Nic Crane <[email protected]>
AuthorDate: Tue Apr 7 20:27:13 2026 +0100

    GH-49534: [R] Implement dplyr recode_values(), replace_values(), and 
replace_when() (#49536)
    
    ### Rationale for this change
    
    Implement new dplyr functions
    
    ### What changes are included in this PR?
    
    Implement them
    
    ### Are these changes tested?
    
    Yeah
    
    ### Are there any user-facing changes?
    
    Moar functions
    
    ### AI Use
    
    Code generated using Claude, with plenty of input from me. I've gone 
through it in detail and refactored lots, but it needs a last pass before it's 
ready for review.
    
    * GitHub Issue: #49534
    
    Lead-authored-by: Nic Crane <[email protected]>
    Co-authored-by: Jonathan Keane <[email protected]>
    Signed-off-by: Nic Crane <[email protected]>
---
 r/NAMESPACE                                     |   1 +
 r/R/arrow-package.R                             |   2 +-
 r/R/dplyr-funcs-conditional.R                   | 260 +++++++++++++++++++---
 r/R/dplyr-funcs-doc.R                           |   5 +-
 r/man/acero.Rd                                  |   7 +-
 r/man/read_json_arrow.Rd                        |   2 +-
 r/man/schema.Rd                                 |   2 +-
 r/tests/testthat/test-dplyr-funcs-conditional.R | 281 +++++++++++++++++++++++-
 8 files changed, 521 insertions(+), 39 deletions(-)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index 9dbbad9c45..f42944fb58 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -432,6 +432,7 @@ importFrom(bit64,str.integer64)
 importFrom(glue,glue)
 importFrom(methods,as)
 importFrom(purrr,as_mapper)
+importFrom(purrr,compact)
 importFrom(purrr,flatten)
 importFrom(purrr,imap)
 importFrom(purrr,imap_chr)
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 3964feb74d..9de7afe022 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -18,7 +18,7 @@
 #' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
 #' @importFrom R6 R6Class
 #' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dbl map_dfr 
map_int map_lgl keep imap imap_chr
-#' @importFrom purrr flatten reduce walk
+#' @importFrom purrr compact flatten reduce walk
 #' @importFrom assertthat assert_that is.string
 #' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null 
enquos is_integerish quos quo
 #' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind 
set_names exec
diff --git a/r/R/dplyr-funcs-conditional.R b/r/R/dplyr-funcs-conditional.R
index a7df2e8f0d..b2d5a6ac79 100644
--- a/r/R/dplyr-funcs-conditional.R
+++ b/r/R/dplyr-funcs-conditional.R
@@ -15,6 +15,169 @@
 # specific language governing permissions and limitations
 # under the License.
 
+#' Parse logical condition formulas
+#'
+#' Converts condition ~ value formulas into Arrow expressions. Unlike
+#' [parse_value_mapping()], the LHS must be a logical expression (not a value
+#' to match against).
+#'
+#' @param formulas A list of two-sided formulas where LHS is a logical 
condition
+#'   and RHS is the value to use when TRUE (e.g., `x > 5 ~ "high"`).
+#' @param mask The data mask for evaluating formula expressions.
+#'
+#' @return A list with `query` (list of logical expressions) and `value`
+#'   (list of replacement expressions).
+#'
+#' @keywords internal
+#' @noRd
+parse_condition_formulas <- function(formulas, mask) {
+  fn <- call_name(rlang::caller_call())
+  # Compact NULL entries (allows conditional formulas like: if (cond) x ~ y)
+  formulas <- compact(formulas)
+  n <- length(formulas)
+  query <- vector("list", n)
+  value <- vector("list", n)
+  # Process each formula: condition ~ value
+  for (i in seq_len(n)) {
+    f <- formulas[[i]]
+    if (!is_formula(f, lhs = TRUE)) {
+      validation_error(paste0("Each argument to ", fn, "() must be a two-sided 
formula"))
+    }
+    # f[[2]] is LHS (logical condition), f[[3]] is RHS (value when TRUE)
+    query[[i]] <- arrow_eval(f[[2]], mask)
+    value[[i]] <- arrow_eval(f[[3]], mask)
+    # Validate LHS is logical (unlike parse_value_mapping which does equality 
matching)
+    if (!call_binding("is.logical", query[[i]])) {
+      validation_error(paste0("Left side of each formula in ", fn, "() must be 
a logical expression"))
+    }
+  }
+  list(query = query, value = value)
+}
+
+#' Create case_when Expression from query/value lists
+#' @param query List of logical Arrow Expressions.
+#' @param value List of value Arrow Expressions.
+#' @return An Arrow Expression representing the case_when.
+#' @keywords internal
+#' @noRd
+build_case_when_expr <- function(query, value) {
+  Expression$create(
+    "case_when",
+    args = c(
+      Expression$create(
+        "make_struct",
+        args = query,
+        options = list(field_names = as.character(seq_along(query)))
+      ),
+      value
+    )
+  )
+}
+
+#' Build a match expression for x against a value (scalar, NA, or vector).
+#' @param x Arrow Expression for the column to match against.
+#' @param match_value Value to match - R scalar, vector, or NA. Expressions
+#'   are compared with equality.
+#' @return Arrow Expression that is TRUE when x matches match_value.
+#' @keywords internal
+#' @noRd
+build_match_expr <- function(x, match_value) {
+  # Expressions or length-1 non-NA: use equality directly
+  if (inherits(match_value, "Expression") || length(match_value) == 1 && 
!is.na(match_value)) {
+    return(x == match_value)
+  }
+
+  # R scalar NA requires is.na() since x == NA returns NA in Arrow
+  if (length(match_value) == 1) {
+    return(call_binding("is.na", x))
+  }
+
+  # R vector: use %in%, handling NA separately if present
+  has_na <- any(is.na(match_value))
+  non_na_values <- match_value[!is.na(match_value)]
+
+  if (length(non_na_values) == 0) {
+    call_binding("is.na", x)
+  } else if (has_na) {
+    call_binding("%in%", x, non_na_values) | call_binding("is.na", x)
+  } else {
+    call_binding("%in%", x, match_value)
+  }
+}
+
+#' Build query/value lists from parallel from/to vectors.
+#' NA values in `from` use is.na() for matching.
+#' @param x Arrow Expression for the column to match against.
+#' @param from Vector of values to match.
+#' @param to Vector of replacement values (recycled to length of `from`).
+#' @return list(query, value) for use with build_case_when_expr().
+#' @keywords internal
+#' @noRd
+parse_from_to_mapping <- function(x, from, to) {
+  n <- length(from)
+  to <- vctrs::vec_recycle(to, n)
+  query <- map(from, ~ build_match_expr(x, .x))
+  value <- map(to, Expression$scalar)
+  list(query = query, value = value)
+}
+
+#' Build query/value lists from value ~ replacement formulas.
+#' NA values on LHS use is.na() for matching.
+#' @param x Arrow Expression for the column to match against.
+#' @param formulas List of two-sided formulas (value ~ replacement).
+#' @param mask Data mask for evaluating formula expressions.
+#' @param fn Calling function name (for error messages).
+#' @return list(query, value) for use with build_case_when_expr().
+#' @keywords internal
+#' @noRd
+parse_formula_mapping <- function(x, formulas, mask, fn) {
+  # Compact NULL entries (allows conditional formulas like: if (cond) x ~ y)
+  formulas <- compact(formulas)
+  n <- length(formulas)
+  query <- vector("list", n)
+  value <- vector("list", n)
+  for (i in seq_len(n)) {
+    f <- formulas[[i]]
+    if (!is_formula(f, lhs = TRUE)) {
+      validation_error(paste0("Each argument to ", fn, "() must be a two-sided 
formula"))
+    }
+    # f[[2]] is LHS (value to match), f[[3]] is RHS (replacement)
+    lhs <- arrow_eval(f[[2]], mask)
+    query[[i]] <- build_match_expr(x, lhs)
+    value[[i]] <- arrow_eval(f[[3]], mask)
+  }
+  list(query = query, value = value)
+}
+
+#' Dispatch to formula or from/to parser based on which args are provided.
+#' Returns list(query, value) or NULL if no mappings.
+#' @param x Arrow Expression for the column to match against.
+#' @param formulas List of two-sided formulas (value ~ replacement).
+#' @param from Vector of values to match (alternative to formulas).
+#' @param to Vector of replacement values (used with `from`).
+#' @param mask The data mask for evaluating formula expressions.
+#' @keywords internal
+#' @noRd
+parse_value_mapping <- function(x, formulas = list(), from = NULL, to = NULL, 
mask) {
+  fn <- call_name(rlang::caller_call())
+  # Mutually exclusive interfaces
+  if (length(formulas) > 0 && !is.null(from)) {
+    validation_error(paste0("Can't use both `...` and `from`/`to` in ", fn, 
"()"))
+  }
+
+  if (length(formulas) > 0) {
+    parse_formula_mapping(x, formulas, mask, fn)
+  } else if (!is.null(from)) {
+    if (is.null(to)) {
+      validation_error("`to` must be provided when using `from`")
+    }
+    parse_from_to_mapping(x, from, to)
+  } else {
+    # No mappings provided
+    NULL
+  }
+}
+
 register_bindings_conditional <- function() {
   register_binding("%in%", function(x, table) {
     # We use `is_in` here, unlike with Arrays, which use `is_in_meta_binary`
@@ -133,44 +296,79 @@ register_bindings_conditional <- function() {
       }
 
       formulas <- list2(...)
-      n <- length(formulas)
-      if (n == 0) {
+      if (length(formulas) == 0) {
         validation_error("No cases provided")
       }
-      query <- vector("list", n)
-      value <- vector("list", n)
-      mask <- caller_env()
-      for (i in seq_len(n)) {
-        f <- formulas[[i]]
-        if (!inherits(f, "formula")) {
-          validation_error("Each argument to case_when() must be a two-sided 
formula")
-        }
-        query[[i]] <- arrow_eval(f[[2]], mask)
-        value[[i]] <- arrow_eval(f[[3]], mask)
-        if (!call_binding("is.logical", query[[i]])) {
-          validation_error("Left side of each formula in case_when() must be a 
logical expression")
-        }
-      }
+      parsed <- parse_condition_formulas(formulas, caller_env())
+      query <- parsed$query
+      value <- parsed$value
       if (!is.null(.default)) {
         if (length(.default) != 1) {
-          validation_error(paste0("`.default` must have size 1, not size ", 
length(.default), "."))
+          arrow_not_supported("`.default` must be size 1; vectors of length > 
1")
         }
-
-        query[n + 1] <- TRUE
-        value[n + 1] <- .default
+        n <- length(query)
+        query[[n + 1]] <- TRUE
+        value[[n + 1]] <- .default
       }
-      Expression$create(
-        "case_when",
-        args = c(
-          Expression$create(
-            "make_struct",
-            args = query,
-            options = list(field_names = as.character(seq_along(query)))
-          ),
-          value
-        )
-      )
+      build_case_when_expr(query, value)
     },
     notes = "`.ptype` and `.size` arguments not supported"
   )
+
+  register_binding("dplyr::replace_when", function(x, ...) {
+    formulas <- list2(...)
+    if (length(formulas) == 0) {
+      return(x)
+    }
+    parsed <- parse_condition_formulas(formulas, caller_env())
+    query <- parsed$query
+    value <- parsed$value
+    n <- length(query)
+    query[[n + 1]] <- TRUE
+    value[[n + 1]] <- x
+    build_case_when_expr(query, value)
+  })
+
+  register_binding("dplyr::replace_values", function(x, ..., from = NULL, to = 
NULL) {
+    parsed <- parse_value_mapping(x, list2(...), from, to, caller_env())
+    if (is.null(parsed)) {
+      return(x)
+    }
+    query <- parsed$query
+    value <- parsed$value
+    n <- length(query)
+    query[[n + 1]] <- TRUE
+    value[[n + 1]] <- x
+    build_case_when_expr(query, value)
+  })
+
+  register_binding(
+    "dplyr::recode_values",
+    function(x, ..., from = NULL, to = NULL, default = NULL, unmatched = 
"default", ptype = NULL) {
+      if (!is.null(ptype)) {
+        arrow_not_supported("`recode_values()` with `ptype` specified")
+      }
+      if (unmatched != "default") {
+        arrow_not_supported('`recode_values()` with `unmatched` other than 
"default"')
+      }
+
+      parsed <- parse_value_mapping(x, list2(...), from, to, caller_env())
+      if (is.null(parsed)) {
+        validation_error("`...` can't be empty")
+      }
+      query <- parsed$query
+      value <- parsed$value
+
+      if (!is.null(default)) {
+        if (length(default) != 1) {
+          arrow_not_supported("`default` must be size 1; vectors of length > 
1")
+        }
+        n <- length(query)
+        query[[n + 1]] <- TRUE
+        value[[n + 1]] <- Expression$scalar(default)
+      }
+      build_case_when_expr(query, value)
+    },
+    notes = "`ptype` argument and `unmatched = \"error\"` not supported"
+  )
 }
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index e0b3dd095c..f7ca29833c 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -21,7 +21,7 @@
 #'
 #' The `arrow` package contains methods for 38 `dplyr` table functions, many of
 #' which are "verbs" that do transformations to one or more tables.
-#' The package also has mappings of 226 R functions to the corresponding
+#' The package also has mappings of 229 R functions to the corresponding
 #' functions in the Arrow compute library. These allow you to write code inside
 #' of `dplyr` methods that call R functions, including many in packages like
 #' `stringr` and `lubridate`, and they will get translated to Arrow and run
@@ -214,6 +214,9 @@
 #' * [`if_else()`][dplyr::if_else()]
 #' * [`n()`][dplyr::n()]
 #' * [`n_distinct()`][dplyr::n_distinct()]
+#' * [`recode_values()`][dplyr::recode_values()]: `ptype` argument and 
`unmatched = "error"` not supported
+#' * [`replace_values()`][dplyr::replace_values()]
+#' * [`replace_when()`][dplyr::replace_when()]
 #' * [`when_all()`][dplyr::when_all()]
 #' * [`when_any()`][dplyr::when_any()]
 #'
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index a43617493a..f721aa5d9f 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -9,7 +9,7 @@
 \description{
 The \code{arrow} package contains methods for 38 \code{dplyr} table functions, 
many of
 which are "verbs" that do transformations to one or more tables.
-The package also has mappings of 226 R functions to the corresponding
+The package also has mappings of 229 R functions to the corresponding
 functions in the Arrow compute library. These allow you to write code inside
 of \code{dplyr} methods that call R functions, including many in packages like
 \code{stringr} and \code{lubridate}, and they will get translated to Arrow and 
run
@@ -72,7 +72,7 @@ can assume that the function works in Acero just as it does 
in R.
 Functions can be called either as \code{pkg::fun()} or just \code{fun()}, i.e. 
both
 \code{str_sub()} and \code{stringr::str_sub()} work.
 
-In addition to these functions, you can call any of Arrow's 281 compute
+In addition to these functions, you can call any of Arrow's 253 compute
 functions directly. Arrow has many functions that don't map to an existing R
 function. In other cases where there is an R function mapping, you can still
 call the Arrow function directly if you don't want the adaptations that the R
@@ -207,6 +207,9 @@ Valid values are "s", "ms" (default), "us", "ns".
 \item \code{\link[dplyr:if_else]{if_else()}}
 \item \code{\link[dplyr:context]{n()}}
 \item \code{\link[dplyr:n_distinct]{n_distinct()}}
+\item \code{\link[dplyr:recode-and-replace-values]{recode_values()}}: 
\code{ptype} argument and \code{unmatched = "error"} not supported
+\item \code{\link[dplyr:recode-and-replace-values]{replace_values()}}
+\item \code{\link[dplyr:case-and-replace-when]{replace_when()}}
 \item \code{\link[dplyr:when-any-all]{when_all()}}
 \item \code{\link[dplyr:when-any-all]{when_any()}}
 }
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index abf6b8fc44..b809a63bcc 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -54,7 +54,7 @@ If \code{schema} is not provided, Arrow data types are 
inferred from the data:
 \item JSON numbers convert to \code{\link[=int64]{int64()}}, falling back to 
\code{\link[=float64]{float64()}} if a non-integer is encountered.
 \item JSON strings of the kind "YYYY-MM-DD" and "YYYY-MM-DD hh:mm:ss" convert 
to \code{\link[=timestamp]{timestamp(unit = "s")}},
 falling back to \code{\link[=utf8]{utf8()}} if a conversion error occurs.
-\item JSON arrays convert to a \code{\link[vctrs:list_of]{vctrs::list_of()}} 
type, and inference proceeds recursively on the JSON arrays' values.
+\item JSON arrays convert to a \code{\link[=list_of]{list_of()}} type, and 
inference proceeds recursively on the JSON arrays' values.
 \item Nested JSON objects convert to a \code{\link[=struct]{struct()}} type, 
and inference proceeds recursively on the JSON objects' values.
 }
 
diff --git a/r/man/schema.Rd b/r/man/schema.Rd
index ff77a05d84..65ab2eea0d 100644
--- a/r/man/schema.Rd
+++ b/r/man/schema.Rd
@@ -7,7 +7,7 @@
 schema(...)
 }
 \arguments{
-\item{...}{\link[vctrs:fields]{fields}, field name/\link[=data-type]{data 
type} pairs (or a list of), or object from which to extract
+\item{...}{\link[=field]{fields}, field name/\link[=data-type]{data type} 
pairs (or a list of), or object from which to extract
 a schema}
 }
 \description{
diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R 
b/r/tests/testthat/test-dplyr-funcs-conditional.R
index d99843ab9d..f7d5b4d6b9 100644
--- a/r/tests/testthat/test-dplyr-funcs-conditional.R
+++ b/r/tests/testthat/test-dplyr-funcs-conditional.R
@@ -296,8 +296,8 @@ test_that("case_when()", {
   )
   expect_arrow_eval_error(
     case_when(int > 5 ~ 1, .default = c(0, 1)),
-    "`.default` must have size 1, not size 2",
-    class = "validation_error"
+    "`.default` must be size 1; vectors of length > 1 not supported in Arrow",
+    class = "arrow_not_supported"
   )
 
   expect_arrow_eval_error(
@@ -599,3 +599,280 @@ test_that("when_all()", {
     class = "arrow_not_supported"
   )
 })
+
+test_that("replace_when()", {
+  # replaces matching values, keeps original otherwise
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_when(int, int > 5 ~ 100L)) |>
+      collect(),
+    tbl
+  )
+
+  # multiple conditions
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_when(int, int > 7 ~ 100L, int < 3 ~ 0L)) |>
+      collect(),
+    tbl
+  )
+
+  # overlapping conditions - first match wins
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_when(int, int > 3 ~ 100L, int > 5 ~ 200L)) |>
+      collect(),
+    tbl
+  )
+
+  # no formulas returns x unchanged
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_when(int)) |>
+      collect(),
+    tbl
+  )
+
+  # NULL arguments (e.g. from `if (condition) ...` when FALSE) are compacted out
+  condition <- FALSE
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_when(int, if (condition) int > 5 ~ 100L, int < 3 
~ 0L)) |>
+      collect(),
+    tbl
+  )
+
+  # validation errors
+  expect_arrow_eval_error(
+    replace_when(int, TRUE),
+    "Each argument to replace_when\\(\\) must be a two-sided formula",
+    class = "validation_error"
+  )
+  expect_arrow_eval_error(
+    replace_when(int, ~100L),
+    "Each argument to replace_when\\(\\) must be a two-sided formula",
+    class = "validation_error"
+  )
+  expect_arrow_eval_error(
+    replace_when(int, 0L ~ 100L),
+    "Left side of each formula in replace_when\\(\\) must be a logical 
expression",
+    class = "validation_error"
+  )
+})
+
+test_that("replace_values()", {
+  # formula interface
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_values(chr, "a" ~ "A", "b" ~ "B")) |>
+      collect(),
+    tbl
+  )
+
+  # from/to interface
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_values(chr, from = c("a", "b"), to = c("A", 
"B"))) |>
+      collect(),
+    tbl
+  )
+
+  # from/to with list of vectors - multiple values map to single replacement
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_values(chr, from = list(c("a", "b"), "c"), to = 
c("AB", "C"))) |>
+      collect(),
+    tbl
+  )
+
+  # unmatched values kept
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_values(chr, "a" ~ "A")) |>
+      collect(),
+    tbl
+  )
+
+  # works with numeric values
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_values(int, 1L ~ 100L, 2L ~ 200L)) |>
+      collect(),
+    tbl
+  )
+
+  # explicit NA matching with formula
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_values(chr, "a" ~ "A", NA ~ "missing")) |>
+      collect(),
+    tbl
+  )
+
+  # explicit NA matching with from/to
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_values(chr, from = c("a", NA), to = c("A", 
"missing"))) |>
+      collect(),
+    tbl
+  )
+
+  # multiple values on LHS matches any
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_values(chr, c("a", "b") ~ "AB")) |>
+      collect(),
+    tbl
+  )
+
+  # multiple values on LHS including NA matches any including NA
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_values(chr, c(NA, "a") ~ "matched")) |>
+      collect(),
+    tbl
+  )
+
+  # from/to with list containing NA matches NA too
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_values(chr, from = list(c(NA, "a"), "b"), to = 
c("matched", "B"))) |>
+      collect(),
+    tbl
+  )
+
+  # no replacements returns x unchanged
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = replace_values(chr)) |>
+      collect(),
+    tbl
+  )
+
+  # validation errors
+  expect_arrow_eval_error(
+    replace_values(chr, "A"),
+    "Each argument to replace_values\\(\\) must be a two-sided formula",
+    class = "validation_error"
+  )
+  expect_arrow_eval_error(
+    replace_values(chr, ~"A"),
+    "Each argument to replace_values\\(\\) must be a two-sided formula",
+    class = "validation_error"
+  )
+  expect_arrow_eval_error(
+    replace_values(chr, "a" ~ "A", from = "b"),
+    "Can't use both `...` and `from`/`to` in replace_values\\(\\)",
+    class = "validation_error"
+  )
+  expect_arrow_eval_error(
+    replace_values(chr, from = "a"),
+    "`to` must be provided when using `from`",
+    class = "validation_error"
+  )
+})
+
+test_that("recode_values()", {
+  # formula interface; unmatched values become NA when no default is given
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = recode_values(chr, "a" ~ "A", "b" ~ "B")) |>
+      collect(),
+    tbl
+  )
+
+  # from/to interface
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = recode_values(chr, from = c("a", "b"), to = c("A", 
"B"))) |>
+      collect(),
+    tbl
+  )
+
+  # from/to with list of vectors - multiple values map to single replacement
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = recode_values(chr, from = list(c("a", "b"), "c"), to = 
c("AB", "C"))) |>
+      collect(),
+    tbl
+  )
+
+  # custom default
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = recode_values(chr, "a" ~ "A", default = "other")) |>
+      collect(),
+    tbl
+  )
+
+  # works with numeric values
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = recode_values(int, 1L ~ 100L, 2L ~ 200L)) |>
+      collect(),
+    tbl
+  )
+
+  # NA input with default - NA also becomes default
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = recode_values(chr, "a" ~ "A", "b" ~ "B", default = 
"other")) |>
+      collect(),
+    tbl
+  )
+
+  # multiple values on LHS matches any
+  compare_dplyr_binding(
+    .input |>
+      mutate(result = recode_values(chr, c("a", "b") ~ "AB", default = 
"other")) |>
+      collect(),
+    tbl
+  )
+
+  # validation errors
+  expect_arrow_eval_error(
+    recode_values(chr),
+    "`\\.\\.\\.` can't be empty",
+    class = "validation_error"
+  )
+  expect_arrow_eval_error(
+    recode_values(chr, "A"),
+    "Each argument to recode_values\\(\\) must be a two-sided formula",
+    class = "validation_error"
+  )
+  expect_arrow_eval_error(
+    recode_values(chr, ~"A"),
+    "Each argument to recode_values\\(\\) must be a two-sided formula",
+    class = "validation_error"
+  )
+  expect_arrow_eval_error(
+    recode_values(chr, "a" ~ "A", from = "b"),
+    "Can't use both `...` and `from`/`to` in recode_values\\(\\)",
+    class = "validation_error"
+  )
+  expect_arrow_eval_error(
+    recode_values(chr, from = "a"),
+    "`to` must be provided when using `from`",
+    class = "validation_error"
+  )
+  expect_arrow_eval_error(
+    recode_values(chr, "a" ~ "A", ptype = character()),
+    "`recode_values\\(\\)` with `ptype` specified not supported in Arrow",
+    class = "arrow_not_supported"
+  )
+  expect_arrow_eval_error(
+    recode_values(chr, "a" ~ "A", unmatched = "error"),
+    "`recode_values\\(\\)` with `unmatched` other than \"default\" not 
supported in Arrow",
+    class = "arrow_not_supported"
+  )
+  expect_arrow_eval_error(
+    recode_values(chr, "a" ~ "A", unmatched = "wat"),
+    "`recode_values\\(\\)` with `unmatched` other than \"default\" not 
supported in Arrow",
+    class = "arrow_not_supported"
+  )
+  expect_arrow_eval_error(
+    recode_values(chr, "a" ~ "A", default = c("x", "y")),
+    "`default` must be size 1; vectors of length > 1 not supported in Arrow",
+    class = "arrow_not_supported"
+  )
+})

Reply via email to