This is an automated email from the ASF dual-hosted git repository.
thisisnic pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new bb4e492872 GH-49534: [R] Implement dplyr recode_values(),
replace_values(), and replace_when() (#49536)
bb4e492872 is described below
commit bb4e492872c3112f68897e0046eae4088f15eff9
Author: Nic Crane <[email protected]>
AuthorDate: Tue Apr 7 20:27:13 2026 +0100
GH-49534: [R] Implement dplyr recode_values(), replace_values(), and
replace_when() (#49536)
### Rationale for this change
Implement new dplyr functions
### What changes are included in this PR?
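Adds Arrow bindings for `dplyr::replace_when()`, `dplyr::replace_values()`,
and `dplyr::recode_values()` in r/R/dplyr-funcs-conditional.R. The existing
`case_when()` binding is refactored to share the new helpers
(`parse_condition_formulas()` and `build_case_when_expr()`), `purrr::compact`
is imported, and the generated function documentation is updated.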
### Are these changes tested?
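Yes. New `test_that()` blocks for `replace_when()`, `replace_values()`, and
`recode_values()` are added to r/tests/testthat/test-dplyr-funcs-conditional.R,
and the existing `case_when()` test for a `.default` of size > 1 is updated.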
### Are there any user-facing changes?
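Yes. `recode_values()`, `replace_values()`, and `replace_when()` can now be
used in Arrow dplyr queries (`recode_values()` does not support the `ptype`
argument or `unmatched = "error"`). In addition, `case_when()` with a
`.default` of length > 1 now raises an `arrow_not_supported` error instead of
a `validation_error`.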
### AI Use
Code generated using Claude, with plenty of input from me. I've gone
through it in detail and refactored lots, but it needs a last pass before it's
ready for review.
* GitHub Issue: #49534
Lead-authored-by: Nic Crane <[email protected]>
Co-authored-by: Jonathan Keane <[email protected]>
Signed-off-by: Nic Crane <[email protected]>
---
r/NAMESPACE | 1 +
r/R/arrow-package.R | 2 +-
r/R/dplyr-funcs-conditional.R | 260 +++++++++++++++++++---
r/R/dplyr-funcs-doc.R | 5 +-
r/man/acero.Rd | 7 +-
r/man/read_json_arrow.Rd | 2 +-
r/man/schema.Rd | 2 +-
r/tests/testthat/test-dplyr-funcs-conditional.R | 281 +++++++++++++++++++++++-
8 files changed, 521 insertions(+), 39 deletions(-)
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 9dbbad9c45..f42944fb58 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -432,6 +432,7 @@ importFrom(bit64,str.integer64)
importFrom(glue,glue)
importFrom(methods,as)
importFrom(purrr,as_mapper)
+importFrom(purrr,compact)
importFrom(purrr,flatten)
importFrom(purrr,imap)
importFrom(purrr,imap_chr)
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index 3964feb74d..9de7afe022 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -18,7 +18,7 @@
#' @importFrom stats quantile median na.omit na.exclude na.pass na.fail
#' @importFrom R6 R6Class
#' @importFrom purrr as_mapper map map2 map_chr map2_chr map_dbl map_dfr
map_int map_lgl keep imap imap_chr
-#' @importFrom purrr flatten reduce walk
+#' @importFrom purrr compact flatten reduce walk
#' @importFrom assertthat assert_that is.string
#' @importFrom rlang list2 %||% is_false abort dots_n warn enquo quo_is_null
enquos is_integerish quos quo
#' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind
set_names exec
diff --git a/r/R/dplyr-funcs-conditional.R b/r/R/dplyr-funcs-conditional.R
index a7df2e8f0d..b2d5a6ac79 100644
--- a/r/R/dplyr-funcs-conditional.R
+++ b/r/R/dplyr-funcs-conditional.R
@@ -15,6 +15,169 @@
# specific language governing permissions and limitations
# under the License.
+#' Parse logical condition formulas
+#'
+#' Converts condition ~ value formulas into Arrow expressions. Unlike
+#' [parse_value_mapping()], the LHS must be a logical expression (not a value
+#' to match against).
+#'
+#' @param formulas A list of two-sided formulas where LHS is a logical
condition
+#' and RHS is the value to use when TRUE (e.g., `x > 5 ~ "high"`).
+#' @param mask The data mask for evaluating formula expressions.
+#'
+#' @return A list with `query` (list of logical expressions) and `value`
+#' (list of replacement expressions).
+#'
+#' @keywords internal
+#' @noRd
+parse_condition_formulas <- function(formulas, mask) {
+ fn <- call_name(rlang::caller_call())
+ # Compact NULL entries (allows conditional formulas like: if (cond) x ~ y)
+ formulas <- compact(formulas)
+ n <- length(formulas)
+ query <- vector("list", n)
+ value <- vector("list", n)
+ # Process each formula: condition ~ value
+ for (i in seq_len(n)) {
+ f <- formulas[[i]]
+ if (!is_formula(f, lhs = TRUE)) {
+ validation_error(paste0("Each argument to ", fn, "() must be a two-sided
formula"))
+ }
+ # f[[2]] is LHS (logical condition), f[[3]] is RHS (value when TRUE)
+ query[[i]] <- arrow_eval(f[[2]], mask)
+ value[[i]] <- arrow_eval(f[[3]], mask)
+ # Validate LHS is logical (unlike parse_value_mapping which does equality
matching)
+ if (!call_binding("is.logical", query[[i]])) {
+ validation_error(paste0("Left side of each formula in ", fn, "() must be
a logical expression"))
+ }
+ }
+ list(query = query, value = value)
+}
+
+#' Create case_when Expression from query/value lists
+#' @param query List of logical Arrow Expressions.
+#' @param value List of value Arrow Expressions.
+#' @return An Arrow Expression representing the case_when.
+#' @keywords internal
+#' @noRd
+build_case_when_expr <- function(query, value) {
+ Expression$create(
+ "case_when",
+ args = c(
+ Expression$create(
+ "make_struct",
+ args = query,
+ options = list(field_names = as.character(seq_along(query)))
+ ),
+ value
+ )
+ )
+}
+
+#' Build a match expression for x against a value (scalar, NA, or vector).
+#' @param x Arrow Expression for the column to match against.
+#' @param match_value Value to match - R scalar, vector, or NA. Expressions
+#' are compared with equality.
+#' @return Arrow Expression that is TRUE when x matches match_value.
+#' @keywords internal
+#' @noRd
+build_match_expr <- function(x, match_value) {
+ # Expressions or length-1 non-NA: use equality directly
+ if (inherits(match_value, "Expression") || length(match_value) == 1 &&
!is.na(match_value)) {
+ return(x == match_value)
+ }
+
+ # R scalar NA requires is.na() since x == NA returns NA in Arrow
+ if (length(match_value) == 1) {
+ return(call_binding("is.na", x))
+ }
+
+ # R vector: use %in%, handling NA separately if present
+ has_na <- any(is.na(match_value))
+ non_na_values <- match_value[!is.na(match_value)]
+
+ if (length(non_na_values) == 0) {
+ call_binding("is.na", x)
+ } else if (has_na) {
+ call_binding("%in%", x, non_na_values) | call_binding("is.na", x)
+ } else {
+ call_binding("%in%", x, match_value)
+ }
+}
+
+#' Build query/value lists from parallel from/to vectors.
+#' NA values in `from` use is.na() for matching.
+#' @param x Arrow Expression for the column to match against.
+#' @param from Vector of values to match.
+#' @param to Vector of replacement values (recycled to length of `from`).
+#' @return list(query, value) for use with build_case_when_expr().
+#' @keywords internal
+#' @noRd
+parse_from_to_mapping <- function(x, from, to) {
+ n <- length(from)
+ to <- vctrs::vec_recycle(to, n)
+ query <- map(from, ~ build_match_expr(x, .x))
+ value <- map(to, Expression$scalar)
+ list(query = query, value = value)
+}
+
+#' Build query/value lists from value ~ replacement formulas.
+#' NA values on LHS use is.na() for matching.
+#' @param x Arrow Expression for the column to match against.
+#' @param formulas List of two-sided formulas (value ~ replacement).
+#' @param mask Data mask for evaluating formula expressions.
+#' @param fn Calling function name (for error messages).
+#' @return list(query, value) for use with build_case_when_expr().
+#' @keywords internal
+#' @noRd
+parse_formula_mapping <- function(x, formulas, mask, fn) {
+ # Compact NULL entries (allows conditional formulas like: if (cond) x ~ y)
+ formulas <- compact(formulas)
+ n <- length(formulas)
+ query <- vector("list", n)
+ value <- vector("list", n)
+ for (i in seq_len(n)) {
+ f <- formulas[[i]]
+ if (!is_formula(f, lhs = TRUE)) {
+ validation_error(paste0("Each argument to ", fn, "() must be a two-sided
formula"))
+ }
+ # f[[2]] is LHS (value to match), f[[3]] is RHS (replacement)
+ lhs <- arrow_eval(f[[2]], mask)
+ query[[i]] <- build_match_expr(x, lhs)
+ value[[i]] <- arrow_eval(f[[3]], mask)
+ }
+ list(query = query, value = value)
+}
+
+#' Dispatch to formula or from/to parser based on which args are provided.
+#' Returns list(query, value) or NULL if no mappings.
+#' @param x Arrow Expression for the column to match against.
+#' @param formulas List of two-sided formulas (value ~ replacement).
+#' @param from Vector of values to match (alternative to formulas).
+#' @param to Vector of replacement values (used with `from`).
+#' @param mask The data mask for evaluating formula expressions.
+#' @keywords internal
+#' @noRd
+parse_value_mapping <- function(x, formulas = list(), from = NULL, to = NULL,
mask) {
+ fn <- call_name(rlang::caller_call())
+ # Mutually exclusive interfaces
+ if (length(formulas) > 0 && !is.null(from)) {
+ validation_error(paste0("Can't use both `...` and `from`/`to` in ", fn,
"()"))
+ }
+
+ if (length(formulas) > 0) {
+ parse_formula_mapping(x, formulas, mask, fn)
+ } else if (!is.null(from)) {
+ if (is.null(to)) {
+ validation_error("`to` must be provided when using `from`")
+ }
+ parse_from_to_mapping(x, from, to)
+ } else {
+ # No mappings provided
+ NULL
+ }
+}
+
register_bindings_conditional <- function() {
register_binding("%in%", function(x, table) {
# We use `is_in` here, unlike with Arrays, which use `is_in_meta_binary`
@@ -133,44 +296,79 @@ register_bindings_conditional <- function() {
}
formulas <- list2(...)
- n <- length(formulas)
- if (n == 0) {
+ if (length(formulas) == 0) {
validation_error("No cases provided")
}
- query <- vector("list", n)
- value <- vector("list", n)
- mask <- caller_env()
- for (i in seq_len(n)) {
- f <- formulas[[i]]
- if (!inherits(f, "formula")) {
- validation_error("Each argument to case_when() must be a two-sided
formula")
- }
- query[[i]] <- arrow_eval(f[[2]], mask)
- value[[i]] <- arrow_eval(f[[3]], mask)
- if (!call_binding("is.logical", query[[i]])) {
- validation_error("Left side of each formula in case_when() must be a
logical expression")
- }
- }
+ parsed <- parse_condition_formulas(formulas, caller_env())
+ query <- parsed$query
+ value <- parsed$value
if (!is.null(.default)) {
if (length(.default) != 1) {
- validation_error(paste0("`.default` must have size 1, not size ",
length(.default), "."))
+ arrow_not_supported("`.default` must be size 1; vectors of length >
1")
}
-
- query[n + 1] <- TRUE
- value[n + 1] <- .default
+ n <- length(query)
+ query[[n + 1]] <- TRUE
+ value[[n + 1]] <- .default
}
- Expression$create(
- "case_when",
- args = c(
- Expression$create(
- "make_struct",
- args = query,
- options = list(field_names = as.character(seq_along(query)))
- ),
- value
- )
- )
+ build_case_when_expr(query, value)
},
notes = "`.ptype` and `.size` arguments not supported"
)
+
+ register_binding("dplyr::replace_when", function(x, ...) {
+ formulas <- list2(...)
+ if (length(formulas) == 0) {
+ return(x)
+ }
+ parsed <- parse_condition_formulas(formulas, caller_env())
+ query <- parsed$query
+ value <- parsed$value
+ n <- length(query)
+ query[[n + 1]] <- TRUE
+ value[[n + 1]] <- x
+ build_case_when_expr(query, value)
+ })
+
+ register_binding("dplyr::replace_values", function(x, ..., from = NULL, to =
NULL) {
+ parsed <- parse_value_mapping(x, list2(...), from, to, caller_env())
+ if (is.null(parsed)) {
+ return(x)
+ }
+ query <- parsed$query
+ value <- parsed$value
+ n <- length(query)
+ query[[n + 1]] <- TRUE
+ value[[n + 1]] <- x
+ build_case_when_expr(query, value)
+ })
+
+ register_binding(
+ "dplyr::recode_values",
+ function(x, ..., from = NULL, to = NULL, default = NULL, unmatched =
"default", ptype = NULL) {
+ if (!is.null(ptype)) {
+ arrow_not_supported("`recode_values()` with `ptype` specified")
+ }
+ if (unmatched != "default") {
+ arrow_not_supported('`recode_values()` with `unmatched` other than
"default"')
+ }
+
+ parsed <- parse_value_mapping(x, list2(...), from, to, caller_env())
+ if (is.null(parsed)) {
+ validation_error("`...` can't be empty")
+ }
+ query <- parsed$query
+ value <- parsed$value
+
+ if (!is.null(default)) {
+ if (length(default) != 1) {
+ arrow_not_supported("`default` must be size 1; vectors of length >
1")
+ }
+ n <- length(query)
+ query[[n + 1]] <- TRUE
+ value[[n + 1]] <- Expression$scalar(default)
+ }
+ build_case_when_expr(query, value)
+ },
+ notes = "`ptype` argument and `unmatched = \"error\"` not supported"
+ )
}
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index e0b3dd095c..f7ca29833c 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -21,7 +21,7 @@
#'
#' The `arrow` package contains methods for 38 `dplyr` table functions, many of
#' which are "verbs" that do transformations to one or more tables.
-#' The package also has mappings of 226 R functions to the corresponding
+#' The package also has mappings of 229 R functions to the corresponding
#' functions in the Arrow compute library. These allow you to write code inside
#' of `dplyr` methods that call R functions, including many in packages like
#' `stringr` and `lubridate`, and they will get translated to Arrow and run
@@ -214,6 +214,9 @@
#' * [`if_else()`][dplyr::if_else()]
#' * [`n()`][dplyr::n()]
#' * [`n_distinct()`][dplyr::n_distinct()]
+#' * [`recode_values()`][dplyr::recode_values()]: `ptype` argument and
`unmatched = "error"` not supported
+#' * [`replace_values()`][dplyr::replace_values()]
+#' * [`replace_when()`][dplyr::replace_when()]
#' * [`when_all()`][dplyr::when_all()]
#' * [`when_any()`][dplyr::when_any()]
#'
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index a43617493a..f721aa5d9f 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -9,7 +9,7 @@
\description{
The \code{arrow} package contains methods for 38 \code{dplyr} table functions,
many of
which are "verbs" that do transformations to one or more tables.
-The package also has mappings of 226 R functions to the corresponding
+The package also has mappings of 229 R functions to the corresponding
functions in the Arrow compute library. These allow you to write code inside
of \code{dplyr} methods that call R functions, including many in packages like
\code{stringr} and \code{lubridate}, and they will get translated to Arrow and
run
@@ -72,7 +72,7 @@ can assume that the function works in Acero just as it does
in R.
Functions can be called either as \code{pkg::fun()} or just \code{fun()}, i.e.
both
\code{str_sub()} and \code{stringr::str_sub()} work.
-In addition to these functions, you can call any of Arrow's 281 compute
+In addition to these functions, you can call any of Arrow's 253 compute
functions directly. Arrow has many functions that don't map to an existing R
function. In other cases where there is an R function mapping, you can still
call the Arrow function directly if you don't want the adaptations that the R
@@ -207,6 +207,9 @@ Valid values are "s", "ms" (default), "us", "ns".
\item \code{\link[dplyr:if_else]{if_else()}}
\item \code{\link[dplyr:context]{n()}}
\item \code{\link[dplyr:n_distinct]{n_distinct()}}
+\item \code{\link[dplyr:recode-and-replace-values]{recode_values()}}:
\code{ptype} argument and \code{unmatched = "error"} not supported
+\item \code{\link[dplyr:recode-and-replace-values]{replace_values()}}
+\item \code{\link[dplyr:case-and-replace-when]{replace_when()}}
\item \code{\link[dplyr:when-any-all]{when_all()}}
\item \code{\link[dplyr:when-any-all]{when_any()}}
}
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index abf6b8fc44..b809a63bcc 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -54,7 +54,7 @@ If \code{schema} is not provided, Arrow data types are
inferred from the data:
\item JSON numbers convert to \code{\link[=int64]{int64()}}, falling back to
\code{\link[=float64]{float64()}} if a non-integer is encountered.
\item JSON strings of the kind "YYYY-MM-DD" and "YYYY-MM-DD hh:mm:ss" convert
to \code{\link[=timestamp]{timestamp(unit = "s")}},
falling back to \code{\link[=utf8]{utf8()}} if a conversion error occurs.
-\item JSON arrays convert to a \code{\link[vctrs:list_of]{vctrs::list_of()}}
type, and inference proceeds recursively on the JSON arrays' values.
+\item JSON arrays convert to a \code{\link[=list_of]{list_of()}} type, and
inference proceeds recursively on the JSON arrays' values.
\item Nested JSON objects convert to a \code{\link[=struct]{struct()}} type,
and inference proceeds recursively on the JSON objects' values.
}
diff --git a/r/man/schema.Rd b/r/man/schema.Rd
index ff77a05d84..65ab2eea0d 100644
--- a/r/man/schema.Rd
+++ b/r/man/schema.Rd
@@ -7,7 +7,7 @@
schema(...)
}
\arguments{
-\item{...}{\link[vctrs:fields]{fields}, field name/\link[=data-type]{data
type} pairs (or a list of), or object from which to extract
+\item{...}{\link[=field]{fields}, field name/\link[=data-type]{data type}
pairs (or a list of), or object from which to extract
a schema}
}
\description{
diff --git a/r/tests/testthat/test-dplyr-funcs-conditional.R
b/r/tests/testthat/test-dplyr-funcs-conditional.R
index d99843ab9d..f7d5b4d6b9 100644
--- a/r/tests/testthat/test-dplyr-funcs-conditional.R
+++ b/r/tests/testthat/test-dplyr-funcs-conditional.R
@@ -296,8 +296,8 @@ test_that("case_when()", {
)
expect_arrow_eval_error(
case_when(int > 5 ~ 1, .default = c(0, 1)),
- "`.default` must have size 1, not size 2",
- class = "validation_error"
+ "`.default` must be size 1; vectors of length > 1 not supported in Arrow",
+ class = "arrow_not_supported"
)
expect_arrow_eval_error(
@@ -599,3 +599,280 @@ test_that("when_all()", {
class = "arrow_not_supported"
)
})
+
+test_that("replace_when()", {
+ # replaces matching values, keeps original otherwise
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_when(int, int > 5 ~ 100L)) |>
+ collect(),
+ tbl
+ )
+
+ # multiple conditions
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_when(int, int > 7 ~ 100L, int < 3 ~ 0L)) |>
+ collect(),
+ tbl
+ )
+
+ # overlapping conditions - first match wins
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_when(int, int > 3 ~ 100L, int > 5 ~ 200L)) |>
+ collect(),
+ tbl
+ )
+
+ # no formulas returns x unchanged
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_when(int)) |>
+ collect(),
+ tbl
+ )
+
+ # Conditions on LHS of formulas are compacted out
+ condition <- FALSE
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_when(int, if (condition) int > 5 ~ 100L, int < 3
~ 0L)) |>
+ collect(),
+ tbl
+ )
+
+ # validation errors
+ expect_arrow_eval_error(
+ replace_when(int, TRUE),
+ "Each argument to replace_when\\(\\) must be a two-sided formula",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ replace_when(int, ~100L),
+ "Each argument to replace_when\\(\\) must be a two-sided formula",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ replace_when(int, 0L ~ 100L),
+ "Left side of each formula in replace_when\\(\\) must be a logical
expression",
+ class = "validation_error"
+ )
+})
+
+test_that("replace_values()", {
+ # formula interface
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_values(chr, "a" ~ "A", "b" ~ "B")) |>
+ collect(),
+ tbl
+ )
+
+ # from/to interface
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_values(chr, from = c("a", "b"), to = c("A",
"B"))) |>
+ collect(),
+ tbl
+ )
+
+ # from/to with list of vectors - multiple values map to single replacement
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_values(chr, from = list(c("a", "b"), "c"), to =
c("AB", "C"))) |>
+ collect(),
+ tbl
+ )
+
+ # unmatched values kept
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_values(chr, "a" ~ "A")) |>
+ collect(),
+ tbl
+ )
+
+ # works with numeric values
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_values(int, 1L ~ 100L, 2L ~ 200L)) |>
+ collect(),
+ tbl
+ )
+
+ # explicit NA matching with formula
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_values(chr, "a" ~ "A", NA ~ "missing")) |>
+ collect(),
+ tbl
+ )
+
+ # explicit NA matching with from/to
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_values(chr, from = c("a", NA), to = c("A",
"missing"))) |>
+ collect(),
+ tbl
+ )
+
+ # multiple values on LHS matches any
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_values(chr, c("a", "b") ~ "AB")) |>
+ collect(),
+ tbl
+ )
+
+ # multiple values on LHS including NA matches any including NA
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_values(chr, c(NA, "a") ~ "matched")) |>
+ collect(),
+ tbl
+ )
+
+ # from/to with list containing NA matches NA too
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_values(chr, from = list(c(NA, "a"), "b"), to =
c("matched", "B"))) |>
+ collect(),
+ tbl
+ )
+
+ # no replacements returns x unchanged
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = replace_values(chr)) |>
+ collect(),
+ tbl
+ )
+
+ # validation errors
+ expect_arrow_eval_error(
+ replace_values(chr, "A"),
+ "Each argument to replace_values\\(\\) must be a two-sided formula",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ replace_values(chr, ~"A"),
+ "Each argument to replace_values\\(\\) must be a two-sided formula",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ replace_values(chr, "a" ~ "A", from = "b"),
+ "Can't use both `...` and `from`/`to` in replace_values\\(\\)",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ replace_values(chr, from = "a"),
+ "`to` must be provided when using `from`",
+ class = "validation_error"
+ )
+})
+
+test_that("recode_values()", {
+ # formula interface with default NA
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = recode_values(chr, "a" ~ "A", "b" ~ "B")) |>
+ collect(),
+ tbl
+ )
+
+ # from/to interface
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = recode_values(chr, from = c("a", "b"), to = c("A",
"B"))) |>
+ collect(),
+ tbl
+ )
+
+ # from/to with list of vectors - multiple values map to single replacement
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = recode_values(chr, from = list(c("a", "b"), "c"), to =
c("AB", "C"))) |>
+ collect(),
+ tbl
+ )
+
+ # custom default
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = recode_values(chr, "a" ~ "A", default = "other")) |>
+ collect(),
+ tbl
+ )
+
+ # works with numeric values
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = recode_values(int, 1L ~ 100L, 2L ~ 200L)) |>
+ collect(),
+ tbl
+ )
+
+ # NA input with default - NA also becomes default
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = recode_values(chr, "a" ~ "A", "b" ~ "B", default =
"other")) |>
+ collect(),
+ tbl
+ )
+
+ # multiple values on LHS matches any
+ compare_dplyr_binding(
+ .input |>
+ mutate(result = recode_values(chr, c("a", "b") ~ "AB", default =
"other")) |>
+ collect(),
+ tbl
+ )
+
+ # validation errors
+ expect_arrow_eval_error(
+ recode_values(chr),
+ "`\\.\\.\\.` can't be empty",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ recode_values(chr, "A"),
+ "Each argument to recode_values\\(\\) must be a two-sided formula",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ recode_values(chr, ~"A"),
+ "Each argument to recode_values\\(\\) must be a two-sided formula",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ recode_values(chr, "a" ~ "A", from = "b"),
+ "Can't use both `...` and `from`/`to` in recode_values\\(\\)",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ recode_values(chr, from = "a"),
+ "`to` must be provided when using `from`",
+ class = "validation_error"
+ )
+ expect_arrow_eval_error(
+ recode_values(chr, "a" ~ "A", ptype = character()),
+ "`recode_values\\(\\)` with `ptype` specified not supported in Arrow",
+ class = "arrow_not_supported"
+ )
+ expect_arrow_eval_error(
+ recode_values(chr, "a" ~ "A", unmatched = "error"),
+ "`recode_values\\(\\)` with `unmatched` other than \"default\" not
supported in Arrow",
+ class = "arrow_not_supported"
+ )
+ expect_arrow_eval_error(
+ recode_values(chr, "a" ~ "A", unmatched = "wat"),
+ "`recode_values\\(\\)` with `unmatched` other than \"default\" not
supported in Arrow",
+ class = "arrow_not_supported"
+ )
+ expect_arrow_eval_error(
+ recode_values(chr, "a" ~ "A", default = c("x", "y")),
+ "`default` must be size 1; vectors of length > 1 not supported in Arrow",
+ class = "arrow_not_supported"
+ )
+})