This is an automated email from the ASF dual-hosted git repository.
jonkeane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 187197c369 GH-43349: [R] Fix altrep string columns from readr (#43351)
187197c369 is described below
commit 187197c369058f7d1377c1b161c469a9e4542caf
Author: Jonathan Keane <[email protected]>
AuthorDate: Sat Jul 27 11:53:05 2024 -0500
GH-43349: [R] Fix altrep string columns from readr (#43351)
### Rationale for this change
To resolve the reverse dependency issue with `parquetize`
### What changes are included in this PR?
One step towards resolving the issue
### Are these changes tested?
yes
### Are there any user-facing changes?
no
* GitHub Issue: #43349
Authored-by: Jonathan Keane <[email protected]>
Signed-off-by: Jonathan Keane <[email protected]>
---
r/src/arrow_cpp11.h | 11 ++++++++++-
r/tests/testthat/test-csv.R | 17 +++++++++++++++++
2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h
index b2ed66b83c..073b577d63 100644
--- a/r/src/arrow_cpp11.h
+++ b/r/src/arrow_cpp11.h
@@ -138,7 +138,13 @@ inline R_xlen_t r_string_size(SEXP s) {
} // namespace unsafe
inline SEXP utf8_strings(SEXP x) {
- return cpp11::unwind_protect([x] {
+ return cpp11::unwind_protect([&] {
+ // ensure that x is not actually altrep first this also ensures that
+ // x is not altrep even after it is materialized
+ bool was_altrep = ALTREP(x);
+ if (was_altrep) {
+ x = PROTECT(Rf_duplicate(x));
+ }
R_xlen_t n = XLENGTH(x);
// if `x` is an altrep of some sort, this will
@@ -152,6 +158,9 @@ inline SEXP utf8_strings(SEXP x) {
SET_STRING_ELT(x, i, Rf_mkCharCE(Rf_translateCharUTF8(s), CE_UTF8));
}
}
+ if (was_altrep) {
+ UNPROTECT(1);
+ }
return x;
});
}
diff --git a/r/tests/testthat/test-csv.R b/r/tests/testthat/test-csv.R
index 36f1f229a6..a6291ebda0 100644
--- a/r/tests/testthat/test-csv.R
+++ b/r/tests/testthat/test-csv.R
@@ -738,5 +738,22 @@ test_that("read_csv2_arrow correctly parses comma
decimals", {
tf <- tempfile()
writeLines("x;y\n1,2;c", con = tf)
expect_equal(read_csv2_arrow(tf), tibble(x = 1.2, y = "c"))
+})
+
+test_that("altrep columns can roundtrip to table", {
+ tf <- tempfile()
+ on.exit(unlink(tf))
+ write.csv(tbl, tf, row.names = FALSE)
+
+ # read in, some columns will be altrep by default
+ new_df <- read_csv_arrow(tf)
+ expect_equal(tbl, as_tibble(arrow_table(new_df)))
+
+ # but also if we materialize the vector
+ # this could also be accomplished with printing
+ new_df <- read_csv_arrow(tf)
+ test_arrow_altrep_force_materialize(new_df$chr)
+ # we should still be able to turn this into a table
+ expect_equal(tbl, as_tibble(arrow_table(new_df)))
})