This is an automated email from the ASF dual-hosted git repository.

jonkeane pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 187197c369 GH-43349: [R] Fix altrep string columns from readr (#43351)
187197c369 is described below

commit 187197c369058f7d1377c1b161c469a9e4542caf
Author: Jonathan Keane <[email protected]>
AuthorDate: Sat Jul 27 11:53:05 2024 -0500

    GH-43349: [R] Fix altrep string columns from readr (#43351)
    
    ### Rationale for this change
    
    To resolve the reverse dependency issue with `parquetize`
    
    ### What changes are included in this PR?
    
    One step towards resolving the issue
    
    ### Are these changes tested?
    
    yes
    
    ### Are there any user-facing changes?
    
    no
    * GitHub Issue: #43349
    
    Authored-by: Jonathan Keane <[email protected]>
    Signed-off-by: Jonathan Keane <[email protected]>
---
 r/src/arrow_cpp11.h         | 11 ++++++++++-
 r/tests/testthat/test-csv.R | 17 +++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h
index b2ed66b83c..073b577d63 100644
--- a/r/src/arrow_cpp11.h
+++ b/r/src/arrow_cpp11.h
@@ -138,7 +138,13 @@ inline R_xlen_t r_string_size(SEXP s) {
 }  // namespace unsafe
 
 inline SEXP utf8_strings(SEXP x) {
-  return cpp11::unwind_protect([x] {
+  return cpp11::unwind_protect([&] {
+    // ensure that x is not actually altrep first this also ensures that
+    // x is not altrep even after it is materialized
+    bool was_altrep = ALTREP(x);
+    if (was_altrep) {
+      x = PROTECT(Rf_duplicate(x));
+    }
     R_xlen_t n = XLENGTH(x);
 
     // if `x` is an altrep of some sort, this will
@@ -152,6 +158,9 @@ inline SEXP utf8_strings(SEXP x) {
         SET_STRING_ELT(x, i, Rf_mkCharCE(Rf_translateCharUTF8(s), CE_UTF8));
       }
     }
+    if (was_altrep) {
+      UNPROTECT(1);
+    }
     return x;
   });
 }
diff --git a/r/tests/testthat/test-csv.R b/r/tests/testthat/test-csv.R
index 36f1f229a6..a6291ebda0 100644
--- a/r/tests/testthat/test-csv.R
+++ b/r/tests/testthat/test-csv.R
@@ -738,5 +738,22 @@ test_that("read_csv2_arrow correctly parses comma 
decimals", {
   tf <- tempfile()
   writeLines("x;y\n1,2;c", con = tf)
   expect_equal(read_csv2_arrow(tf), tibble(x = 1.2, y = "c"))
+})
+
+test_that("altrep columns can roundtrip to table", {
+  tf <- tempfile()
+  on.exit(unlink(tf))
+  write.csv(tbl, tf, row.names = FALSE)
+
+  # read in, some columns will be altrep by default
+  new_df <- read_csv_arrow(tf)
+  expect_equal(tbl, as_tibble(arrow_table(new_df)))
+
+  # but also if we materialize the vector
+  # this could also be accomplished with printing
+  new_df <- read_csv_arrow(tf)
+  test_arrow_altrep_force_materialize(new_df$chr)
 
+  # we should still be able to turn this into a table
+  expect_equal(tbl, as_tibble(arrow_table(new_df)))
 })

Reply via email to