paleolimbot commented on code in PR #45951:
URL: https://github.com/apache/arrow/pull/45951#discussion_r2023917967


##########
r/src/altrep.cpp:
##########
@@ -1267,23 +1285,23 @@ sexp test_arrow_altrep_copy_by_dataptr(sexp x) {
 
   if (TYPEOF(x) == INTSXP) {
     cpp11::writable::integers out(Rf_xlength(x));
-    int* ptr = reinterpret_cast<int*>(DATAPTR(x));
+    int* ptr = reinterpret_cast<int*>(INTEGER(x));

Review Comment:
   ```suggestion
       int* ptr = INTEGER(x);
   ```
   
   (and below as well)



##########
r/src/altrep.cpp:
##########
@@ -1267,23 +1293,23 @@ sexp test_arrow_altrep_copy_by_dataptr(sexp x) {
 
   if (TYPEOF(x) == INTSXP) {
     cpp11::writable::integers out(Rf_xlength(x));
-    int* ptr = reinterpret_cast<int*>(DATAPTR(x));
+    int* ptr = reinterpret_cast<int*>(INTEGER(x));
     for (R_xlen_t i = 0; i < n; i++) {
       out[i] = ptr[i];
     }
     return out;
   } else if (TYPEOF(x) == REALSXP) {
     cpp11::writable::doubles out(Rf_xlength(x));
-    double* ptr = reinterpret_cast<double*>(DATAPTR(x));
+    double* ptr = reinterpret_cast<double*>(REAL(x));
     for (R_xlen_t i = 0; i < n; i++) {
       out[i] = ptr[i];
     }
     return out;
   } else if (TYPEOF(x) == STRSXP) {
     cpp11::writable::strings out(Rf_xlength(x));
-    SEXP* ptr = reinterpret_cast<SEXP*>(DATAPTR(x));
     for (R_xlen_t i = 0; i < n; i++) {
-      out[i] = ptr[i];
+      SEXP str_elt = reinterpret_cast<SEXP>(STRING_ELT(x, i));
+      out[i] = str_elt;

Review Comment:
   Yes, you'd have to remove the test that depends on it, too. This test was 
added in a follow up to https://github.com/apache/arrow/pull/13415 which fixed 
a case of multiple materializations when R's internals called DATAPTR(). As 
written, this test no longer calls DATAPTR() for a character vector, so it may 
be less confusing to remove it.
   
   In R  some things that might call DATAPTR are `altrep_vctr[1] = "something 
else"`, `unique()`, and `match(altrep_vctr, c("one", "two", "three"))` (one 
could check with the debugger or a print to see if it's actually getting 
called).



##########
r/tests/testthat/test-altrep.R:
##########
@@ -244,11 +244,12 @@ test_that("element access methods for character ALTREP", {
   expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
   expect_false(test_arrow_altrep_is_materialized(altrep))
 
-  # DATAPTR() should always materialize for strings
+  # DATAPTR() does not materialize

Review Comment:
   Yes, this was the crux of https://github.com/apache/arrow/pull/13415 (we 
were materializing the entire array multiple times when R's internals called 
DATAPTR()).



##########
r/src/altrep.cpp:
##########
@@ -220,7 +220,11 @@ struct AltrepVectorPrimitive : public 
AltrepVectorBase<AltrepVectorPrimitive<sex
       SEXP copy = PROTECT(Rf_allocVector(sexp_type, size));
 
       // copy the data from the array, through Get_region
-      Get_region(alt, 0, size, reinterpret_cast<c_type*>(DATAPTR(copy)));
+      if constexpr (std::is_same_v<c_type, double>) {
+        Get_region(alt, 0, size, REAL(copy));
+      } else {

Review Comment:
   Here and one other place would benefit from the `if constexpr 
(std::is_same_v<c_type, int>)` suggestion as well



##########
r/src/arrow_types.h:
##########
@@ -173,14 +173,33 @@ template <typename RVector>
 class RBuffer : public MutableBuffer {
  public:
   explicit RBuffer(RVector vec)
-      : MutableBuffer(reinterpret_cast<uint8_t*>(DATAPTR(vec)),
+      : MutableBuffer(reinterpret_cast<uint8_t*>(getDataPointer(vec)),
                       vec.size() * sizeof(typename RVector::value_type),
                       arrow::CPUDevice::memory_manager(gc_memory_pool())),
         vec_(vec) {}
 
  private:
   // vec_ holds the memory
   RVector vec_;
+
+  static void* getDataPointer(RVector& vec) {
+    if (TYPEOF(vec) == LGLSXP) {
+      return LOGICAL(vec);
+    } else if (TYPEOF(vec) == INTSXP) {
+      return INTEGER(vec);
+    } else if (TYPEOF(vec) == REALSXP) {
+      return REAL(vec);
+    } else if (TYPEOF(vec) == CPLXSXP) {
+      return COMPLEX(vec);
+    } else if (TYPEOF(vec) == STRSXP) {
+      // Technically this returns the address to the first element of the 
string vector

Review Comment:
   I think erroring is a better option here (I believe exposing the bytes of a 
STRSXP's data buffer would be a programming error on our end).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to