paleolimbot commented on code in PR #45951:
URL: https://github.com/apache/arrow/pull/45951#discussion_r2021335064


##########
r/src/altrep.cpp:
##########
@@ -269,13 +273,21 @@ struct AltrepVectorPrimitive : public 
AltrepVectorBase<AltrepVectorPrimitive<sex
     }
 
     // Otherwise we have to materialize and hand the pointer to data2
-    return DATAPTR(Materialize(alt));
+    if constexpr (std::is_same_v<c_type, double>) {
+      return REAL(Materialize(alt));
+    } else {
+      return INTEGER(Materialize(alt));
+    }

Review Comment:
   ```suggestion
       } else if constexpr (std::is_same_v<c_type, int>) {
         return INTEGER(Materialize(alt));
       } else {
         static_assert(false, "ALTREP not implemented for c_type");
       }
   ```
   
   ...maybe safer in case we ever add altraw support?



##########
r/src/altrep.cpp:
##########
@@ -220,7 +220,11 @@ struct AltrepVectorPrimitive : public 
AltrepVectorBase<AltrepVectorPrimitive<sex
       SEXP copy = PROTECT(Rf_allocVector(sexp_type, size));
 
       // copy the data from the array, through Get_region
-      Get_region(alt, 0, size, reinterpret_cast<c_type*>(DATAPTR(copy)));
+      if constexpr (std::is_same_v<c_type, double>) {
+        Get_region(alt, 0, size, reinterpret_cast<double*>(REAL(copy)));

Review Comment:
   ```suggestion
           Get_region(alt, 0, size, REAL(copy));
   ```
   
   (I think this should work)



##########
r/tests/testthat/test-altrep.R:
##########
@@ -170,7 +170,7 @@ test_that("element access methods for int32 ALTREP with 
nulls", {
   expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original)
   expect_false(test_arrow_altrep_is_materialized(altrep))
 
-  # because there are no nulls, DATAPTR() does not materialize
+  # because there are nulls, DATAPTR() does materialize

Review Comment:
   You're correct here that my earlier comment was simply wrong 🙂 



##########
r/tests/testthat/test-altrep.R:
##########
@@ -265,11 +266,12 @@ test_that("element access methods for character ALTREP 
from large_utf8()", {
   expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
   expect_false(test_arrow_altrep_is_materialized(altrep))
 
-  # DATAPTR() should always materialize for strings
+  # DATAPTR() sdoes not materialize

Review Comment:
   Again, I'm almost positive that DATAPTR() should still materialize (but 
check with somebody who has kept up on the internals here!)



##########
r/src/altrep.cpp:
##########
@@ -892,7 +904,19 @@ struct AltrepVectorString : public 
AltrepVectorBase<AltrepVectorString<Type>> {
     return s;
   }
 
-  static void* Dataptr(SEXP alt, Rboolean writeable) { return 
DATAPTR(Materialize(alt)); }
+  static void* Dataptr(SEXP alt, Rboolean writeable) {
+    // DATAPTR(Materialize(alt)) is disallowed by CRAN, so we need to createt 
the array of
+    // string and place the SEXPs in it ourselves with STRING_ELT()
+    SEXP materialized = Materialize(alt);
+
+    R_xlen_t len = XLENGTH(materialized);
+    void** str_array = (void**)R_alloc(len, sizeof(void*));
+    for (R_xlen_t i = 0; i < len; i++) {
+      str_array[i] = (void*)STRING_ELT(materialized, i);
+    }
+
+    return str_array;

Review Comment:
   ```suggestion
       return const_cast<void*>(DATAPTR_RO(Materialize(alt)));
   ```
   
   (Not sure if this works but it's worth a shot!)



##########
r/src/arrow_types.h:
##########
@@ -173,14 +173,33 @@ template <typename RVector>
 class RBuffer : public MutableBuffer {
  public:
   explicit RBuffer(RVector vec)
-      : MutableBuffer(reinterpret_cast<uint8_t*>(DATAPTR(vec)),
+      : MutableBuffer(reinterpret_cast<uint8_t*>(getDataPointer(vec)),
                       vec.size() * sizeof(typename RVector::value_type),
                       arrow::CPUDevice::memory_manager(gc_memory_pool())),
         vec_(vec) {}
 
  private:
   // vec_ holds the memory
   RVector vec_;
+
+  static void* getDataPointer(RVector& vec) {
+    if (TYPEOF(vec) == LGLSXP) {
+      return LOGICAL(vec);
+    } else if (TYPEOF(vec) == INTSXP) {
+      return INTEGER(vec);
+    } else if (TYPEOF(vec) == REALSXP) {
+      return REAL(vec);
+    } else if (TYPEOF(vec) == CPLXSXP) {
+      return COMPLEX(vec);
+    } else if (TYPEOF(vec) == STRSXP) {
+      // Technically this returns the address to the first element of the 
string vector

Review Comment:
   This should almost certainly error if I'm reading this properly (I don't 
think we should ever be exposing the internal `SEXP**` to Arrow and I don't 
think we ever do?)



##########
r/tests/testthat/test-altrep.R:
##########
@@ -244,11 +244,12 @@ test_that("element access methods for character ALTREP", {
   expect_identical(test_arrow_altrep_copy_by_element(altrep), original)
   expect_false(test_arrow_altrep_is_materialized(altrep))
 
-  # DATAPTR() should always materialize for strings
+  # DATAPTR() does not materialize

Review Comment:
   I believe that a call to DATAPTR() should still materialize?



##########
r/src/altrep.cpp:
##########
@@ -1267,23 +1293,23 @@ sexp test_arrow_altrep_copy_by_dataptr(sexp x) {
 
   if (TYPEOF(x) == INTSXP) {
     cpp11::writable::integers out(Rf_xlength(x));
-    int* ptr = reinterpret_cast<int*>(DATAPTR(x));
+    int* ptr = reinterpret_cast<int*>(INTEGER(x));
     for (R_xlen_t i = 0; i < n; i++) {
       out[i] = ptr[i];
     }
     return out;
   } else if (TYPEOF(x) == REALSXP) {
     cpp11::writable::doubles out(Rf_xlength(x));
-    double* ptr = reinterpret_cast<double*>(DATAPTR(x));
+    double* ptr = reinterpret_cast<double*>(REAL(x));
     for (R_xlen_t i = 0; i < n; i++) {
       out[i] = ptr[i];
     }
     return out;
   } else if (TYPEOF(x) == STRSXP) {
     cpp11::writable::strings out(Rf_xlength(x));
-    SEXP* ptr = reinterpret_cast<SEXP*>(DATAPTR(x));
     for (R_xlen_t i = 0; i < n; i++) {
-      out[i] = ptr[i];
+      SEXP str_elt = reinterpret_cast<SEXP>(STRING_ELT(x, i));
+      out[i] = str_elt;

Review Comment:
   I think you can remove this branch since it quite literally peeks into the 
internals they're trying to make opaque and it's just test code. A more 
real-world application would be to find where r-source does this and make sure 
that method works on our ALTREP. I think, but don't know, that 
`SET_STRING_ELT()` and `match()` will call DATAPTR().



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to