paleolimbot commented on code in PR #45951: URL: https://github.com/apache/arrow/pull/45951#discussion_r2021335064
########## r/src/altrep.cpp: ########## @@ -269,13 +273,21 @@ struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sex } // Otherwise we have to materialize and hand the pointer to data2 - return DATAPTR(Materialize(alt)); + if constexpr (std::is_same_v<c_type, double>) { + return REAL(Materialize(alt)); + } else { + return INTEGER(Materialize(alt)); + } Review Comment: ```suggestion } else if constexpr (std::is_same_v<c_type, int>) { return INTEGER(Materialize(alt)); } else { static_assert(false, "ALTREP not implemented for c_type"); } ``` ...maybe safer in case we ever add altraw support? ########## r/src/altrep.cpp: ########## @@ -220,7 +220,11 @@ struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sex SEXP copy = PROTECT(Rf_allocVector(sexp_type, size)); // copy the data from the array, through Get_region - Get_region(alt, 0, size, reinterpret_cast<c_type*>(DATAPTR(copy))); + if constexpr (std::is_same_v<c_type, double>) { + Get_region(alt, 0, size, reinterpret_cast<double*>(REAL(copy))); Review Comment: ```suggestion Get_region(alt, 0, size, REAL(copy)); ``` (I think this should work) ########## r/tests/testthat/test-altrep.R: ########## @@ -170,7 +170,7 @@ test_that("element access methods for int32 ALTREP with nulls", { expect_identical(test_arrow_altrep_copy_by_region(altrep, 123), original) expect_false(test_arrow_altrep_is_materialized(altrep)) - # because there are no nulls, DATAPTR() does not materialize + # because there are nulls, DATAPTR() does materialize Review Comment: You're correct here that my earlier comment was simply wrong 🙂 ########## r/tests/testthat/test-altrep.R: ########## @@ -265,11 +266,12 @@ test_that("element access methods for character ALTREP from large_utf8()", { expect_identical(test_arrow_altrep_copy_by_element(altrep), original) expect_false(test_arrow_altrep_is_materialized(altrep)) - # DATAPTR() should always materialize for strings + # DATAPTR() sdoes not materialize Review Comment: Again, I'm almost positive that DATAPTR() should still materialize (but check with somebody who has kept up on the internals here!) ########## r/src/altrep.cpp: ########## @@ -892,7 +904,19 @@ struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> { return s; } - static void* Dataptr(SEXP alt, Rboolean writeable) { return DATAPTR(Materialize(alt)); } + static void* Dataptr(SEXP alt, Rboolean writeable) { + // DATAPTR(Materialize(alt)) is disallowed by CRAN, so we need to createt the array of + // string and place the SEXPs in it ourselves with STRING_ELT() + SEXP materialized = Materialize(alt); + + R_xlen_t len = XLENGTH(materialized); + void** str_array = (void**)R_alloc(len, sizeof(void*)); + for (R_xlen_t i = 0; i < len; i++) { + str_array[i] = (void*)STRING_ELT(materialized, i); + } + + return str_array; Review Comment: ```suggestion return const_cast<void*>(DATAPTR_RO(Materialize(alt))); ``` (Not sure if this works but it's worth a shot!) ########## r/src/arrow_types.h: ########## @@ -173,14 +173,33 @@ template <typename RVector> class RBuffer : public MutableBuffer { public: explicit RBuffer(RVector vec) - : MutableBuffer(reinterpret_cast<uint8_t*>(DATAPTR(vec)), + : MutableBuffer(reinterpret_cast<uint8_t*>(getDataPointer(vec)), vec.size() * sizeof(typename RVector::value_type), arrow::CPUDevice::memory_manager(gc_memory_pool())), vec_(vec) {} private: // vec_ holds the memory RVector vec_; + + static void* getDataPointer(RVector& vec) { + if (TYPEOF(vec) == LGLSXP) { + return LOGICAL(vec); + } else if (TYPEOF(vec) == INTSXP) { + return INTEGER(vec); + } else if (TYPEOF(vec) == REALSXP) { + return REAL(vec); + } else if (TYPEOF(vec) == CPLXSXP) { + return COMPLEX(vec); + } else if (TYPEOF(vec) == STRSXP) { + // Technically this returns the address to the first element of the string vector Review Comment: This should almost certainly error if I'm reading this properly (I don't think we should ever be exposing the internal `SEXP**` to Arrow and I don't think we ever do?) ########## r/tests/testthat/test-altrep.R: ########## @@ -244,11 +244,12 @@ test_that("element access methods for character ALTREP", { expect_identical(test_arrow_altrep_copy_by_element(altrep), original) expect_false(test_arrow_altrep_is_materialized(altrep)) - # DATAPTR() should always materialize for strings + # DATAPTR() does not materialize Review Comment: I believe that a call to DATAPTR() should still materialize? ########## r/src/altrep.cpp: ########## @@ -1267,23 +1293,23 @@ sexp test_arrow_altrep_copy_by_dataptr(sexp x) { if (TYPEOF(x) == INTSXP) { cpp11::writable::integers out(Rf_xlength(x)); - int* ptr = reinterpret_cast<int*>(DATAPTR(x)); + int* ptr = reinterpret_cast<int*>(INTEGER(x)); for (R_xlen_t i = 0; i < n; i++) { out[i] = ptr[i]; } return out; } else if (TYPEOF(x) == REALSXP) { cpp11::writable::doubles out(Rf_xlength(x)); - double* ptr = reinterpret_cast<double*>(DATAPTR(x)); + double* ptr = reinterpret_cast<double*>(REAL(x)); for (R_xlen_t i = 0; i < n; i++) { out[i] = ptr[i]; } return out; } else if (TYPEOF(x) == STRSXP) { cpp11::writable::strings out(Rf_xlength(x)); - SEXP* ptr = reinterpret_cast<SEXP*>(DATAPTR(x)); for (R_xlen_t i = 0; i < n; i++) { - out[i] = ptr[i]; + SEXP str_elt = reinterpret_cast<SEXP>(STRING_ELT(x, i)); + out[i] = str_elt; Review Comment: I think you can remove this branch since it quite literally peeks into the internals they're trying to make opaque and it's just test code. A more real-world application would be to find where r-source does this and make sure that method works on our ALTREP. I think, but don't know, that `SET_STRING_ELT()` and `match()` will call DATAPTR(). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org