bkietz commented on a change in pull request #10730:
URL: https://github.com/apache/arrow/pull/10730#discussion_r671149781
##########
File path: r/src/altrep.cpp
##########
@@ -44,25 +44,68 @@ extern "C" {
#endif
#include <arrow/array.h>
+#include <arrow/util/bitmap_reader.h>
+
+#include "./r_task_group.h"
namespace arrow {
namespace r {
+template <typename T>
+T na_sentinel();
+
+template <>
+inline double na_sentinel<double>() {
+ return NA_REAL;
+}
+
+template <>
+inline int na_sentinel<int>() {
+ return NA_INTEGER;
+}
+
+template <typename T>
+void UseSentinel(const std::shared_ptr<Array>& array) {
+ auto n = array->length();
+ auto null_count = array->null_count();
+ internal::BitmapReader bitmap_reader(array->null_bitmap()->data(),
array->offset(), n);
+
+ auto* data = array->data()->GetMutableValues<T>(1);
+
+ for (R_xlen_t i = 0, k = 0; k < null_count; i++, bitmap_reader.Next()) {
+ if (bitmap_reader.IsNotSet()) {
+ k++;
+ data[i] = na_sentinel<T>();
+ }
+ }
+}
+
template <int sexp_type>
-struct ArrayNoNull {
+struct AltrepVector {
using data_type = typename std::conditional<sexp_type == INTSXP, int,
double>::type;
Review comment:
```suggestion
using data_type = typename std::conditional<sexp_type == REALSXP, double,
int>::type;
```
(Speculative support for logical)
##########
File path: r/src/array_to_vector.cpp
##########
@@ -69,13 +69,25 @@ class Converter {
// special case when there is only one array
if (chunked_array_->num_chunks() == 1) {
const auto& array = chunked_array_->chunk(0);
- if (arrow::r::GetBoolOption("arrow.use_altrep", true) && array->length()
> 0 &&
- array->null_count() == 0) {
+ // using altrep if
+ // - the arrow.use_altrep is set to TRUE or unset
+ // - the array has at least one element
+ // - either it has no nulls or the data is mutable
+ if (arrow::r::GetBoolOption("arrow.use_altrep", true) && array->length()
> 0) {
switch (array->type()->id()) {
case arrow::Type::DOUBLE:
- return arrow::r::MakeDoubleArrayNoNull(array);
+ if (array->null_count() == 0 ||
array->data()->buffers[1]->is_mutable()) {
Review comment:
It's not safe to do this if anyone else might mutate this buffer. We
don't have a completely safe way to check uniqueness, either. As a stop gap:
```suggestion
if (array->null_count() == 0 || array->data().use_count() == 1 &&
array->data()->buffers[1].use_count() == 1 &&
array->data()->buffers[1]->is_mutable()) {
```
##########
File path: r/src/altrep.cpp
##########
@@ -44,25 +44,68 @@ extern "C" {
#endif
#include <arrow/array.h>
+#include <arrow/util/bitmap_reader.h>
+
+#include "./r_task_group.h"
namespace arrow {
namespace r {
+template <typename T>
+T na_sentinel();
+
+template <>
+inline double na_sentinel<double>() {
+ return NA_REAL;
+}
+
+template <>
+inline int na_sentinel<int>() {
+ return NA_INTEGER;
+}
+
+template <typename T>
+void UseSentinel(const std::shared_ptr<Array>& array) {
+ auto n = array->length();
+ auto null_count = array->null_count();
+ internal::BitmapReader bitmap_reader(array->null_bitmap()->data(),
array->offset(), n);
+
+ auto* data = array->data()->GetMutableValues<T>(1);
+
+ for (R_xlen_t i = 0, k = 0; k < null_count; i++, bitmap_reader.Next()) {
+ if (bitmap_reader.IsNotSet()) {
+ k++;
+ data[i] = na_sentinel<T>();
+ }
+ }
+}
+
template <int sexp_type>
-struct ArrayNoNull {
+struct AltrepVector {
using data_type = typename std::conditional<sexp_type == INTSXP, int,
double>::type;
static void DeleteArray(std::shared_ptr<Array>* ptr) { delete ptr; }
using Pointer = cpp11::external_pointer<std::shared_ptr<Array>, DeleteArray>;
- // altrep object around an Array with no nulls
+ // altrep object around an Array
// data1: an external pointer to a shared pointer to the Array
// data2: not used
- static SEXP Make(R_altrep_class_t class_t, const std::shared_ptr<Array>&
array) {
+ static SEXP Make(R_altrep_class_t class_t, const std::shared_ptr<Array>&
array,
+ RTasks& tasks) {
// we don't need the whole r6 object, just an external pointer
- // that retain the array
+ // that retain the Array
Pointer xp(new std::shared_ptr<Array>(array));
+ // we only get here if the Array data buffer is mutable
+ // UseSentinel() puts the R sentinel where the data is null
+ auto null_count = array->null_count();
+ if (null_count > 0) {
+ tasks.Append(true, [array]() {
+ UseSentinel<data_type>(array);
Review comment:
In the case of a few long arrays, it'd be more performant to divide this
work into L2 sized chunks
##########
File path: r/src/altrep.cpp
##########
@@ -44,25 +44,68 @@ extern "C" {
#endif
#include <arrow/array.h>
+#include <arrow/util/bitmap_reader.h>
+
+#include "./r_task_group.h"
namespace arrow {
namespace r {
+template <typename T>
+T na_sentinel();
+
+template <>
+inline double na_sentinel<double>() {
+ return NA_REAL;
+}
+
+template <>
+inline int na_sentinel<int>() {
+ return NA_INTEGER;
+}
+
+template <typename T>
+void UseSentinel(const std::shared_ptr<Array>& array) {
+ auto n = array->length();
+ auto null_count = array->null_count();
+ internal::BitmapReader bitmap_reader(array->null_bitmap()->data(),
array->offset(), n);
+
+ auto* data = array->data()->GetMutableValues<T>(1);
+
+ for (R_xlen_t i = 0, k = 0; k < null_count; i++, bitmap_reader.Next()) {
+ if (bitmap_reader.IsNotSet()) {
+ k++;
+ data[i] = na_sentinel<T>();
+ }
+ }
+}
+
template <int sexp_type>
-struct ArrayNoNull {
+struct AltrepVector {
using data_type = typename std::conditional<sexp_type == INTSXP, int,
double>::type;
static void DeleteArray(std::shared_ptr<Array>* ptr) { delete ptr; }
using Pointer = cpp11::external_pointer<std::shared_ptr<Array>, DeleteArray>;
- // altrep object around an Array with no nulls
+ // altrep object around an Array
// data1: an external pointer to a shared pointer to the Array
// data2: not used
- static SEXP Make(R_altrep_class_t class_t, const std::shared_ptr<Array>&
array) {
+ static SEXP Make(R_altrep_class_t class_t, const std::shared_ptr<Array>&
array,
+ RTasks& tasks) {
// we don't need the whole r6 object, just an external pointer
- // that retain the array
+ // that retain the Array
Pointer xp(new std::shared_ptr<Array>(array));
+ // we only get here if the Array data buffer is mutable
+ // UseSentinel() puts the R sentinel where the data is null
+ auto null_count = array->null_count();
+ if (null_count > 0) {
Review comment:
Would it make sense to do this lazily in dataptr/getregion?
##########
File path: r/src/altrep.cpp
##########
@@ -104,63 +148,144 @@ struct ArrayNoNull {
return const_cast<void*>(Dataptr_or_null(vec));
}
- // by definition, there are no NA
- static int No_NA(SEXP vec) { return 1; }
+ static int No_NA(SEXP vec) { return Get(vec)->null_count() == 0; }
static void Init(R_altrep_class_t class_t, DllInfo* dll) {
// altrep
- R_set_altrep_Length_method(class_t, ArrayNoNull::Length);
- R_set_altrep_Inspect_method(class_t, ArrayNoNull::Inspect);
- R_set_altrep_Duplicate_method(class_t, ArrayNoNull::Duplicate);
+ R_set_altrep_Length_method(class_t, AltrepVector::Length);
+ R_set_altrep_Inspect_method(class_t, AltrepVector::Inspect);
+ R_set_altrep_Duplicate_method(class_t, AltrepVector::Duplicate);
// altvec
- R_set_altvec_Dataptr_method(class_t, ArrayNoNull::Dataptr);
- R_set_altvec_Dataptr_or_null_method(class_t, ArrayNoNull::Dataptr_or_null);
+ R_set_altvec_Dataptr_method(class_t, AltrepVector::Dataptr);
+ R_set_altvec_Dataptr_or_null_method(class_t,
AltrepVector::Dataptr_or_null);
}
};
-struct DoubleArrayNoNull {
+struct AltrepVectorDouble {
+ using Base = AltrepVector<REALSXP>;
static R_altrep_class_t class_t;
+ static SEXP Sum(SEXP x, Rboolean narm) {
+ const auto& array = Base::Get(x);
+
+ if (narm == FALSE && array->null_count()) {
+ return Rf_ScalarReal(NA_REAL);
+ }
+
+ // TODO: instead of returning NULL, use arrow::compute() something
+ // to calculate the sum of `array`
+ return NULL;
+ }
+
+ static SEXP Min(SEXP x, Rboolean narm) {
+ const auto& array = Base::Get(x);
+
+ if (narm == FALSE && array->null_count()) {
+ return Rf_ScalarReal(NA_REAL);
+ }
+
+ // TODO: instead of returning NULL, use arrow::compute() something
+ // to calculate the min of `array`
+ return NULL;
+ }
+
+ static SEXP Max(SEXP x, Rboolean narm) {
+ const auto& array = Base::Get(x);
+
+ if (narm == FALSE && array->null_count()) {
+ return Rf_ScalarReal(NA_REAL);
+ }
+
+ // TODO: instead of returning NULL, use arrow::compute() something
+ // to calculate the max of `array`
+ return NULL;
+ }
+
static void Init(DllInfo* dll) {
- class_t = R_make_altreal_class("array_nonull_dbl_vector", "arrow", dll);
- ArrayNoNull<REALSXP>::Init(class_t, dll);
- R_set_altreal_No_NA_method(class_t, ArrayNoNull<REALSXP>::No_NA);
+ class_t = R_make_altreal_class("array_dbl_vector", "arrow", dll);
+ AltrepVector<REALSXP>::Init(class_t, dll);
+
+ R_set_altreal_No_NA_method(class_t, AltrepVector<REALSXP>::No_NA);
+
+ R_set_altreal_Sum_method(class_t, Sum);
+ R_set_altreal_Min_method(class_t, Min);
+ R_set_altreal_Max_method(class_t, Max);
}
- static SEXP Make(const std::shared_ptr<Array>& array) {
- return ArrayNoNull<REALSXP>::Make(class_t, array);
+ static SEXP Make(const std::shared_ptr<Array>& array, RTasks& tasks) {
+ return AltrepVector<REALSXP>::Make(class_t, array, tasks);
}
};
-struct Int32ArrayNoNull {
+struct AltrepVectorInt32 {
+ using Base = AltrepVector<INTSXP>;
static R_altrep_class_t class_t;
+ static SEXP Sum(SEXP x, Rboolean narm) {
+ const auto& array = Base::Get(x);
+
+ if (narm == FALSE && array->null_count()) {
+ return Rf_ScalarInteger(NA_INTEGER);
+ }
+
+ // TODO: instead of returning NULL, use arrow::compute() something
+ // to calculate the sum of `array`
+ return NULL;
+ }
+
+ static SEXP Min(SEXP x, Rboolean narm) {
+ const auto& array = Base::Get(x);
+
+ if (narm == FALSE && array->null_count()) {
+ return Rf_ScalarInteger(NA_INTEGER);
+ }
+
+ // TODO: instead of returning NULL, use arrow::compute() something
+ // to calculate the min of `array`
+ return NULL;
+ }
+
+ static SEXP Max(SEXP x, Rboolean narm) {
+ const auto& array = Base::Get(x);
+
+ if (narm == FALSE && array->null_count()) {
+ return Rf_ScalarInteger(NA_INTEGER);
+ }
+
+ // TODO: instead of returning NULL, use arrow::compute() something
+ // to calculate the max of `array`
+ return NULL;
+ }
static void Init(DllInfo* dll) {
- class_t = R_make_altinteger_class("array_nonull_int_vector", "arrow", dll);
- ArrayNoNull<INTSXP>::Init(class_t, dll);
- R_set_altinteger_No_NA_method(class_t, ArrayNoNull<INTSXP>::No_NA);
+ class_t = R_make_altinteger_class("array_int_vector", "arrow", dll);
+ AltrepVector<INTSXP>::Init(class_t, dll);
Review comment:
Please remove Base or use it consistently
##########
File path: r/src/altrep.cpp
##########
@@ -44,25 +44,68 @@ extern "C" {
#endif
#include <arrow/array.h>
+#include <arrow/util/bitmap_reader.h>
+
+#include "./r_task_group.h"
namespace arrow {
namespace r {
+template <typename T>
+T na_sentinel();
Review comment:
Why can't we use `cpp11::na`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]