romainfrancois commented on a change in pull request #10730:
URL: https://github.com/apache/arrow/pull/10730#discussion_r671330603
##########
File path: r/src/altrep.cpp
##########
@@ -44,25 +44,68 @@ extern "C" {
#endif
#include <arrow/array.h>
+#include <arrow/util/bitmap_reader.h>
+
+#include "./r_task_group.h"
namespace arrow {
namespace r {
+template <typename T>
+T na_sentinel();
+
+template <>
+inline double na_sentinel<double>() {
+ return NA_REAL;
+}
+
+template <>
+inline int na_sentinel<int>() {
+ return NA_INTEGER;
+}
+
+template <typename T>
+void UseSentinel(const std::shared_ptr<Array>& array) {
+ auto n = array->length();
+ auto null_count = array->null_count();
+ internal::BitmapReader bitmap_reader(array->null_bitmap()->data(),
array->offset(), n);
+
+ auto* data = array->data()->GetMutableValues<T>(1);
+
+ for (R_xlen_t i = 0, k = 0; k < null_count; i++, bitmap_reader.Next()) {
+ if (bitmap_reader.IsNotSet()) {
+ k++;
+ data[i] = na_sentinel<T>();
+ }
+ }
+}
+
template <int sexp_type>
-struct ArrayNoNull {
+struct AltrepVector {
using data_type = typename std::conditional<sexp_type == INTSXP, int,
double>::type;
static void DeleteArray(std::shared_ptr<Array>* ptr) { delete ptr; }
using Pointer = cpp11::external_pointer<std::shared_ptr<Array>, DeleteArray>;
- // altrep object around an Array with no nulls
+ // altrep object around an Array
// data1: an external pointer to a shared pointer to the Array
// data2: not used
- static SEXP Make(R_altrep_class_t class_t, const std::shared_ptr<Array>&
array) {
+ static SEXP Make(R_altrep_class_t class_t, const std::shared_ptr<Array>&
array,
+ RTasks& tasks) {
// we don't need the whole r6 object, just an external pointer
- // that retain the array
+ // that retain the Array
Pointer xp(new std::shared_ptr<Array>(array));
+ // we only get here if the Array data buffer is mutable
+ // UseSentinel() puts the R sentinel where the data is null
+ auto null_count = array->null_count();
+ if (null_count > 0) {
Review comment:
Maybe, but once we're in Dataptr/Getregion, we can no longer do this in a
task.
Here, we always do it (even though it might not be needed), but we do it in a
task, so it runs in parallel.
If we did it in Dataptr/Getregion, we would have to keep track of whether it
had already been done and, if not, do it serially.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]