felipecrv commented on code in PR #36489:
URL: https://github.com/apache/arrow/pull/36489#discussion_r1253434847


##########
cpp/src/arrow/buffer.h:
##########
@@ -57,18 +57,25 @@ class ARROW_EXPORT Buffer {
   ///
   /// \note The passed memory must be kept alive through some other means
   Buffer(const uint8_t* data, int64_t size)
-      : is_mutable_(false), is_cpu_(true), data_(data), size_(size), 
capacity_(size) {
+      : is_mutable_(false), is_cpu_(true), data_(data), size_(size), 
capacity_(size), device_type_(DeviceType::CPU) {
     SetMemoryManager(default_cpu_memory_manager());
   }
 
   Buffer(const uint8_t* data, int64_t size, std::shared_ptr<MemoryManager> mm,
-         std::shared_ptr<Buffer> parent = NULLPTR)
+         std::shared_ptr<Buffer> parent = NULLPTR, DeviceType device_type = 
DeviceType::UNKNOWN)
       : is_mutable_(false),
         data_(data),
         size_(size),
         capacity_(size),
         parent_(std::move(parent)) {
+    // will set device_type from the memory manager
     SetMemoryManager(std::move(mm));
+    // if a device type is specified, use that instead. for example:
+    // CUDA_HOST. The CudaMemoryManager will set device_type_ to CUDA,
+    // but you can specify CUDA_HOST as the device type to override it.
+    if (device_type != DeviceType::UNKNOWN) {
+      device_type_ = device_type;
+    }

Review Comment:
   Would a DCHECK be useful here to confirm that `device_type_` ended-up being 
compatible with the memory manager?



##########
cpp/src/arrow/c/bridge.cc:
##########
@@ -663,6 +672,115 @@ Status ExportRecordBatch(const RecordBatch& batch, struct 
ArrowArray* out,
   return Status::OK();
 }
 
+//////////////////////////////////////////////////////////////////////////
+// C device arrays
+
+Result<std::pair<DeviceType, int64_t>> validate_device_info(const ArrayData& 
data) {
+  DeviceType device_type = DeviceType::UNKNOWN;
+  int64_t device_id = -1;
+
+  for (const auto& buf : data.buffers) {
+    if (!buf) {
+      // some buffers might be null
+      // for example, the null bitmap is optional if there's no nulls
+      continue;
+    }
+
+    if (device_type == DeviceType::UNKNOWN) {
+      device_type = buf->device_type();
+      device_id = buf->device()->device_id();
+      continue;
+    }
+
+    if (buf->device_type() != device_type) {
+      return Status::Invalid(
+          "exporting device array with buffers on more than one device.");
+    }
+
+    if (buf->device()->device_id() != device_id) {
+      return Status::Invalid(
+          "exporting device array with buffers on multiple device ids.");
+    }
+  }
+
+  // recursively check the children
+  auto info = std::make_pair(device_type, device_id);

Review Comment:
   If `data.buffers` is empty (that's possible on REEs I think), you're still 
on `device_type == UNKNOWN` here.
   
   I think that best way here is to have this auxiliary/recursive function be
   
   `Status validate_device_info(const ArrayData& data, DeviceType *device_type, 
int64_t *device_id);`
   
   so at any point, the values can change from unknown to some actual value.
   
   



##########
cpp/src/arrow/c/bridge.cc:
##########
@@ -663,6 +672,115 @@ Status ExportRecordBatch(const RecordBatch& batch, struct 
ArrowArray* out,
   return Status::OK();
 }
 
+//////////////////////////////////////////////////////////////////////////
+// C device arrays
+
+Result<std::pair<DeviceType, int64_t>> validate_device_info(const ArrayData& 
data) {
+  DeviceType device_type = DeviceType::UNKNOWN;
+  int64_t device_id = -1;
+
+  for (const auto& buf : data.buffers) {
+    if (!buf) {
+      // some buffers might be null
+      // for example, the null bitmap is optional if there's no nulls
+      continue;
+    }
+
+    if (device_type == DeviceType::UNKNOWN) {
+      device_type = buf->device_type();
+      device_id = buf->device()->device_id();
+      continue;
+    }
+
+    if (buf->device_type() != device_type) {
+      return Status::Invalid(
+          "exporting device array with buffers on more than one device.");

Review Comment:
   Errors messages that form a sentence are usually Capitalized.



##########
cpp/src/arrow/buffer.h:
##########
@@ -57,18 +57,25 @@ class ARROW_EXPORT Buffer {
   ///
   /// \note The passed memory must be kept alive through some other means
   Buffer(const uint8_t* data, int64_t size)
-      : is_mutable_(false), is_cpu_(true), data_(data), size_(size), 
capacity_(size) {
+      : is_mutable_(false), is_cpu_(true), data_(data), size_(size), 
capacity_(size), device_type_(DeviceType::CPU) {

Review Comment:
   Do we still need the `is_cpu_` boolean? Is there a situation in which 
`is_cpu_` is `false` and `device_type_ != CPU`?



##########
cpp/src/arrow/c/bridge.cc:
##########
@@ -663,6 +672,115 @@ Status ExportRecordBatch(const RecordBatch& batch, struct 
ArrowArray* out,
   return Status::OK();
 }
 
+//////////////////////////////////////////////////////////////////////////
+// C device arrays
+
+Result<std::pair<DeviceType, int64_t>> validate_device_info(const ArrayData& 
data) {

Review Comment:
   is snake_case (`validate_device_info`) common in this file?



##########
cpp/src/arrow/c/bridge.cc:
##########
@@ -522,6 +522,9 @@ struct ExportedArrayPrivateData : 
PoolAllocationMixin<ExportedArrayPrivateData>
 
   std::shared_ptr<ArrayData> data_;
 
+  ReleaseEventFunc sync_release_;
+  void* sync_event_;

Review Comment:
   Should these two be `= nullptr;` to ensure they don't get implicitly 
initialized with bad data?



##########
cpp/src/arrow/c/bridge.h:
##########
@@ -166,6 +166,139 @@ Result<std::shared_ptr<RecordBatch>> 
ImportRecordBatch(struct ArrowArray* array,
 
 /// @}
 
+/// \defgroup c-data-device-interface Functions for working with the C data 
device
+/// interface.
+///
+/// @{
+
+/// \brief EXPERIMENTAL: Type for freeing a sync event
+///
+/// If synchronization is necessary for accessing the data on a device,
+/// a pointer to an event needs to be passed when exporting the device
+/// array. It's the responsibility of the release function for the array
+/// to release the event.
+using ReleaseEventFunc = void (*)(void*);
+
+/// \brief EXPERIMENTAL: Export C++ Array as an ArrowDeviceArray.
+///
+/// The resulting ArrowDeviceArray struct keeps the array data and buffers 
alive
+/// until its release callback is called by the consumer. All buffers in
+/// the provided array MUST have the same device_type, otherwise an error
+/// will be returned.
+///
+/// If a non-null sync_event is provided, then the sync_release func must also 
be
+/// non-null. If the sync_event is null, then the sync_release parameter is 
ignored.

Review Comment:
   can this be changed from "is ignored" to "is not called"?



##########
cpp/src/arrow/c/bridge.cc:
##########
@@ -1275,7 +1398,19 @@ class ImportedBuffer : public Buffer {
 
 struct ArrayImporter {
   explicit ArrayImporter(const std::shared_ptr<DataType>& type)
-      : type_(type), zero_size_buffer_(std::make_shared<Buffer>(kZeroSizeArea, 
0)) {}
+      : type_(type),
+        zero_size_buffer_(std::make_shared<Buffer>(kZeroSizeArea, 0)),
+        device_type_(DeviceType::CPU) {}
+
+  Status Import(struct ArrowDeviceArray* src, const DeviceMemoryMgr& mapper) {
+    ARROW_ASSIGN_OR_RAISE(memory_mgr_,
+                          mapper.get_manager(src->device_type, 
src->device_id));
+    device_type_ = static_cast<DeviceType>(src->device_type);
+    RETURN_NOT_OK(Import(&src->array));
+    import_->sync_event_ = src->sync_event;
+    memory_mgr_.reset();

Review Comment:
   If not, I would initialize it to `UNKNOWN` in the ctor and set it to 
`UNKNOWN` here as `device_type_` doesn't seem to be used when importing from 
CPU arrays.



##########
cpp/src/arrow/c/bridge.cc:
##########
@@ -1275,7 +1398,19 @@ class ImportedBuffer : public Buffer {
 
 struct ArrayImporter {
   explicit ArrayImporter(const std::shared_ptr<DataType>& type)
-      : type_(type), zero_size_buffer_(std::make_shared<Buffer>(kZeroSizeArea, 
0)) {}
+      : type_(type),
+        zero_size_buffer_(std::make_shared<Buffer>(kZeroSizeArea, 0)),
+        device_type_(DeviceType::CPU) {}
+
+  Status Import(struct ArrowDeviceArray* src, const DeviceMemoryMgr& mapper) {
+    ARROW_ASSIGN_OR_RAISE(memory_mgr_,
+                          mapper.get_manager(src->device_type, 
src->device_id));
+    device_type_ = static_cast<DeviceType>(src->device_type);
+    RETURN_NOT_OK(Import(&src->array));
+    import_->sync_event_ = src->sync_event;
+    memory_mgr_.reset();

Review Comment:
   Should `device_type_` be restored as well?



##########
cpp/src/arrow/c/bridge.h:
##########
@@ -166,6 +166,139 @@ Result<std::shared_ptr<RecordBatch>> 
ImportRecordBatch(struct ArrowArray* array,
 
 /// @}
 
+/// \defgroup c-data-device-interface Functions for working with the C data 
device
+/// interface.
+///
+/// @{
+
+/// \brief EXPERIMENTAL: Type for freeing a sync event
+///
+/// If synchronization is necessary for accessing the data on a device,
+/// a pointer to an event needs to be passed when exporting the device
+/// array. It's the responsibility of the release function for the array
+/// to release the event.
+using ReleaseEventFunc = void (*)(void*);
+
+/// \brief EXPERIMENTAL: Export C++ Array as an ArrowDeviceArray.
+///
+/// The resulting ArrowDeviceArray struct keeps the array data and buffers 
alive
+/// until its release callback is called by the consumer. All buffers in
+/// the provided array MUST have the same device_type, otherwise an error
+/// will be returned.
+///
+/// If a non-null sync_event is provided, then the sync_release func must also 
be
+/// non-null. If the sync_event is null, then the sync_release parameter is 
ignored.
+///
+/// \param[in] array Array object to export
+/// \param[in] sync_event A pointer to an event-like object if necessary for
+/// synchronization, otherwise null. \param[in] sync_release Function pointer 
to release
+/// the sync event \param[out] out C struct to export the array to \param[out] 
out_schema
+/// optional C struct to export the array type to

Review Comment:
   line break needed before \param



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to