This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new c651e84a fix: Ensure we don't call cuMemAlloc with 0 bytesize (#534)
c651e84a is described below

commit c651e84a38f5766545f6cedf1786cc62d5710b13
Author: Ashwin Srinath <[email protected]>
AuthorDate: Thu Jun 20 17:11:07 2024 -0400

    fix: Ensure we don't call cuMemAlloc with 0 bytesize (#534)
    
    According to the CUDA docs for
    
[cuMemAlloc](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gb82d2a09844a58dd9e744dc31e8aa467):
    
    > If bytesize is 0,
    
[cuMemAlloc()](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__MEM.html#group__CUDA__MEM_1gb82d2a09844a58dd9e744dc31e8aa467)
    returns
    
[CUDA_ERROR_INVALID_VALUE](https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1ggc6c391505e117393cc2558fff6bfc2e990696c86fcee1f536a1ec7d25867feeb).
    
    We end up calling `cuMemAlloc()` with `0` bytesize when allocating
    device buffers with no null mask. Thus, the following change was causing
    `nanoarrow_device_cuda_test` to fail:
    
    ```diff
    @@ -207,7 +207,8 @@ TEST_P(StringTypeParameterizedTestFixture, 
ArrowDeviceCudaArrayViewString) {
       ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
       ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("abc")), 
NANOARROW_OK);
       ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("defg")), 
NANOARROW_OK);
    -  ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
    +  ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("defg")), 
NANOARROW_OK);
    +  // ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
       ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), 
NANOARROW_OK);
    
       ASSERT_EQ(ArrowDeviceArrayInit(cpu, &device_array, &array, nullptr), 
NANOARROW_OK);
    ```
    
    In this PR, I've fixed this by simply skipping the call to `cuMemAlloc`.
    The resulting buffer will have `nullptr` as its `data` member and `0` as
    its `size_bytes`, which I believe is the desired outcome.
    
    I also modified the test above to include cases with no nulls.
---
 src/nanoarrow/nanoarrow_device_cuda.c       |  6 ++-
 src/nanoarrow/nanoarrow_device_cuda_test.cc | 72 ++++++++++++++++++++---------
 2 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/src/nanoarrow/nanoarrow_device_cuda.c 
b/src/nanoarrow/nanoarrow_device_cuda.c
index a39dfa1b..56ff4dfd 100644
--- a/src/nanoarrow/nanoarrow_device_cuda.c
+++ b/src/nanoarrow/nanoarrow_device_cuda.c
@@ -124,7 +124,11 @@ static ArrowErrorCode ArrowDeviceCudaAllocateBuffer(struct 
ArrowDevice* device,
   switch (device->device_type) {
     case ARROW_DEVICE_CUDA: {
       CUdeviceptr dptr = 0;
-      err = cuMemAlloc(&dptr, (size_t)size_bytes);
+      if (size_bytes > 0) {  // cuMemalloc requires non-zero size_bytes
+        err = cuMemAlloc(&dptr, (size_t)size_bytes);
+      } else {
+        err = CUDA_SUCCESS;
+      }
       ptr = (void*)dptr;
       op = "cuMemAlloc";
       break;
diff --git a/src/nanoarrow/nanoarrow_device_cuda_test.cc 
b/src/nanoarrow/nanoarrow_device_cuda_test.cc
index d3aad566..0a861443 100644
--- a/src/nanoarrow/nanoarrow_device_cuda_test.cc
+++ b/src/nanoarrow/nanoarrow_device_cuda_test.cc
@@ -15,10 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
-#include <errno.h>
-
 #include <cuda.h>
+#include <errno.h>
 #include <gtest/gtest.h>
+#include <tuple>
 
 #include "nanoarrow/nanoarrow_device.h"
 #include "nanoarrow/nanoarrow_device_cuda.h"
@@ -185,29 +185,38 @@ TEST(NanoarrowDeviceCuda, DeviceCudaBufferCopy) {
 }
 
 class StringTypeParameterizedTestFixture
-    : public ::testing::TestWithParam<std::pair<ArrowDeviceType, enum 
ArrowType>> {
+    : public ::testing::TestWithParam<std::tuple<ArrowDeviceType, enum 
ArrowType, bool>> {
  protected:
   std::pair<ArrowDeviceType, enum ArrowType> info;
 };
 
-std::pair<ArrowDeviceType, enum ArrowType> DeviceAndType(ArrowDeviceType 
device_type,
-                                                         enum ArrowType 
arrow_type) {
-  return {device_type, arrow_type};
+std::tuple<ArrowDeviceType, enum ArrowType, bool> TestParams(ArrowDeviceType 
device_type,
+                                                             enum ArrowType 
arrow_type,
+                                                             bool 
include_null) {
+  return {device_type, arrow_type, include_null};
 }
 
 TEST_P(StringTypeParameterizedTestFixture, ArrowDeviceCudaArrayViewString) {
   struct ArrowDevice* cpu = ArrowDeviceCpu();
-  struct ArrowDevice* gpu = ArrowDeviceCuda(GetParam().first, 0);
+  struct ArrowDevice* gpu = ArrowDeviceCuda(std::get<0>(GetParam()), 0);
   struct ArrowArray array;
   struct ArrowDeviceArray device_array;
   struct ArrowDeviceArrayView device_array_view;
-  enum ArrowType string_type = GetParam().second;
+  enum ArrowType string_type = std::get<1>(GetParam());
+  bool include_null = std::get<2>(GetParam());
+  int64_t expected_data_size;  // expected
 
   ASSERT_EQ(ArrowArrayInitFromType(&array, string_type), NANOARROW_OK);
   ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
   ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("abc")), 
NANOARROW_OK);
   ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("defg")), 
NANOARROW_OK);
-  ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+  if (include_null) {
+    ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+    expected_data_size = 7;
+  } else {
+    ASSERT_EQ(ArrowArrayAppendString(&array, ArrowCharView("hjk")), 
NANOARROW_OK);
+    expected_data_size = 10;
+  }
   ASSERT_EQ(ArrowArrayFinishBuildingDefault(&array, nullptr), NANOARROW_OK);
 
   ASSERT_EQ(ArrowDeviceArrayInit(cpu, &device_array, &array, nullptr), 
NANOARROW_OK);
@@ -217,7 +226,7 @@ TEST_P(StringTypeParameterizedTestFixture, 
ArrowDeviceCudaArrayViewString) {
   ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array, 
nullptr),
             NANOARROW_OK);
 
-  EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
+  EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 
expected_data_size);
   EXPECT_EQ(device_array.array.length, 3);
 
   // Copy required to Cuda
@@ -232,7 +241,7 @@ TEST_P(StringTypeParameterizedTestFixture, 
ArrowDeviceCudaArrayViewString) {
   ASSERT_EQ(device_array2.device_id, gpu->device_id);
   ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array2, 
nullptr),
             NANOARROW_OK);
-  EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
+  EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 
expected_data_size);
   EXPECT_EQ(device_array_view.array_view.length, 3);
   EXPECT_EQ(device_array2.array.length, 3);
 
@@ -251,9 +260,16 @@ TEST_P(StringTypeParameterizedTestFixture, 
ArrowDeviceCudaArrayViewString) {
   ASSERT_EQ(ArrowDeviceArrayViewSetArray(&device_array_view, &device_array, 
nullptr),
             NANOARROW_OK);
 
-  EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 7);
-  EXPECT_EQ(memcmp(device_array_view.array_view.buffer_views[2].data.data, 
"abcdefg", 7),
-            0);
+  EXPECT_EQ(device_array_view.array_view.buffer_views[2].size_bytes, 
expected_data_size);
+
+  if (include_null) {
+    EXPECT_EQ(
+        memcmp(device_array_view.array_view.buffer_views[2].data.data, 
"abcdefg", 7), 0);
+  } else {
+    EXPECT_EQ(
+        memcmp(device_array_view.array_view.buffer_views[2].data.data, 
"abcdefghjk", 7),
+        0);
+  }
 
   ArrowArrayRelease(&device_array.array);
   ArrowDeviceArrayViewReset(&device_array_view);
@@ -261,12 +277,22 @@ TEST_P(StringTypeParameterizedTestFixture, 
ArrowDeviceCudaArrayViewString) {
 
 INSTANTIATE_TEST_SUITE_P(
     NanoarrowDeviceCuda, StringTypeParameterizedTestFixture,
-    ::testing::Values(DeviceAndType(ARROW_DEVICE_CUDA, NANOARROW_TYPE_STRING),
-                      DeviceAndType(ARROW_DEVICE_CUDA, 
NANOARROW_TYPE_LARGE_STRING),
-                      DeviceAndType(ARROW_DEVICE_CUDA, NANOARROW_TYPE_BINARY),
-                      DeviceAndType(ARROW_DEVICE_CUDA, 
NANOARROW_TYPE_LARGE_BINARY),
-                      DeviceAndType(ARROW_DEVICE_CUDA_HOST, 
NANOARROW_TYPE_STRING),
-                      DeviceAndType(ARROW_DEVICE_CUDA_HOST, 
NANOARROW_TYPE_LARGE_STRING),
-                      DeviceAndType(ARROW_DEVICE_CUDA_HOST, 
NANOARROW_TYPE_BINARY),
-                      DeviceAndType(ARROW_DEVICE_CUDA_HOST,
-                                    NANOARROW_TYPE_LARGE_BINARY)));
+    ::testing::Values(
+        TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_STRING, true),
+        TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_STRING, false),
+        TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_STRING, true),
+        TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_STRING, false),
+        TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_BINARY, true),
+        TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_BINARY, false),
+        TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_BINARY, true),
+        TestParams(ARROW_DEVICE_CUDA, NANOARROW_TYPE_LARGE_BINARY, false),
+        TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_STRING, true),
+        TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_STRING, false),
+        TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_STRING, true),
+        TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_STRING, false),
+        TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_BINARY, true),
+        TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_BINARY, false),
+        TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_BINARY, true),
+        TestParams(ARROW_DEVICE_CUDA_HOST, NANOARROW_TYPE_LARGE_BINARY, false)
+
+            ));

Reply via email to