This is an automated email from the ASF dual-hosted git repository.

jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 31fe24dd33 GH-41126: [Python] Basic bindings for Device and MemoryManager classes (#41685)
31fe24dd33 is described below

commit 31fe24dd3345d387ba52d46c2915a909a5667813
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Fri May 31 09:48:54 2024 +0200

    GH-41126: [Python] Basic bindings for Device and MemoryManager classes (#41685)
    
    ### Rationale for this change
    
    Add bindings for the C++ `arrow::Device` and `arrow::MemoryManager` classes.
    
    ### What changes are included in this PR?
    
    Basic bindings by adding the `pyarrow.Device` and `pyarrow.MemoryManager` classes, and just tested for CPU.
    
    What is not included here are additional methods on the `MemoryManager` class (eg to allocate or copy buffers), and this is also not yet tested for CUDA. Planning to do this as follow-ups, and first doing those basic bindings should enable further enhancements to be done in parallel.
    
    ### Are these changes tested?
    
    Yes, for the CPU device only.
    
    * GitHub Issue: #41126
    
    Authored-by: Joris Van den Bossche <[email protected]>
    Signed-off-by: Joris Van den Bossche <[email protected]>
---
 python/pyarrow/__init__.py           |   3 +
 python/pyarrow/device.pxi            | 162 +++++++++++++++++++++++++++++++++++
 python/pyarrow/includes/libarrow.pxd |  35 ++++++++
 python/pyarrow/io.pxi                |  33 +++++++
 python/pyarrow/lib.pxd               |  20 +++++
 python/pyarrow/lib.pyx               |   3 +
 python/pyarrow/tests/test_device.py  |  43 ++++++++++
 python/pyarrow/tests/test_misc.py    |   2 +
 8 files changed, 301 insertions(+)

diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 936f473697..e52e0d242b 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -236,6 +236,9 @@ from pyarrow.lib import (null, bool_,
                          RunEndEncodedScalar, ExtensionScalar)
 
 # Buffers, allocation
+from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
+                         default_cpu_memory_manager)
+
 from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
                          Codec, compress, decompress, allocate_buffer)
 
diff --git a/python/pyarrow/device.pxi b/python/pyarrow/device.pxi
new file mode 100644
index 0000000000..6e60347520
--- /dev/null
+++ b/python/pyarrow/device.pxi
@@ -0,0 +1,162 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+
+cpdef enum DeviceAllocationType:
+    CPU = <char> CDeviceAllocationType_kCPU
+    CUDA = <char> CDeviceAllocationType_kCUDA
+    CUDA_HOST = <char> CDeviceAllocationType_kCUDA_HOST
+    OPENCL = <char> CDeviceAllocationType_kOPENCL
+    VULKAN = <char> CDeviceAllocationType_kVULKAN
+    METAL = <char> CDeviceAllocationType_kMETAL
+    VPI = <char> CDeviceAllocationType_kVPI
+    ROCM = <char> CDeviceAllocationType_kROCM
+    ROCM_HOST = <char> CDeviceAllocationType_kROCM_HOST
+    EXT_DEV = <char> CDeviceAllocationType_kEXT_DEV
+    CUDA_MANAGED = <char> CDeviceAllocationType_kCUDA_MANAGED
+    ONEAPI = <char> CDeviceAllocationType_kONEAPI
+    WEBGPU = <char> CDeviceAllocationType_kWEBGPU
+    HEXAGON = <char> CDeviceAllocationType_kHEXAGON
+
+
+cdef object _wrap_device_allocation_type(CDeviceAllocationType device_type):
+    return DeviceAllocationType(<char> device_type)
+
+
+cdef class Device(_Weakrefable):
+    """
+    Abstract interface for hardware devices
+
+    This object represents a device with access to some memory spaces.
+    When handling a Buffer or raw memory address, it allows deciding in which
+    context the raw memory address should be interpreted
+    (e.g. CPU-accessible memory, or embedded memory on some particular GPU).
+    """
+
+    def __init__(self):
+        raise TypeError("Do not call Device's constructor directly, "
+                        "use the device attribute of the MemoryManager instead.")
+
+    cdef void init(self, const shared_ptr[CDevice]& device):
+        self.device = device
+
+    @staticmethod
+    cdef wrap(const shared_ptr[CDevice]& device):
+        cdef Device self = Device.__new__(Device)
+        self.init(device)
+        return self
+
+    def __eq__(self, other):
+        if not isinstance(other, Device):
+            return False
+        return self.device.get().Equals(deref((<Device>other).device.get()))
+
+    def __repr__(self):
+        return "<pyarrow.Device: {}>".format(frombytes(self.device.get().ToString()))
+
+    @property
+    def type_name(self):
+        """
+        A shorthand for this device's type.
+        """
+        return frombytes(self.device.get().type_name())
+
+    @property
+    def device_id(self):
+        """
+        A device ID to identify this device if there are multiple of this type.
+
+        If there is no "device_id" equivalent (such as for the main CPU device on
+        non-numa systems) returns -1.
+        """
+        return self.device.get().device_id()
+
+    @property
+    def is_cpu(self):
+        """
+        Whether this device is the main CPU device.
+
+        This shorthand method is very useful when deciding whether a memory address
+        is CPU-accessible.
+        """
+        return self.device.get().is_cpu()
+
+    @property
+    def device_type(self):
+        """
+        Return the DeviceAllocationType of this device.
+        """
+        return _wrap_device_allocation_type(self.device.get().device_type())
+
+
+cdef class MemoryManager(_Weakrefable):
+    """
+    An object that provides memory management primitives.
+
+    A MemoryManager is always tied to a particular Device instance.
+    It can also have additional parameters (such as a MemoryPool to
+    allocate CPU memory).
+
+    """
+
+    def __init__(self):
+        raise TypeError("Do not call MemoryManager's constructor directly, "
+                        "use pyarrow.default_cpu_memory_manager() instead.")
+
+    cdef void init(self, const shared_ptr[CMemoryManager]& mm):
+        self.memory_manager = mm
+
+    @staticmethod
+    cdef wrap(const shared_ptr[CMemoryManager]& mm):
+        cdef MemoryManager self = MemoryManager.__new__(MemoryManager)
+        self.init(mm)
+        return self
+
+    def __repr__(self):
+        return "<pyarrow.MemoryManager device: {}>".format(
+            frombytes(self.memory_manager.get().device().get().ToString())
+        )
+
+    @property
+    def device(self):
+        """
+        The device this MemoryManager is tied to.
+        """
+        return Device.wrap(self.memory_manager.get().device())
+
+    @property
+    def is_cpu(self):
+        """
+        Whether this MemoryManager is tied to the main CPU device.
+
+        This shorthand method is very useful when deciding whether a memory
+        address is CPU-accessible.
+        """
+        return self.memory_manager.get().is_cpu()
+
+
+def default_cpu_memory_manager():
+    """
+    Return the default CPU MemoryManager instance.
+
+    The returned singleton instance uses the default MemoryPool.
+    """
+    return MemoryManager.wrap(c_default_cpu_memory_manager())
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 8bfc31edc7..a66f584b83 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -316,6 +316,38 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
     cdef cppclass CProxyMemoryPool" arrow::ProxyMemoryPool"(CMemoryPool):
         CProxyMemoryPool(CMemoryPool*)
 
+    ctypedef enum CDeviceAllocationType "arrow::DeviceAllocationType":
+        CDeviceAllocationType_kCPU "arrow::DeviceAllocationType::kCPU"
+        CDeviceAllocationType_kCUDA "arrow::DeviceAllocationType::kCUDA"
+        CDeviceAllocationType_kCUDA_HOST "arrow::DeviceAllocationType::kCUDA_HOST"
+        CDeviceAllocationType_kOPENCL "arrow::DeviceAllocationType::kOPENCL"
+        CDeviceAllocationType_kVULKAN "arrow::DeviceAllocationType::kVULKAN"
+        CDeviceAllocationType_kMETAL "arrow::DeviceAllocationType::kMETAL"
+        CDeviceAllocationType_kVPI "arrow::DeviceAllocationType::kVPI"
+        CDeviceAllocationType_kROCM "arrow::DeviceAllocationType::kROCM"
+        CDeviceAllocationType_kROCM_HOST "arrow::DeviceAllocationType::kROCM_HOST"
+        CDeviceAllocationType_kEXT_DEV "arrow::DeviceAllocationType::kEXT_DEV"
+        CDeviceAllocationType_kCUDA_MANAGED "arrow::DeviceAllocationType::kCUDA_MANAGED"
+        CDeviceAllocationType_kONEAPI "arrow::DeviceAllocationType::kONEAPI"
+        CDeviceAllocationType_kWEBGPU "arrow::DeviceAllocationType::kWEBGPU"
+        CDeviceAllocationType_kHEXAGON "arrow::DeviceAllocationType::kHEXAGON"
+
+    cdef cppclass CDevice" arrow::Device":
+        const char* type_name()
+        c_string ToString()
+        c_bool Equals(const CDevice& other)
+        int64_t device_id()
+        c_bool is_cpu() const
+        shared_ptr[CMemoryManager] default_memory_manager()
+        CDeviceAllocationType device_type()
+
+    cdef cppclass CMemoryManager" arrow::MemoryManager":
+        const shared_ptr[CDevice] device()
+        c_bool is_cpu() const
+
+    shared_ptr[CMemoryManager] c_default_cpu_memory_manager \
+        " arrow::default_cpu_memory_manager"()
+
     cdef cppclass CBuffer" arrow::Buffer":
         CBuffer(const uint8_t* data, int64_t size)
         const uint8_t* data()
@@ -328,6 +360,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
         c_bool is_mutable() const
         c_string ToHexString()
         c_bool Equals(const CBuffer& other)
+        shared_ptr[CDevice] device()
+        const shared_ptr[CMemoryManager] memory_manager()
+        CDeviceAllocationType device_type()
 
     CResult[shared_ptr[CBuffer]] SliceBufferSafe(
         const shared_ptr[CBuffer]& buffer, int64_t offset)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 9e8026deb4..48b7934209 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -1327,6 +1327,39 @@ cdef class Buffer(_Weakrefable):
         """
         return self.buffer.get().is_cpu()
 
+    @property
+    def device(self):
+        """
+        The device where the buffer resides.
+
+        Returns
+        -------
+        Device
+        """
+        return Device.wrap(self.buffer.get().device())
+
+    @property
+    def memory_manager(self):
+        """
+        The memory manager associated with the buffer.
+
+        Returns
+        -------
+        MemoryManager
+        """
+        return MemoryManager.wrap(self.buffer.get().memory_manager())
+
+    @property
+    def device_type(self):
+        """
+        The device type where the buffer resides.
+
+        Returns
+        -------
+        DeviceAllocationType
+        """
+        return _wrap_device_allocation_type(self.buffer.get().device_type())
+
     @property
     def parent(self):
         cdef shared_ptr[CBuffer] parent_buf = self.buffer.get().parent()
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index bfd266a807..1bc639cc8d 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -524,6 +524,26 @@ cdef class RecordBatch(_Tabular):
     cdef void init(self, const shared_ptr[CRecordBatch]& table)
 
 
+cdef class Device(_Weakrefable):
+    cdef:
+        shared_ptr[CDevice] device
+
+    cdef void init(self, const shared_ptr[CDevice]& device)
+
+    @staticmethod
+    cdef wrap(const shared_ptr[CDevice]& device)
+
+
+cdef class MemoryManager(_Weakrefable):
+    cdef:
+        shared_ptr[CMemoryManager] memory_manager
+
+    cdef void init(self, const shared_ptr[CMemoryManager]& memory_manager)
+
+    @staticmethod
+    cdef wrap(const shared_ptr[CMemoryManager]& mm)
+
+
 cdef class Buffer(_Weakrefable):
     cdef:
         shared_ptr[CBuffer] buffer
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index 3245e50f0f..904e018ffd 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -162,6 +162,9 @@ include "pandas-shim.pxi"
 # Memory pools and allocation
 include "memory.pxi"
 
+# Device type and memory manager
+include "device.pxi"
+
 # DataType, Field, Schema
 include "types.pxi"
 
diff --git a/python/pyarrow/tests/test_device.py b/python/pyarrow/tests/test_device.py
new file mode 100644
index 0000000000..6bdb015be1
--- /dev/null
+++ b/python/pyarrow/tests/test_device.py
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pyarrow as pa
+
+
+def test_device_memory_manager():
+    mm = pa.default_cpu_memory_manager()
+    assert mm.is_cpu
+    device = mm.device
+    assert device.is_cpu
+    assert device.device_id == -1
+    assert device.device_type == pa.DeviceAllocationType.CPU
+    assert device.type_name == "arrow::CPUDevice"
+    assert device == device
+    assert repr(device) == "<pyarrow.Device: CPUDevice()>"
+    assert repr(mm) == "<pyarrow.MemoryManager device: CPUDevice()>"
+
+
+def test_buffer_device():
+    arr = pa.array([0, 1, 2])
+    buf = arr.buffers()[1]
+    assert buf.device_type == pa.DeviceAllocationType.CPU
+    assert isinstance(buf.device, pa.Device)
+    assert isinstance(buf.memory_manager, pa.MemoryManager)
+    assert buf.is_cpu
+    assert buf.device.is_cpu
+    assert buf.device == pa.default_cpu_memory_manager().device
+    assert buf.memory_manager.is_cpu
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index 39dac4eb81..308c37fd0d 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -242,6 +242,8 @@ def test_set_timezone_db_path_non_windows():
     pa.MemoryPool,
     pa.LoggingMemoryPool,
     pa.ProxyMemoryPool,
+    pa.Device,
+    pa.MemoryManager,
 ])
 def test_extension_type_constructor_errors(klass):
     # ARROW-2638: prevent calling extension class constructors directly

Reply via email to