This is an automated email from the ASF dual-hosted git repository.
jorisvandenbossche pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 31fe24dd33 GH-41126: [Python] Basic bindings for Device and MemoryManager classes (#41685)
31fe24dd33 is described below
commit 31fe24dd3345d387ba52d46c2915a909a5667813
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Fri May 31 09:48:54 2024 +0200
GH-41126: [Python] Basic bindings for Device and MemoryManager classes (#41685)
### Rationale for this change
Add bindings for the C++ `arrow::Device` and `arrow::MemoryManager` classes.
### What changes are included in this PR?
Basic bindings by adding the `pyarrow.Device` and `pyarrow.MemoryManager`
classes, and just tested for CPU.
What is not included here are additional methods on the `MemoryManager`
class (eg to allocate or copy buffers), and this is also not yet tested for
CUDA. Planning to do this as follow-ups, and first doing those basic bindings
should enable further enhancements to be done in parallel.
### Are these changes tested?
Yes, for the CPU device only.
* GitHub Issue: #41126
Authored-by: Joris Van den Bossche <[email protected]>
Signed-off-by: Joris Van den Bossche <[email protected]>
---
python/pyarrow/__init__.py | 3 +
python/pyarrow/device.pxi | 162 +++++++++++++++++++++++++++++++++++
python/pyarrow/includes/libarrow.pxd | 35 ++++++++
python/pyarrow/io.pxi | 33 +++++++
python/pyarrow/lib.pxd | 20 +++++
python/pyarrow/lib.pyx | 3 +
python/pyarrow/tests/test_device.py | 43 ++++++++++
python/pyarrow/tests/test_misc.py | 2 +
8 files changed, 301 insertions(+)
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 936f473697..e52e0d242b 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -236,6 +236,9 @@ from pyarrow.lib import (null, bool_,
RunEndEncodedScalar, ExtensionScalar)
# Buffers, allocation
+from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
+ default_cpu_memory_manager)
+
from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
Codec, compress, decompress, allocate_buffer)
diff --git a/python/pyarrow/device.pxi b/python/pyarrow/device.pxi
new file mode 100644
index 0000000000..6e60347520
--- /dev/null
+++ b/python/pyarrow/device.pxi
@@ -0,0 +1,162 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+
+
+cpdef enum DeviceAllocationType:
+ CPU = <char> CDeviceAllocationType_kCPU
+ CUDA = <char> CDeviceAllocationType_kCUDA
+ CUDA_HOST = <char> CDeviceAllocationType_kCUDA_HOST
+ OPENCL = <char> CDeviceAllocationType_kOPENCL
+ VULKAN = <char> CDeviceAllocationType_kVULKAN
+ METAL = <char> CDeviceAllocationType_kMETAL
+ VPI = <char> CDeviceAllocationType_kVPI
+ ROCM = <char> CDeviceAllocationType_kROCM
+ ROCM_HOST = <char> CDeviceAllocationType_kROCM_HOST
+ EXT_DEV = <char> CDeviceAllocationType_kEXT_DEV
+ CUDA_MANAGED = <char> CDeviceAllocationType_kCUDA_MANAGED
+ ONEAPI = <char> CDeviceAllocationType_kONEAPI
+ WEBGPU = <char> CDeviceAllocationType_kWEBGPU
+ HEXAGON = <char> CDeviceAllocationType_kHEXAGON
+
+
+cdef object _wrap_device_allocation_type(CDeviceAllocationType device_type):
+ return DeviceAllocationType(<char> device_type)
+
+
+cdef class Device(_Weakrefable):
+ """
+ Abstract interface for hardware devices
+
+ This object represents a device with access to some memory spaces.
+ When handling a Buffer or raw memory address, it allows deciding in which
+ context the raw memory address should be interpreted
+ (e.g. CPU-accessible memory, or embedded memory on some particular GPU).
+ """
+
+ def __init__(self):
+ raise TypeError("Do not call Device's constructor directly, "
+ "use the device attribute of the MemoryManager instead.")
+
+ cdef void init(self, const shared_ptr[CDevice]& device):
+ self.device = device
+
+ @staticmethod
+ cdef wrap(const shared_ptr[CDevice]& device):
+ cdef Device self = Device.__new__(Device)
+ self.init(device)
+ return self
+
+ def __eq__(self, other):
+ if not isinstance(other, Device):
+ return False
+ return self.device.get().Equals(deref((<Device>other).device.get()))
+
+ def __repr__(self):
+ return "<pyarrow.Device: {}>".format(frombytes(self.device.get().ToString()))
+
+ @property
+ def type_name(self):
+ """
+ A shorthand for this device's type.
+ """
+ return frombytes(self.device.get().type_name())
+
+ @property
+ def device_id(self):
+ """
+ A device ID to identify this device if there are multiple of this type.
+
+ If there is no "device_id" equivalent (such as for the main CPU device on
+ non-numa systems) returns -1.
+ """
+ return self.device.get().device_id()
+
+ @property
+ def is_cpu(self):
+ """
+ Whether this device is the main CPU device.
+
+ This shorthand method is very useful when deciding whether a memory address
+ is CPU-accessible.
+ """
+ return self.device.get().is_cpu()
+
+ @property
+ def device_type(self):
+ """
+ Return the DeviceAllocationType of this device.
+ """
+ return _wrap_device_allocation_type(self.device.get().device_type())
+
+
+cdef class MemoryManager(_Weakrefable):
+ """
+ An object that provides memory management primitives.
+
+ A MemoryManager is always tied to a particular Device instance.
+ It can also have additional parameters (such as a MemoryPool to
+ allocate CPU memory).
+
+ """
+
+ def __init__(self):
+ raise TypeError("Do not call MemoryManager's constructor directly, "
+ "use pyarrow.default_cpu_memory_manager() instead.")
+
+ cdef void init(self, const shared_ptr[CMemoryManager]& mm):
+ self.memory_manager = mm
+
+ @staticmethod
+ cdef wrap(const shared_ptr[CMemoryManager]& mm):
+ cdef MemoryManager self = MemoryManager.__new__(MemoryManager)
+ self.init(mm)
+ return self
+
+ def __repr__(self):
+ return "<pyarrow.MemoryManager device: {}>".format(
+ frombytes(self.memory_manager.get().device().get().ToString())
+ )
+
+ @property
+ def device(self):
+ """
+ The device this MemoryManager is tied to.
+ """
+ return Device.wrap(self.memory_manager.get().device())
+
+ @property
+ def is_cpu(self):
+ """
+ Whether this MemoryManager is tied to the main CPU device.
+
+ This shorthand method is very useful when deciding whether a memory
+ address is CPU-accessible.
+ """
+ return self.memory_manager.get().is_cpu()
+
+
+def default_cpu_memory_manager():
+ """
+ Return the default CPU MemoryManager instance.
+
+ The returned singleton instance uses the default MemoryPool.
+ """
+ return MemoryManager.wrap(c_default_cpu_memory_manager())
diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd
index 8bfc31edc7..a66f584b83 100644
--- a/python/pyarrow/includes/libarrow.pxd
+++ b/python/pyarrow/includes/libarrow.pxd
@@ -316,6 +316,38 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
cdef cppclass CProxyMemoryPool" arrow::ProxyMemoryPool"(CMemoryPool):
CProxyMemoryPool(CMemoryPool*)
+ ctypedef enum CDeviceAllocationType "arrow::DeviceAllocationType":
+ CDeviceAllocationType_kCPU "arrow::DeviceAllocationType::kCPU"
+ CDeviceAllocationType_kCUDA "arrow::DeviceAllocationType::kCUDA"
+ CDeviceAllocationType_kCUDA_HOST "arrow::DeviceAllocationType::kCUDA_HOST"
+ CDeviceAllocationType_kOPENCL "arrow::DeviceAllocationType::kOPENCL"
+ CDeviceAllocationType_kVULKAN "arrow::DeviceAllocationType::kVULKAN"
+ CDeviceAllocationType_kMETAL "arrow::DeviceAllocationType::kMETAL"
+ CDeviceAllocationType_kVPI "arrow::DeviceAllocationType::kVPI"
+ CDeviceAllocationType_kROCM "arrow::DeviceAllocationType::kROCM"
+ CDeviceAllocationType_kROCM_HOST "arrow::DeviceAllocationType::kROCM_HOST"
+ CDeviceAllocationType_kEXT_DEV "arrow::DeviceAllocationType::kEXT_DEV"
+ CDeviceAllocationType_kCUDA_MANAGED "arrow::DeviceAllocationType::kCUDA_MANAGED"
+ CDeviceAllocationType_kONEAPI "arrow::DeviceAllocationType::kONEAPI"
+ CDeviceAllocationType_kWEBGPU "arrow::DeviceAllocationType::kWEBGPU"
+ CDeviceAllocationType_kHEXAGON "arrow::DeviceAllocationType::kHEXAGON"
+
+ cdef cppclass CDevice" arrow::Device":
+ const char* type_name()
+ c_string ToString()
+ c_bool Equals(const CDevice& other)
+ int64_t device_id()
+ c_bool is_cpu() const
+ shared_ptr[CMemoryManager] default_memory_manager()
+ CDeviceAllocationType device_type()
+
+ cdef cppclass CMemoryManager" arrow::MemoryManager":
+ const shared_ptr[CDevice] device()
+ c_bool is_cpu() const
+
+ shared_ptr[CMemoryManager] c_default_cpu_memory_manager \
+ " arrow::default_cpu_memory_manager"()
+
cdef cppclass CBuffer" arrow::Buffer":
CBuffer(const uint8_t* data, int64_t size)
const uint8_t* data()
@@ -328,6 +360,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_bool is_mutable() const
c_string ToHexString()
c_bool Equals(const CBuffer& other)
+ shared_ptr[CDevice] device()
+ const shared_ptr[CMemoryManager] memory_manager()
+ CDeviceAllocationType device_type()
CResult[shared_ptr[CBuffer]] SliceBufferSafe(
const shared_ptr[CBuffer]& buffer, int64_t offset)
diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi
index 9e8026deb4..48b7934209 100644
--- a/python/pyarrow/io.pxi
+++ b/python/pyarrow/io.pxi
@@ -1327,6 +1327,39 @@ cdef class Buffer(_Weakrefable):
"""
return self.buffer.get().is_cpu()
+ @property
+ def device(self):
+ """
+ The device where the buffer resides.
+
+ Returns
+ -------
+ Device
+ """
+ return Device.wrap(self.buffer.get().device())
+
+ @property
+ def memory_manager(self):
+ """
+ The memory manager associated with the buffer.
+
+ Returns
+ -------
+ MemoryManager
+ """
+ return MemoryManager.wrap(self.buffer.get().memory_manager())
+
+ @property
+ def device_type(self):
+ """
+ The device type where the buffer resides.
+
+ Returns
+ -------
+ DeviceAllocationType
+ """
+ return _wrap_device_allocation_type(self.buffer.get().device_type())
+
@property
def parent(self):
cdef shared_ptr[CBuffer] parent_buf = self.buffer.get().parent()
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index bfd266a807..1bc639cc8d 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -524,6 +524,26 @@ cdef class RecordBatch(_Tabular):
cdef void init(self, const shared_ptr[CRecordBatch]& table)
+cdef class Device(_Weakrefable):
+ cdef:
+ shared_ptr[CDevice] device
+
+ cdef void init(self, const shared_ptr[CDevice]& device)
+
+ @staticmethod
+ cdef wrap(const shared_ptr[CDevice]& device)
+
+
+cdef class MemoryManager(_Weakrefable):
+ cdef:
+ shared_ptr[CMemoryManager] memory_manager
+
+ cdef void init(self, const shared_ptr[CMemoryManager]& memory_manager)
+
+ @staticmethod
+ cdef wrap(const shared_ptr[CMemoryManager]& mm)
+
+
cdef class Buffer(_Weakrefable):
cdef:
shared_ptr[CBuffer] buffer
diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx
index 3245e50f0f..904e018ffd 100644
--- a/python/pyarrow/lib.pyx
+++ b/python/pyarrow/lib.pyx
@@ -162,6 +162,9 @@ include "pandas-shim.pxi"
# Memory pools and allocation
include "memory.pxi"
+# Device type and memory manager
+include "device.pxi"
+
# DataType, Field, Schema
include "types.pxi"
diff --git a/python/pyarrow/tests/test_device.py b/python/pyarrow/tests/test_device.py
new file mode 100644
index 0000000000..6bdb015be1
--- /dev/null
+++ b/python/pyarrow/tests/test_device.py
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pyarrow as pa
+
+
+def test_device_memory_manager():
+ mm = pa.default_cpu_memory_manager()
+ assert mm.is_cpu
+ device = mm.device
+ assert device.is_cpu
+ assert device.device_id == -1
+ assert device.device_type == pa.DeviceAllocationType.CPU
+ assert device.type_name == "arrow::CPUDevice"
+ assert device == device
+ assert repr(device) == "<pyarrow.Device: CPUDevice()>"
+ assert repr(mm) == "<pyarrow.MemoryManager device: CPUDevice()>"
+
+
+def test_buffer_device():
+ arr = pa.array([0, 1, 2])
+ buf = arr.buffers()[1]
+ assert buf.device_type == pa.DeviceAllocationType.CPU
+ assert isinstance(buf.device, pa.Device)
+ assert isinstance(buf.memory_manager, pa.MemoryManager)
+ assert buf.is_cpu
+ assert buf.device.is_cpu
+ assert buf.device == pa.default_cpu_memory_manager().device
+ assert buf.memory_manager.is_cpu
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index 39dac4eb81..308c37fd0d 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -242,6 +242,8 @@ def test_set_timezone_db_path_non_windows():
pa.MemoryPool,
pa.LoggingMemoryPool,
pa.ProxyMemoryPool,
+ pa.Device,
+ pa.MemoryManager,
])
def test_extension_type_constructor_errors(klass):
# ARROW-2638: prevent calling extension class constructors directly