This is an automated email from the ASF dual-hosted git repository.
wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 0bf0c1e ARROW-1021: [Python] Add documentation for C++ pyarrow API
0bf0c1e is described below
commit 0bf0c1eb4e69f39a6237077c5a6236a135b9c697
Author: Wes McKinney <[email protected]>
AuthorDate: Mon Feb 12 15:25:18 2018 -0500
ARROW-1021: [Python] Add documentation for C++ pyarrow API
Author: Wes McKinney <[email protected]>
Author: Antoine Pitrou <[email protected]>
Closes #1576 from pitrou/ARROW-1021-document-pyarrow-cpp-api and squashes
the following commits:
61f95852 [Wes McKinney] Include get_libraries, get_library_dirs in docs,
enable test to pass on Linux in local dev setup
3fced81d [Wes McKinney] Add get_libraries, get_library_dirs functions, get
Cython test case passing on Windows
d82ead24 [Antoine Pitrou] ARROW-1021: [Python] Add documentation for C++
pyarrow API
---
.gitignore | 1 +
ci/msvc-build.bat | 12 +-
python/doc/Makefile | 2 +-
python/doc/source/api.rst | 12 +
python/doc/source/extending.rst | 327 ++++++++++++++++++++-
python/pyarrow/__init__.py | 30 ++
python/pyarrow/lib.pxd | 27 +-
.../pyarrow/tests/pyarrow_cython_example.pyx | 25 +-
python/pyarrow/tests/test_cython.py | 100 +++++++
9 files changed, 504 insertions(+), 32 deletions(-)
diff --git a/.gitignore b/.gitignore
index c38694e..f5b235a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@
*.o
*.py[ocd]
*.so
+*.so.*
*.dylib
.build_cache_dir
MANIFEST
diff --git a/ci/msvc-build.bat b/ci/msvc-build.bat
index 58dfc2a1..069ddf0 100644
--- a/ci/msvc-build.bat
+++ b/ci/msvc-build.bat
@@ -137,14 +137,20 @@ cmake -G "%GENERATOR%" ^
cmake --build . --target INSTALL --config %CONFIGURATION% || exit /B
popd
-@rem Build and import pyarrow
+@rem Build and install pyarrow
@rem parquet-cpp has some additional runtime dependencies that we need to
figure out
@rem see PARQUET-1018
pushd python
set PYARROW_CXXFLAGS=/WX
-python setup.py build_ext --inplace --with-parquet --bundle-arrow-cpp
bdist_wheel || exit /B
-py.test pyarrow -r sxX --durations=15 -v -s --parquet || exit /B
+python setup.py build_ext --with-parquet --bundle-arrow-cpp ^
+ install -q --single-version-externally-managed --record=record.text ^
+ bdist_wheel || exit /B
+
+@rem Test directly from installed location
+
+SET PYARROW_PATH=%CONDA_PREFIX%\Lib\site-packages\pyarrow
+py.test -r sxX --durations=15 -v %PYARROW_PATH% --parquet || exit /B
popd
diff --git a/python/doc/Makefile b/python/doc/Makefile
index 1b9f707..eacb124 100644
--- a/python/doc/Makefile
+++ b/python/doc/Makefile
@@ -20,7 +20,7 @@
#
# You can set these variables from the command line.
-SPHINXOPTS =
+SPHINXOPTS = -j4
SPHINXBUILD = sphinx-build
PAPER =
BUILDDIR = _build
diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst
index 2d3e39c..a71e92b 100644
--- a/python/doc/source/api.rst
+++ b/python/doc/source/api.rst
@@ -342,3 +342,15 @@ Apache Parquet
read_schema
write_metadata
write_table
+
+.. currentmodule:: pyarrow
+
+Using with C extensions
+-----------------------
+
+.. autosummary::
+ :toctree: generated/
+
+ get_include
+ get_libraries
+ get_library_dirs
diff --git a/python/doc/source/extending.rst b/python/doc/source/extending.rst
index 2d6caed..4a35898 100644
--- a/python/doc/source/extending.rst
+++ b/python/doc/source/extending.rst
@@ -15,19 +15,332 @@
.. specific language governing permissions and limitations
.. under the License.
-.. currentmodule:: pyarrow
+.. currentmodule:: pyarrow.lib
.. _extending:
-Building C++ and Cython Extensions using pyarrow
-================================================
+Using pyarrow from C++ and Cython Code
+======================================
-pyarrow features both a Cython and C++ API. We intend to fully document the
-details of how to do this.
+pyarrow features both a Cython and C++ API.
-The Arrow C++ header files are bundled with a pyarrow installation. To get the
-absolute path to this directory (like ``numpy.get_include()``), use:
+C++ API
+-------
+
+.. default-domain:: cpp
+
+The Arrow C++ header files are bundled with a pyarrow installation.
+To get the absolute path to this directory (like ``numpy.get_include()``), use:
.. code-block:: python
import pyarrow as pa
pa.get_include()
+
+Assuming the path above is on your compiler's include path, the pyarrow API
+can be included using the following directive:
+
+.. code-block:: cpp
+
+ #include <arrow/python/pyarrow.h>
+
+This will not include other parts of the Arrow API, which you will need
+to include yourself (for example ``arrow/api.h``).
+
+When building C extensions that use the Arrow C++ libraries, you must add
+appropriate linker flags. We have provided functions ``pyarrow.get_libraries``
+and ``pyarrow.get_library_dirs`` which return a list of library names and
+likely library install locations (if you installed pyarrow with pip or
+conda). These must be included when declaring your C extensions with distutils
+(see below).
+
+Initializing the API
+~~~~~~~~~~~~~~~~~~~~
+
+.. function:: int import_pyarrow()
+
+ Initialize inner pointers of the pyarrow API. On success, 0 is
+ returned. Otherwise, -1 is returned and a Python exception is set.
+
+ It is mandatory to call this function before calling any other function
+ in the pyarrow C++ API. Failing to do so will likely lead to crashes.
+
+Wrapping and Unwrapping
+~~~~~~~~~~~~~~~~~~~~~~~
+
+pyarrow provides the following functions to go back and forth between
+Python wrappers (as exposed by the pyarrow Python API) and the underlying
+C++ objects.
+
+.. function:: bool is_array(PyObject* obj)
+
+ Return whether *obj* wraps an Arrow C++ :class:`Array` pointer;
+ in other words, whether *obj* is a :py:class:`pyarrow.Array` instance.
+
+.. function:: bool is_buffer(PyObject* obj)
+
+ Return whether *obj* wraps an Arrow C++ :class:`Buffer` pointer;
+ in other words, whether *obj* is a :py:class:`pyarrow.Buffer` instance.
+
+.. function:: bool is_column(PyObject* obj)
+
+ Return whether *obj* wraps an Arrow C++ :class:`Column` pointer;
+ in other words, whether *obj* is a :py:class:`pyarrow.Column` instance.
+
+.. function:: bool is_data_type(PyObject* obj)
+
+ Return whether *obj* wraps an Arrow C++ :class:`DataType` pointer;
+ in other words, whether *obj* is a :py:class:`pyarrow.DataType` instance.
+
+.. function:: bool is_field(PyObject* obj)
+
+ Return whether *obj* wraps an Arrow C++ :class:`Field` pointer;
+ in other words, whether *obj* is a :py:class:`pyarrow.Field` instance.
+
+.. function:: bool is_record_batch(PyObject* obj)
+
+ Return whether *obj* wraps an Arrow C++ :class:`RecordBatch` pointer;
+ in other words, whether *obj* is a :py:class:`pyarrow.RecordBatch` instance.
+
+.. function:: bool is_schema(PyObject* obj)
+
+ Return whether *obj* wraps an Arrow C++ :class:`Schema` pointer;
+ in other words, whether *obj* is a :py:class:`pyarrow.Schema` instance.
+
+.. function:: bool is_table(PyObject* obj)
+
+ Return whether *obj* wraps an Arrow C++ :class:`Table` pointer;
+ in other words, whether *obj* is a :py:class:`pyarrow.Table` instance.
+
+.. function:: bool is_tensor(PyObject* obj)
+
+ Return whether *obj* wraps an Arrow C++ :class:`Tensor` pointer;
+ in other words, whether *obj* is a :py:class:`pyarrow.Tensor` instance.
+
+The following functions expect a pyarrow object, unwrap the underlying
+Arrow C++ API pointer, and put it in the *out* parameter. The returned
+:class:`Status` object must be inspected first to know whether any error
+occurred. If successful, *out* is guaranteed to be non-NULL.
+
+.. function:: Status unwrap_array(PyObject* obj, std::shared_ptr<Array>* out)
+
+ Unwrap the Arrow C++ :class:`Array` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_buffer(PyObject* obj, std::shared_ptr<Buffer>* out)
+
+ Unwrap the Arrow C++ :class:`Buffer` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_column(PyObject* obj, std::shared_ptr<Column>* out)
+
+ Unwrap the Arrow C++ :class:`Column` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_data_type(PyObject* obj,
std::shared_ptr<DataType>* out)
+
+ Unwrap the Arrow C++ :class:`DataType` pointer from *obj* and put it in
*out*.
+
+.. function:: Status unwrap_field(PyObject* obj, std::shared_ptr<Field>* out)
+
+ Unwrap the Arrow C++ :class:`Field` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_record_batch(PyObject* obj,
std::shared_ptr<RecordBatch>* out)
+
+ Unwrap the Arrow C++ :class:`RecordBatch` pointer from *obj* and put it in
*out*.
+
+.. function:: Status unwrap_schema(PyObject* obj, std::shared_ptr<Schema>* out)
+
+ Unwrap the Arrow C++ :class:`Schema` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_table(PyObject* obj, std::shared_ptr<Table>* out)
+
+ Unwrap the Arrow C++ :class:`Table` pointer from *obj* and put it in *out*.
+
+.. function:: Status unwrap_tensor(PyObject* obj, std::shared_ptr<Tensor>* out)
+
+ Unwrap the Arrow C++ :class:`Tensor` pointer from *obj* and put it in *out*.
+
+The following functions take an Arrow C++ API pointer and wrap it in a
+pyarrow object of the corresponding type. A new reference is returned.
+On error, NULL is returned and a Python exception is set.
+
+.. function:: PyObject* wrap_array(const std::shared_ptr<Array>& array)
+
+ Wrap the Arrow C++ *array* in a :py:class:`pyarrow.Array` instance.
+
+.. function:: PyObject* wrap_buffer(const std::shared_ptr<Buffer>& buffer)
+
+ Wrap the Arrow C++ *buffer* in a :py:class:`pyarrow.Buffer` instance.
+
+.. function:: PyObject* wrap_column(const std::shared_ptr<Column>& column)
+
+ Wrap the Arrow C++ *column* in a :py:class:`pyarrow.Column` instance.
+
+.. function:: PyObject* wrap_data_type(const std::shared_ptr<DataType>&
data_type)
+
+ Wrap the Arrow C++ *data_type* in a :py:class:`pyarrow.DataType` instance.
+
+.. function:: PyObject* wrap_field(const std::shared_ptr<Field>& field)
+
+ Wrap the Arrow C++ *field* in a :py:class:`pyarrow.Field` instance.
+
+.. function:: PyObject* wrap_record_batch(const std::shared_ptr<RecordBatch>&
batch)
+
+ Wrap the Arrow C++ record *batch* in a :py:class:`pyarrow.RecordBatch`
instance.
+
+.. function:: PyObject* wrap_schema(const std::shared_ptr<Schema>& schema)
+
+ Wrap the Arrow C++ *schema* in a :py:class:`pyarrow.Schema` instance.
+
+.. function:: PyObject* wrap_table(const std::shared_ptr<Table>& table)
+
+ Wrap the Arrow C++ *table* in a :py:class:`pyarrow.Table` instance.
+
+.. function:: PyObject* wrap_tensor(const std::shared_ptr<Tensor>& tensor)
+
+ Wrap the Arrow C++ *tensor* in a :py:class:`pyarrow.Tensor` instance.
+
+
+Cython API
+----------
+
+.. default-domain:: py
+
+The Cython API more or less mirrors the C++ API, but the calling convention
+can be different as required by Cython. In Cython, you don't need to
+initialize the API as that will be handled automatically by the ``cimport``
+directive.
+
+.. note::
+ Classes from the Arrow C++ API are renamed when exposed in Cython, to
+ avoid name clashes with the corresponding Python classes. For example,
+ C++ Arrow arrays have the ``CArray`` type and ``Array`` is the
+ corresponding Python wrapper class.
+
+Wrapping and Unwrapping
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The following functions expect a pyarrow object, unwrap the underlying
+Arrow C++ API pointer, and return it. NULL is returned (without setting
+an exception) if the input is not of the right type.
+
+.. function:: pyarrow_unwrap_array(obj) -> shared_ptr[CArray]
+
+ Unwrap the Arrow C++ :cpp:class:`Array` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_batch(obj) -> shared_ptr[CRecordBatch]
+
+ Unwrap the Arrow C++ :cpp:class:`RecordBatch` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_buffer(obj) -> shared_ptr[CBuffer]
+
+ Unwrap the Arrow C++ :cpp:class:`Buffer` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_column(obj) -> shared_ptr[CColumn]
+
+ Unwrap the Arrow C++ :cpp:class:`Column` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_data_type(obj) -> shared_ptr[CDataType]
+
+ Unwrap the Arrow C++ :cpp:class:`DataType` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_field(obj) -> shared_ptr[CField]
+
+ Unwrap the Arrow C++ :cpp:class:`Field` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_schema(obj) -> shared_ptr[CSchema]
+
+ Unwrap the Arrow C++ :cpp:class:`Schema` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_table(obj) -> shared_ptr[CTable]
+
+ Unwrap the Arrow C++ :cpp:class:`Table` pointer from *obj*.
+
+.. function:: pyarrow_unwrap_tensor(obj) -> shared_ptr[CTensor]
+
+ Unwrap the Arrow C++ :cpp:class:`Tensor` pointer from *obj*.
+
+The following functions take an Arrow C++ API pointer and wrap it in a
+pyarrow object of the corresponding type. An exception is raised on error.
+
+.. function:: pyarrow_wrap_array(sp_array: const shared_ptr[CArray]& array) ->
object
+
+ Wrap the Arrow C++ *array* in a Python :class:`pyarrow.Array` instance.
+
+.. function:: pyarrow_wrap_batch(sp_array: const shared_ptr[CRecordBatch]&
batch) -> object
+
+ Wrap the Arrow C++ record *batch* in a Python :class:`pyarrow.RecordBatch`
instance.
+
+.. function:: pyarrow_wrap_buffer(sp_array: const shared_ptr[CBuffer]& buffer)
-> object
+
+ Wrap the Arrow C++ *buffer* in a Python :class:`pyarrow.Buffer` instance.
+
+.. function:: pyarrow_wrap_column(sp_array: const shared_ptr[CColumn]& column)
-> object
+
+ Wrap the Arrow C++ *column* in a Python :class:`pyarrow.Column` instance.
+
+.. function:: pyarrow_wrap_data_type(sp_array: const shared_ptr[CDataType]&
data_type) -> object
+
+ Wrap the Arrow C++ *data_type* in a Python :class:`pyarrow.DataType`
instance.
+
+.. function:: pyarrow_wrap_field(sp_array: const shared_ptr[CField]& field) ->
object
+
+ Wrap the Arrow C++ *field* in a Python :class:`pyarrow.Field` instance.
+
+.. function:: pyarrow_wrap_resizable_buffer(sp_array: const
shared_ptr[CResizableBuffer]& buffer) -> object
+
+ Wrap the Arrow C++ resizable *buffer* in a Python
:class:`pyarrow.ResizableBuffer` instance.
+
+.. function:: pyarrow_wrap_schema(sp_array: const shared_ptr[CSchema]& schema)
-> object
+
+ Wrap the Arrow C++ *schema* in a Python :class:`pyarrow.Schema` instance.
+
+.. function:: pyarrow_wrap_table(sp_array: const shared_ptr[CTable]& table) ->
object
+
+ Wrap the Arrow C++ *table* in a Python :class:`pyarrow.Table` instance.
+
+.. function:: pyarrow_wrap_tensor(sp_array: const shared_ptr[CTensor]& tensor)
-> object
+
+ Wrap the Arrow C++ *tensor* in a Python :class:`pyarrow.Tensor` instance.
+
+Example
+~~~~~~~
+
+The following Cython module shows how to unwrap a Python object and call
+the underlying C++ object's API.
+
+.. code-block:: python
+
+ # distutils: language=c++
+
+ from pyarrow.lib cimport *
+
+ def get_array_length(obj):
+ # Just an example function accessing both the pyarrow Cython API
+ # and the Arrow C++ API
+ cdef shared_ptr[CArray] arr = pyarrow_unwrap_array(obj)
+ if arr.get() == NULL:
+ raise TypeError("not an array")
+ return arr.get().length()
+
+To build this module, you will need a slightly customized ``setup.py`` file
+(this is assuming the file above is named ``example.pyx``):
+
+.. code-block:: python
+
+ from distutils.core import setup
+ from Cython.Build import cythonize
+
+ import numpy as np
+
+ import pyarrow as pa
+
+ ext_modules = cythonize("example.pyx")
+
+ for ext in ext_modules:
+ # The Numpy C headers are currently required
+ ext.include_dirs.append(np.get_include())
+ ext.include_dirs.append(pa.get_include())
+ ext.libraries.extend(pa.get_libraries())
+ ext.library_dirs.extend(pa.get_library_dirs())
+
+ setup(
+ ext_modules=ext_modules,
+ )
diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 8b3cba9..d95954e 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -165,3 +165,33 @@ def get_include():
"""
import os
return os.path.join(os.path.dirname(__file__), 'include')
+
+
+def get_libraries():
+ """
+ Return list of library names to include in the `libraries` argument for C
+ or Cython extensions using pyarrow
+ """
+ return ['arrow_python']
+
+
+def get_library_dirs():
+ """
+ Return list of directories likely to contain Arrow C++ libraries for
+ linking C or Cython extensions using pyarrow
+ """
+ import os
+ import sys
+ package_cwd = os.path.dirname(__file__)
+
+ library_dirs = [package_cwd]
+
+ if sys.platform == 'win32':
+ # TODO(wesm): Is this necessary, or does setuptools within a conda
+ # installation add Library\lib to the linker path for MSVC?
+ site_packages, _ = os.path.split(package_cwd)
+ python_base_install, _ = os.path.split(site_packages)
+ library_dirs.append(os.path.join(python_base_install,
+ 'Library', 'lib'))
+
+ return library_dirs
diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd
index 161562c..b1433ec 100644
--- a/python/pyarrow/lib.pxd
+++ b/python/pyarrow/lib.pxd
@@ -347,16 +347,29 @@ cdef class NativeFile:
cdef get_reader(object source, shared_ptr[RandomAccessFile]* reader)
cdef get_writer(object source, shared_ptr[OutputStream]* writer)
+cdef dict box_metadata(const CKeyValueMetadata* sp_metadata)
+
+# Public Cython API for 3rd party code
+
+cdef public object pyarrow_wrap_array(const shared_ptr[CArray]& sp_array)
+# XXX pyarrow.h calls it `wrap_record_batch`
+cdef public object pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
cdef public object pyarrow_wrap_buffer(const shared_ptr[CBuffer]& buf)
-cdef public object pyarrow_wrap_resizable_buffer(
- const shared_ptr[CResizableBuffer]& buf)
+cdef public object pyarrow_wrap_column(const shared_ptr[CColumn]& ccolumn)
cdef public object pyarrow_wrap_data_type(const shared_ptr[CDataType]& type)
cdef public object pyarrow_wrap_field(const shared_ptr[CField]& field)
+cdef public object pyarrow_wrap_resizable_buffer(
+ const shared_ptr[CResizableBuffer]& buf)
cdef public object pyarrow_wrap_schema(const shared_ptr[CSchema]& type)
-cdef public object pyarrow_wrap_array(const shared_ptr[CArray]& sp_array)
-cdef public object pyarrow_wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
-cdef public object pyarrow_wrap_column(const shared_ptr[CColumn]& ccolumn)
cdef public object pyarrow_wrap_table(const shared_ptr[CTable]& ctable)
-cdef public object pyarrow_wrap_batch(const shared_ptr[CRecordBatch]& cbatch)
+cdef public object pyarrow_wrap_tensor(const shared_ptr[CTensor]& sp_tensor)
-cdef dict box_metadata(const CKeyValueMetadata* sp_metadata)
+cdef public shared_ptr[CArray] pyarrow_unwrap_array(object array)
+cdef public shared_ptr[CRecordBatch] pyarrow_unwrap_batch(object batch)
+cdef public shared_ptr[CBuffer] pyarrow_unwrap_buffer(object buffer)
+cdef public shared_ptr[CColumn] pyarrow_unwrap_column(object column)
+cdef public shared_ptr[CDataType] pyarrow_unwrap_data_type(object data_type)
+cdef public shared_ptr[CField] pyarrow_unwrap_field(object field)
+cdef public shared_ptr[CSchema] pyarrow_unwrap_schema(object schema)
+cdef public shared_ptr[CTable] pyarrow_unwrap_table(object table)
+cdef public shared_ptr[CTensor] pyarrow_unwrap_tensor(object tensor)
diff --git a/.gitignore b/python/pyarrow/tests/pyarrow_cython_example.pyx
similarity index 70%
copy from .gitignore
copy to python/pyarrow/tests/pyarrow_cython_example.pyx
index c38694e..b5e5406 100644
--- a/.gitignore
+++ b/python/pyarrow/tests/pyarrow_cython_example.pyx
@@ -15,18 +15,15 @@
# specific language governing permissions and limitations
# under the License.
-# Compiled source
-*.a
-*.dll
-*.o
-*.py[ocd]
-*.so
-*.dylib
-.build_cache_dir
-MANIFEST
+# distutils: language=c++
-cpp/.idea/
-python/.eggs/
-.vscode
-.idea/
-.pytest_cache/
+from pyarrow.lib cimport *
+
+
+def get_array_length(obj):
+ # Just an example function accessing both the pyarrow Cython API
+ # and the Arrow C++ API
+ cdef shared_ptr[CArray] arr = pyarrow_unwrap_array(obj)
+ if arr.get() == NULL:
+ raise TypeError("not an array")
+ return arr.get().length()
diff --git a/python/pyarrow/tests/test_cython.py
b/python/pyarrow/tests/test_cython.py
new file mode 100644
index 0000000..51b1a48
--- /dev/null
+++ b/python/pyarrow/tests/test_cython.py
@@ -0,0 +1,100 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import os
+import shutil
+import subprocess
+import sys
+
+import pytest
+
+import pyarrow as pa
+
+
+here = os.path.dirname(os.path.abspath(__file__))
+
+setup_template = """if 1:
+ from distutils.core import setup
+ from Cython.Build import cythonize
+
+ import numpy as np
+
+ import pyarrow as pa
+
+ ext_modules = cythonize({pyx_file!r})
+ compiler_opts = {compiler_opts!r}
+ custom_ld_path = {test_ld_path!r}
+
+ for ext in ext_modules:
+ # XXX required for numpy/numpyconfig.h,
+ # included from arrow/python/api.h
+ ext.include_dirs.append(np.get_include())
+ ext.include_dirs.append(pa.get_include())
+ ext.libraries.extend(pa.get_libraries())
+ ext.library_dirs.extend(pa.get_library_dirs())
+ if custom_ld_path:
+ ext.library_dirs.append(custom_ld_path)
+ ext.extra_compile_args.extend(compiler_opts)
+
+ setup(
+ ext_modules=ext_modules,
+ )
+"""
+
+
+def test_cython_api(tmpdir):
+ """
+ Basic test for the Cython API.
+ """
+ pytest.importorskip('Cython')
+
+ if 'ARROW_HOME' in os.environ:
+ ld_path_default = os.path.join(os.environ['ARROW_HOME'], 'lib')
+
+ test_ld_path = os.environ.get('PYARROW_TEST_LD_PATH', ld_path_default)
+
+ with tmpdir.as_cwd():
+ # Set up temporary workspace
+ pyx_file = 'pyarrow_cython_example.pyx'
+ shutil.copyfile(os.path.join(here, pyx_file),
+ os.path.join(str(tmpdir), pyx_file))
+ # Create setup.py file
+ if os.name == 'posix':
+ compiler_opts = ['-std=c++11']
+ else:
+ compiler_opts = []
+ setup_code = setup_template.format(pyx_file=pyx_file,
+ compiler_opts=compiler_opts,
+ test_ld_path=test_ld_path)
+ with open('setup.py', 'w') as f:
+ f.write(setup_code)
+
+ # Compile extension module
+ subprocess.check_call([sys.executable, 'setup.py',
+ 'build_ext', '--inplace'])
+
+ # Check basic functionality
+ orig_path = sys.path[:]
+ sys.path.insert(0, str(tmpdir))
+ try:
+ mod = __import__('pyarrow_cython_example')
+ arr = pa.array([1, 2, 3])
+ assert mod.get_array_length(arr) == 3
+ with pytest.raises(TypeError, match="not an array"):
+ mod.get_array_length(None)
+ finally:
+ sys.path = orig_path
--
To stop receiving notification emails like this one, please contact
[email protected].