kou commented on code in PR #13311:
URL: https://github.com/apache/arrow/pull/13311#discussion_r921833223
##########
cpp/cmake_modules/FindArrowPython.cmake:
##########
@@ -46,8 +46,9 @@ endif()
find_package(Arrow ${find_package_arguments})
if(ARROW_FOUND)
+ message(STATUS "CPYARROW_HOME: ${CPYARROW_HOME}")
arrow_find_package(ARROW_PYTHON
- "${ARROW_HOME}"
+ "${CPYARROW_HOME}"
Review Comment:
How about `ARROW_PYTHON_HOME`, since we use `ArrowPython` as the package name?
##########
docs/source/developers/python.rst:
##########
@@ -131,6 +131,29 @@ for ``.py`` files or
for ``.pyx`` and ``.pxi`` files. In this case you will also need to
install the `pytest-cython <https://github.com/lgpage/pytest-cython>`_ plugin.
+Testing Arrow Python (C++ code)
+-------------------------------
+If you want to run ctest for the tests that are included in the Arrow Python
+module, you will need to build Arrow with ``DARROW_BUILD_TESTS=ON``.
Review Comment:
```suggestion
module, you will need to build Arrow with ``-DARROW_BUILD_TESTS=ON``.
```
##########
cpp/cmake_modules/FindArrowPythonFlight.cmake:
##########
@@ -50,7 +50,7 @@ find_package(ArrowPython ${find_package_arguments})
if(ARROW_PYTHON_FOUND AND ARROW_FLIGHT_FOUND)
arrow_find_package(ARROW_PYTHON_FLIGHT
- "${ARROW_HOME}"
+ "${CPYARROW_HOME}"
Review Comment:
```suggestion
"${ARROW_PYTHON_HOME}"
```
##########
python/pyarrow/src_arrow/CMakeLists.txt:
##########
@@ -0,0 +1,447 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# ARROW_SOURCE_DIR: location of arrow/cpp
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE}/cpp")
+set(ARROW_SOURCE_DIR "${ARROW_CPP_SOURCE_DIR}")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
Review Comment:
We need to update this in the release process via `dev/release/utils-prepare.sh`.
##########
python/pyarrow/src_arrow/CMakeLists.txt:
##########
@@ -0,0 +1,447 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# ARROW_SOURCE_DIR: location of arrow/cpp
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE}/cpp")
+set(ARROW_SOURCE_DIR "${ARROW_CPP_SOURCE_DIR}")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+#
+# Arrow
+#
+
+find_package(Arrow REQUIRED)
+include(ArrowOptions)
+
+#
+# Python
+#
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3Alt 3.7 REQUIRED)
+include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} src)
+
+add_custom_target(arrow_python-all)
+add_custom_target(arrow_python)
+add_custom_target(arrow_python-tests)
+add_dependencies(arrow_python-all arrow_python arrow_python-tests)
+
+set(ARROW_PYTHON_SRCS
+ arrow_to_pandas.cc
+ benchmark.cc
+ common.cc
+ datetime.cc
+ decimal.cc
+ deserialize.cc
+ extension_type.cc
+ gdb.cc
+ helpers.cc
+ inference.cc
+ init.cc
+ io.cc
+ ipc.cc
+ numpy_convert.cc
+ numpy_to_arrow.cc
+ python_to_arrow.cc
+ pyarrow.cc
+ serialize.cc
+ udf.cc)
+
+set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+ SKIP_UNITY_BUILD_INCLUSION ON)
+
+#
+# Arrow vs C PyArrow options
+#
+
+# Check all the options from Arrow and C PyArrow to be in line
+if(PYARROW_WITH_DATASET)
+ find_package(ArrowDataset REQUIRED)
+endif()
+
+if(PYARROW_WITH_PARQUET_ENCRYPTION)
+ if(PARQUET_REQUIRE_ENCRYPTION)
+ list(APPEND ARROW_PYTHON_SRCS parquet_encryption.cc)
+ find_package(Parquet REQUIRED)
+ else()
+ message(FATAL_ERROR "You must build Arrow C++ with
PARQUET_REQUIRE_ENCRYPTION=ON")
+ endif()
+endif()
+
+if(PYARROW_WITH_HDFS)
+ if(NOT ARROW_HDFS)
+ message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
+ endif()
+endif()
+
+# Check for only Arrow C++ options
+if(ARROW_CSV)
+ list(APPEND ARROW_PYTHON_SRCS csv.cc)
+endif()
+
+if(ARROW_FILESYSTEM)
+ list(APPEND ARROW_PYTHON_SRCS filesystem.cc)
+endif()
+
+# Link to arrow dependecies
+if(ARROW_BUILD_SHARED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_shared)
+else()
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
+ find_package(Threads REQUIRED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_static Threads::Threads)
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID
STREQUAL "Clang")
+ set_property(SOURCE pyarrow.cc
+ APPEND_STRING
+ PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
+endif()
+
+#
+# Compiler stuff
+#
+
+include(GNUInstallDirs)
+
+# This ensures that things like gnu++11 get passed correctly
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+ set(CMAKE_CXX_STANDARD 11)
+endif()
+
+# We require a C++11 compliant compiler
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Needed gdb flags
+include(SetupCxxFlags)
+
+#
+# shred/static link libs
+#
+
+set(ARROW_PYTHON_SHARED_LINK_LIBS arrow_shared)
+set(ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS)
+set(ARROW_PYTHON_STATIC_LINK_LIBS ${PYTHON_OTHER_LIBS})
+
+if(WIN32)
+ list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS ${PYTHON_LIBRARIES}
${PYTHON_OTHER_LIBS})
+endif()
+
+if(PARQUET_REQUIRE_ENCRYPTION AND PYARROW_WITH_PARQUET_ENCRYPTION)
+ list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS parquet_shared)
+endif()
+
+set(ARROW_PYTHON_INCLUDES ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS})
+
+# Inlude macros needed to find and use add_arrow_lib function
+include(BuildUtils)
+include(CMakePackageConfigHelpers)
+
+# Set the output directory for cmake module
+# (CMAKE_INSTALL_PREFIX = python/build/dist! should be set in setup.py!)
+set(ARROW_CMAKE_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
+
+# Changing ARROW_SOURCE_DIR for sdist build
+# In this case cpp/cmake_modules doesn't exist
+if(NOT EXISTS "${ARROW_SOURCE_DIR}/cmake_modules/Find${MODULE}.cmake")
+ set(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR})
+endif()
+
+add_arrow_lib(arrow_python
+ CMAKE_PACKAGE_NAME
+ ArrowPython
+ PKG_CONFIG_NAME
+ arrow-python
+ SOURCES
+ ${ARROW_PYTHON_SRCS}
+ PRECOMPILED_HEADERS
+ "$<$<COMPILE_LANGUAGE:CXX>:pch.h>"
+ OUTPUTS
+ ARROW_PYTHON_LIBRARIES
+ DEPENDENCIES
+ ${ARROW_PYTHON_DEPENDENCIES}
+ SHARED_LINK_FLAGS
+ ${ARROW_VERSION_SCRIPT_FLAGS}
+ SHARED_LINK_LIBS
+ ${ARROW_PYTHON_SHARED_LINK_LIBS}
+ SHARED_PRIVATE_LINK_LIBS
+ ${ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS}
+ STATIC_LINK_LIBS
+ ${ARROW_PYTHON_STATIC_LINK_LIBS}
+ EXTRA_INCLUDES
+ "${ARROW_PYTHON_INCLUDES}")
+
+add_dependencies(arrow_python ${ARROW_PYTHON_LIBRARIES})
+
+foreach(LIB_TARGET ${ARROW_PYTHON_LIBRARIES})
+ target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYTHON_EXPORTING)
+endforeach()
+
+if(ARROW_BUILD_STATIC AND MSVC)
+ target_compile_definitions(arrow_python_static PUBLIC ARROW_STATIC)
+endif()
+
+if(ARROW_FLIGHT AND ARROW_BUILD_SHARED)
+ # Must link to shared libarrow_flight: we don't want to link more than one
+ # copy of gRPC into the eventual Cython shared object, otherwise gRPC calls
+ # fail with weird errors due to multiple copies of global static state (The
+ # other solution is to link gRPC shared everywhere instead of statically
only
+ # in Flight)
+ find_package(ArrowFlight REQUIRED)
+ include_directories("${ARROW_CPP_SOURCE_DIR}/src"
"${ARROW_CPP_SOURCE_DIR}/}/${ARROW_BUILD_DIR}/src")
Review Comment:
Could you use `EXTRA_INCLUDES` in the `add_arrow_lib()` call below instead of a
global `include_directories()`?
BTW, why is this needed? I think `arrow_flight_shared` adds the needed include
directories.
##########
python/pyarrow/src_arrow/CMakeLists.txt:
##########
@@ -0,0 +1,447 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# ARROW_SOURCE_DIR: location of arrow/cpp
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE}/cpp")
+set(ARROW_SOURCE_DIR "${ARROW_CPP_SOURCE_DIR}")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+#
+# Arrow
+#
+
+find_package(Arrow REQUIRED)
+include(ArrowOptions)
+
+#
+# Python
+#
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3Alt 3.7 REQUIRED)
+include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} src)
+
+add_custom_target(arrow_python-all)
+add_custom_target(arrow_python)
+add_custom_target(arrow_python-tests)
+add_dependencies(arrow_python-all arrow_python arrow_python-tests)
+
+set(ARROW_PYTHON_SRCS
+ arrow_to_pandas.cc
+ benchmark.cc
+ common.cc
+ datetime.cc
+ decimal.cc
+ deserialize.cc
+ extension_type.cc
+ gdb.cc
+ helpers.cc
+ inference.cc
+ init.cc
+ io.cc
+ ipc.cc
+ numpy_convert.cc
+ numpy_to_arrow.cc
+ python_to_arrow.cc
+ pyarrow.cc
+ serialize.cc
+ udf.cc)
+
+set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+ SKIP_UNITY_BUILD_INCLUSION ON)
+
+#
+# Arrow vs C PyArrow options
+#
+
+# Check all the options from Arrow and C PyArrow to be in line
+if(PYARROW_WITH_DATASET)
+ find_package(ArrowDataset REQUIRED)
+endif()
+
+if(PYARROW_WITH_PARQUET_ENCRYPTION)
+ if(PARQUET_REQUIRE_ENCRYPTION)
+ list(APPEND ARROW_PYTHON_SRCS parquet_encryption.cc)
+ find_package(Parquet REQUIRED)
+ else()
+ message(FATAL_ERROR "You must build Arrow C++ with
PARQUET_REQUIRE_ENCRYPTION=ON")
+ endif()
+endif()
+
+if(PYARROW_WITH_HDFS)
+ if(NOT ARROW_HDFS)
+ message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
+ endif()
+endif()
+
+# Check for only Arrow C++ options
+if(ARROW_CSV)
+ list(APPEND ARROW_PYTHON_SRCS csv.cc)
+endif()
+
+if(ARROW_FILESYSTEM)
+ list(APPEND ARROW_PYTHON_SRCS filesystem.cc)
+endif()
+
+# Link to arrow dependecies
+if(ARROW_BUILD_SHARED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_shared)
+else()
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
+ find_package(Threads REQUIRED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_static Threads::Threads)
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID
STREQUAL "Clang")
+ set_property(SOURCE pyarrow.cc
+ APPEND_STRING
+ PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
+endif()
+
+#
+# Compiler stuff
+#
+
+include(GNUInstallDirs)
+
+# This ensures that things like gnu++11 get passed correctly
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+ set(CMAKE_CXX_STANDARD 11)
+endif()
+
+# We require a C++11 compliant compiler
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Needed gdb flags
+include(SetupCxxFlags)
+
+#
+# shred/static link libs
+#
+
+set(ARROW_PYTHON_SHARED_LINK_LIBS arrow_shared)
+set(ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS)
+set(ARROW_PYTHON_STATIC_LINK_LIBS ${PYTHON_OTHER_LIBS})
+
+if(WIN32)
+ list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS ${PYTHON_LIBRARIES}
${PYTHON_OTHER_LIBS})
+endif()
+
+if(PARQUET_REQUIRE_ENCRYPTION AND PYARROW_WITH_PARQUET_ENCRYPTION)
+ list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS parquet_shared)
+endif()
+
+set(ARROW_PYTHON_INCLUDES ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS})
+
+# Inlude macros needed to find and use add_arrow_lib function
+include(BuildUtils)
+include(CMakePackageConfigHelpers)
+
+# Set the output directory for cmake module
+# (CMAKE_INSTALL_PREFIX = python/build/dist! should be set in setup.py!)
+set(ARROW_CMAKE_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
+
+# Changing ARROW_SOURCE_DIR for sdist build
+# In this case cpp/cmake_modules doesn't exist
+if(NOT EXISTS "${ARROW_SOURCE_DIR}/cmake_modules/Find${MODULE}.cmake")
+ set(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR})
+endif()
+
+add_arrow_lib(arrow_python
+ CMAKE_PACKAGE_NAME
+ ArrowPython
+ PKG_CONFIG_NAME
+ arrow-python
+ SOURCES
+ ${ARROW_PYTHON_SRCS}
+ PRECOMPILED_HEADERS
+ "$<$<COMPILE_LANGUAGE:CXX>:pch.h>"
+ OUTPUTS
+ ARROW_PYTHON_LIBRARIES
+ DEPENDENCIES
+ ${ARROW_PYTHON_DEPENDENCIES}
+ SHARED_LINK_FLAGS
+ ${ARROW_VERSION_SCRIPT_FLAGS}
+ SHARED_LINK_LIBS
+ ${ARROW_PYTHON_SHARED_LINK_LIBS}
+ SHARED_PRIVATE_LINK_LIBS
+ ${ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS}
+ STATIC_LINK_LIBS
+ ${ARROW_PYTHON_STATIC_LINK_LIBS}
+ EXTRA_INCLUDES
+ "${ARROW_PYTHON_INCLUDES}")
+
+add_dependencies(arrow_python ${ARROW_PYTHON_LIBRARIES})
+
+foreach(LIB_TARGET ${ARROW_PYTHON_LIBRARIES})
+ target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYTHON_EXPORTING)
+endforeach()
+
+if(ARROW_BUILD_STATIC AND MSVC)
+ target_compile_definitions(arrow_python_static PUBLIC ARROW_STATIC)
+endif()
+
+if(ARROW_FLIGHT AND ARROW_BUILD_SHARED)
+ # Must link to shared libarrow_flight: we don't want to link more than one
+ # copy of gRPC into the eventual Cython shared object, otherwise gRPC calls
+ # fail with weird errors due to multiple copies of global static state (The
+ # other solution is to link gRPC shared everywhere instead of statically
only
+ # in Flight)
+ find_package(ArrowFlight REQUIRED)
+ include_directories("${ARROW_CPP_SOURCE_DIR}/src"
"${ARROW_CPP_SOURCE_DIR}/}/${ARROW_BUILD_DIR}/src")
+
+ set(FLIGHT_LINK_LIBS arrow_flight_shared)
+
+ add_arrow_lib(arrow_python_flight
+ CMAKE_PACKAGE_NAME
+ ArrowPythonFlight
+ PKG_CONFIG_NAME
+ arrow-python-flight
+ SOURCES
+ flight.cc
+ OUTPUTS
+ ARROW_PYFLIGHT_LIBRARIES
+ SHARED_LINK_FLAGS
+ ${ARROW_VERSION_SCRIPT_FLAGS}
+ SHARED_LINK_LIBS
+ arrow_python_shared
+ arrow_flight_shared
+ STATIC_LINK_LIBS
+ ${PYTHON_OTHER_LIBS}
+ EXTRA_INCLUDES
+ "${ARROW_PYTHON_INCLUDES}"
+ PRIVATE_INCLUDES
+ "${Protobuf_INCLUDE_DIRS}")
+
+ add_dependencies(arrow_python ${ARROW_PYFLIGHT_LIBRARIES})
+
+ foreach(LIB_TARGET ${ARROW_PYFLIGHT_LIBRARIES})
+ target_compile_definitions(${LIB_TARGET} PRIVATE ARROW_PYFLIGHT_EXPORTING)
+ endforeach()
+
+ if(ARROW_BUILD_STATIC AND MSVC)
+ target_compile_definitions(arrow_python_flight_static PUBLIC ARROW_STATIC)
+ endif()
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID
STREQUAL "Clang")
+ # Clang, be quiet. Python C API has lots of macros
+ set_property(SOURCE ${ARROW_PYTHON_SRCS}
+ APPEND_STRING
+ PROPERTY COMPILE_FLAGS -Wno-parentheses-equality)
+endif()
+
+arrow_install_all_headers("arrow/python")
+
+# ----------------------------------------------------------------------
+
+#
+# Tests
+# The tests will be moved to Cython and are currently supported for bundled
GTest
+# Follow-up: https://issues.apache.org/jira/browse/ARROW-17016?filter=-1
Review Comment:
```suggestion
# Follow-up: https://issues.apache.org/jira/browse/ARROW-17016
```
##########
python/setup.py:
##########
@@ -227,6 +228,105 @@ def initialize_options(self):
'_hdfsio',
'gandiva']
+ def _run_cmake_arrow_python(self):
+ # check if build_type is correctly passed / set
+ if self.build_type.lower() not in ('release', 'debug'):
+ raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to "
+ "be 'release' or 'debug'")
+
+ # The directory containing this setup.py
+ source = os.path.dirname(os.path.abspath(__file__))
+ # The directory containing this C PyArrow CMakeLists.txt
+ source_cpyarrow = pjoin(source, "pyarrow/src_arrow")
+
+ # The directory for the module being built
+ build_cmd = self.get_finalized_command('build')
+ saved_cwd = os.getcwd()
+ build_temp = pjoin(saved_cwd, 'build', 'dist', 'temp')
+ build_include = pjoin(saved_cwd, 'build', 'dist', 'include')
+ build_lib = pjoin(os.getcwd(), build_cmd.build_lib)
+
+ # The directory containing Arrow C++ build
+ arrow_build_dir = os.environ.get('ARROW_BUILD_DIR', 'build')
+
+ if self.inplace:
+ # a bit hacky
+ build_lib = saved_cwd
+
+ if not os.path.isdir(build_temp):
+ self.mkpath(build_temp)
+ if not os.path.isdir(build_lib):
+ self.mkpath(build_lib)
+ if not os.path.isdir(build_include):
+ self.mkpath(build_include)
+
+ # Change to the build directory
+ with changed_dir(build_temp):
+ # cmake args
+ cmake_options = [
+ '-DCMAKE_INSTALL_PREFIX=' +
+ str(pjoin(saved_cwd, 'build/dist')),
+ '-DCMAKE_BUILD_TYPE={0}'.format(self.build_type.lower()),
+ '-DARROW_BUILD_DIR=' + str(arrow_build_dir),
+ '-DPYTHON_EXECUTABLE=%s' % sys.executable,
+ '-DPython3_EXECUTABLE=%s' % sys.executable,
+ ]
+
+ # Check for specific options
+ def append_cmake_bool(value, varname):
+ cmake_options.append('-D{0}={1}'.format(
+ varname, 'on' if value else 'off'))
+
+ append_cmake_bool(self.with_dataset, 'PYARROW_WITH_DATASET')
+ append_cmake_bool(self.with_parquet_encryption,
+ 'PYARROW_WITH_PARQUET_ENCRYPTION')
+ append_cmake_bool(self.with_hdfs,
+ 'PYARROW_WITH_HDFS')
+
+ # Windows
+ if self.cmake_generator:
+ cmake_options += ['-G', self.cmake_generator]
+
+ # build args
+ build_tool_args = []
+ if os.environ.get('PYARROW_PARALLEL'):
+ build_tool_args.append('--')
+ build_tool_args.append(
+ '-j{0}'.format(os.environ['PYARROW_PARALLEL']))
+
+ # run cmake
+ print("-- Running cmake for arrow python")
+ self.spawn(['cmake'] + cmake_options + [source_cpyarrow])
+ print("-- Finished cmake for arrow python")
+ # run make & install
+ print("-- Running make build and install for arrow python")
Review Comment:
```suggestion
print("-- Running cmake build and install for arrow python")
```
##########
docs/source/developers/python.rst:
##########
@@ -131,6 +131,29 @@ for ``.py`` files or
for ``.pyx`` and ``.pxi`` files. In this case you will also need to
install the `pytest-cython <https://github.com/lgpage/pytest-cython>`_ plugin.
+Testing Arrow Python (C++ code)
+-------------------------------
+If you want to run ctest for the tests that are included in the Arrow Python
+module, you will need to build Arrow with ``DARROW_BUILD_TESTS=ON``.
+
+.. note::
+
+ Currently building the C++ unit tests does not work with googletest
+ from conda-forge, so we must use the ``BUNDLED`` source for building that
+ dependency
+
+ In case you use conda add ``-DGTest_SOURCE=BUNDLED`` to the cmake flags
Review Comment:
```suggestion
In case you use conda add ``-DGTest_SOURCE=BUNDLED`` to the CMake flags
```
##########
python/pyarrow/src_arrow/CMakeLists.txt:
##########
@@ -0,0 +1,447 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# ARROW_SOURCE_DIR: location of arrow/cpp
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE}/cpp")
Review Comment:
```suggestion
get_filename_component(ARROW_SOURCE_DIR ${PYTHON_SOURCE_DIR} DIRECTORY)
set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE_DIR}/cpp")
```
##########
python/pyarrow/src_arrow/CMakeLists.txt:
##########
@@ -0,0 +1,447 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# ARROW_SOURCE_DIR: location of arrow/cpp
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE}/cpp")
+set(ARROW_SOURCE_DIR "${ARROW_CPP_SOURCE_DIR}")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+#
+# Arrow
+#
+
+find_package(Arrow REQUIRED)
+include(ArrowOptions)
+
+#
+# Python
+#
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
Review Comment:
Why do we need to set the `Python3_FIND_*` variables? I think that they should
be specified by users if they need them.
##########
python/pyarrow/src_arrow/CMakeLists.txt:
##########
@@ -0,0 +1,447 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# ARROW_SOURCE_DIR: location of arrow/cpp
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE}/cpp")
+set(ARROW_SOURCE_DIR "${ARROW_CPP_SOURCE_DIR}")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+#
+# Arrow
+#
+
+find_package(Arrow REQUIRED)
+include(ArrowOptions)
+
+#
+# Python
+#
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3Alt 3.7 REQUIRED)
+include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} src)
+
+add_custom_target(arrow_python-all)
+add_custom_target(arrow_python)
+add_custom_target(arrow_python-tests)
+add_dependencies(arrow_python-all arrow_python arrow_python-tests)
+
+set(ARROW_PYTHON_SRCS
+ arrow_to_pandas.cc
+ benchmark.cc
+ common.cc
+ datetime.cc
+ decimal.cc
+ deserialize.cc
+ extension_type.cc
+ gdb.cc
+ helpers.cc
+ inference.cc
+ init.cc
+ io.cc
+ ipc.cc
+ numpy_convert.cc
+ numpy_to_arrow.cc
+ python_to_arrow.cc
+ pyarrow.cc
+ serialize.cc
+ udf.cc)
+
+set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+ SKIP_UNITY_BUILD_INCLUSION ON)
+
+#
+# Arrow vs C PyArrow options
+#
+
+# Check all the options from Arrow and C PyArrow to be in line
Review Comment:
```suggestion
# Check all the options from Arrow and ArrowPython to be in line
```
##########
python/pyarrow/src_arrow/CMakeLists.txt:
##########
@@ -0,0 +1,447 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# ARROW_SOURCE_DIR: location of arrow/cpp
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE}/cpp")
+set(ARROW_SOURCE_DIR "${ARROW_CPP_SOURCE_DIR}")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+#
+# Arrow
+#
+
+find_package(Arrow REQUIRED)
+include(ArrowOptions)
+
+#
+# Python
+#
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3Alt 3.7 REQUIRED)
+include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} src)
Review Comment:
Can we remove this, since we have `ARROW_PYTHON_INCLUDES`?
##########
python/pyarrow/tests/test_gdb.py:
##########
@@ -154,7 +154,7 @@ def select_frame(self, func_name):
# but it's not available on old GDB versions (such as 8.1.1),
# so instead parse the stack trace for a matching frame number.
out = self.run_command("info stack")
- pat = r"(?mi)^#(\d+)\s+.* in " + re.escape(func_name) + " "
+ pat = r"(?mi)^#(\d+)\s+.* in " + re.escape(func_name)
Review Comment:
Why is this change needed?
##########
python/setup.py:
##########
@@ -227,6 +228,105 @@ def initialize_options(self):
'_hdfsio',
'gandiva']
+ def _run_cmake_arrow_python(self):
+ # check if build_type is correctly passed / set
+ if self.build_type.lower() not in ('release', 'debug'):
+ raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to "
+ "be 'release' or 'debug'")
+
+ # The directory containing this setup.py
+ source = os.path.dirname(os.path.abspath(__file__))
+ # The directory containing this C PyArrow CMakeLists.txt
+ source_cpyarrow = pjoin(source, "pyarrow/src_arrow")
+
+ # The directory for the module being built
+ build_cmd = self.get_finalized_command('build')
+ saved_cwd = os.getcwd()
+ build_temp = pjoin(saved_cwd, 'build', 'dist', 'temp')
+ build_include = pjoin(saved_cwd, 'build', 'dist', 'include')
+ build_lib = pjoin(os.getcwd(), build_cmd.build_lib)
+
+ # The directory containing Arrow C++ build
+ arrow_build_dir = os.environ.get('ARROW_BUILD_DIR', 'build')
+
+ if self.inplace:
+ # a bit hacky
+ build_lib = saved_cwd
+
+ if not os.path.isdir(build_temp):
+ self.mkpath(build_temp)
+ if not os.path.isdir(build_lib):
+ self.mkpath(build_lib)
+ if not os.path.isdir(build_include):
+ self.mkpath(build_include)
+
+ # Change to the build directory
+ with changed_dir(build_temp):
+ # cmake args
+ cmake_options = [
+ '-DCMAKE_INSTALL_PREFIX=' +
+ str(pjoin(saved_cwd, 'build/dist')),
+ '-DCMAKE_BUILD_TYPE={0}'.format(self.build_type.lower()),
+ '-DARROW_BUILD_DIR=' + str(arrow_build_dir),
+ '-DPYTHON_EXECUTABLE=%s' % sys.executable,
+ '-DPython3_EXECUTABLE=%s' % sys.executable,
+ ]
+
+ # Check for specific options
+ def append_cmake_bool(value, varname):
+ cmake_options.append('-D{0}={1}'.format(
+ varname, 'on' if value else 'off'))
+
+ append_cmake_bool(self.with_dataset, 'PYARROW_WITH_DATASET')
+ append_cmake_bool(self.with_parquet_encryption,
+ 'PYARROW_WITH_PARQUET_ENCRYPTION')
+ append_cmake_bool(self.with_hdfs,
+ 'PYARROW_WITH_HDFS')
+
+ # Windows
+ if self.cmake_generator:
+ cmake_options += ['-G', self.cmake_generator]
+
+ # build args
+ build_tool_args = []
+ if os.environ.get('PYARROW_PARALLEL'):
+ build_tool_args.append('--')
+ build_tool_args.append(
+ '-j{0}'.format(os.environ['PYARROW_PARALLEL']))
+
+ # run cmake
+ print("-- Running cmake for arrow python")
+ self.spawn(['cmake'] + cmake_options + [source_cpyarrow])
+ print("-- Finished cmake for arrow python")
+ # run make & install
+ print("-- Running make build and install for arrow python")
+ self.spawn(['cmake', '--build', '.', '--target', 'install'] +
+ build_tool_args)
+ print("-- Finished make build and install for arrow python")
Review Comment:
```suggestion
print("-- Finished cmake build and install for arrow python")
```
##########
python/pyarrow/src_arrow/CMakeLists.txt:
##########
@@ -0,0 +1,447 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# ARROW_SOURCE_DIR: location of arrow/cpp
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE}/cpp")
+set(ARROW_SOURCE_DIR "${ARROW_CPP_SOURCE_DIR}")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+#
+# Arrow
+#
+
+find_package(Arrow REQUIRED)
+include(ArrowOptions)
+
+#
+# Python
+#
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3Alt 3.7 REQUIRED)
+include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} src)
+
+add_custom_target(arrow_python-all)
+add_custom_target(arrow_python)
+add_custom_target(arrow_python-tests)
+add_dependencies(arrow_python-all arrow_python arrow_python-tests)
+
+set(ARROW_PYTHON_SRCS
+ arrow_to_pandas.cc
+ benchmark.cc
+ common.cc
+ datetime.cc
+ decimal.cc
+ deserialize.cc
+ extension_type.cc
+ gdb.cc
+ helpers.cc
+ inference.cc
+ init.cc
+ io.cc
+ ipc.cc
+ numpy_convert.cc
+ numpy_to_arrow.cc
+ python_to_arrow.cc
+ pyarrow.cc
+ serialize.cc
+ udf.cc)
+
+set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+ SKIP_UNITY_BUILD_INCLUSION ON)
+
+#
+# Arrow vs C PyArrow options
Review Comment:
```suggestion
# Arrow vs ArrowPython options
```
##########
python/setup.py:
##########
@@ -227,6 +228,105 @@ def initialize_options(self):
'_hdfsio',
'gandiva']
+ def _run_cmake_arrow_python(self):
+ # check if build_type is correctly passed / set
+ if self.build_type.lower() not in ('release', 'debug'):
+ raise ValueError("--build-type (or PYARROW_BUILD_TYPE) needs to "
+ "be 'release' or 'debug'")
+
+ # The directory containing this setup.py
+ source = os.path.dirname(os.path.abspath(__file__))
+ # The directory containing this C PyArrow CMakeLists.txt
Review Comment:
```suggestion
# The directory containing this ArrowPython CMakeLists.txt
```
##########
python/pyarrow/src_arrow/CMakeLists.txt:
##########
@@ -0,0 +1,447 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# ARROW_SOURCE_DIR: location of arrow/cpp
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE}/cpp")
+set(ARROW_SOURCE_DIR "${ARROW_CPP_SOURCE_DIR}")
Review Comment:
Why do we need `ARROW_SOURCE_DIR` as an alias of `ARROW_CPP_SOURCE_DIR`?
##########
python/pyarrow/src_arrow/CMakeLists.txt:
##########
@@ -0,0 +1,447 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# ARROW_SOURCE_DIR: location of arrow/cpp
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
Review Comment:
How about `ARROW_PYTHON_SOURCE_DIR`?
```suggestion
get_filename_component(ARROW_PYTHON_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
get_filename_component(PYTHON_SOURCE_DIR ${ARROW_PYTHON_SOURCE_DIR}
DIRECTORY)
```
##########
python/pyarrow/src_arrow/CMakeLists.txt:
##########
@@ -0,0 +1,447 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#
+# arrow_python
+#
+
+cmake_minimum_required(VERSION 3.5)
+
+# RPATH settings on macOS do not affect install_name.
+# https://cmake.org/cmake/help/latest/policy/CMP0068.html
+if(POLICY CMP0068)
+ cmake_policy(SET CMP0068 NEW)
+endif()
+
+#
+# Define
+# ARROW_SOURCE_DIR: location of arrow/cpp
+# CMAKE_MODULE_PATH: location of cmake_modules in python
+#
+
+get_filename_component(PYARROW_SOURCE_DIR ${CMAKE_SOURCE_DIR} DIRECTORY)
+get_filename_component(PYTHON_SOURCE_DIR ${PYARROW_SOURCE_DIR} DIRECTORY)
+get_filename_component(ARROW_SOURCE ${PYTHON_SOURCE_DIR} DIRECTORY)
+set(ARROW_CPP_SOURCE_DIR "${ARROW_SOURCE}/cpp")
+set(ARROW_SOURCE_DIR "${ARROW_CPP_SOURCE_DIR}")
+
+# normalize ARROW_HOME path
+file(TO_CMAKE_PATH "$ENV{ARROW_HOME}" ARROW_HOME)
+set(CMAKE_MODULE_PATH "${PYTHON_SOURCE_DIR}/cmake_modules"
"${ARROW_HOME}/lib/cmake/arrow")
+
+#
+# Arrow version
+#
+
+set(ARROW_PYTHON_VERSION "9.0.0-SNAPSHOT")
+string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" ARROW_PYTHON_BASE_VERSION
"${ARROW_PYTHON_VERSION}")
+# Need to set to ARRROW_VERSION before finding Arrow package!
+project(arrow_python VERSION "${ARROW_PYTHON_BASE_VERSION}")
+
+if(NOT DEFINED CMAKE_BUILD_TYPE)
+ set(CMAKE_BUILD_TYPE Release)
+endif()
+
+#
+# Arrow
+#
+
+find_package(Arrow REQUIRED)
+include(ArrowOptions)
+
+#
+# Python
+#
+# Use the first Python installation on PATH, not the newest one
+set(Python3_FIND_STRATEGY "LOCATION")
+# On Windows, use registry last, not first
+set(Python3_FIND_REGISTRY "LAST")
+# On macOS, use framework last, not first
+set(Python3_FIND_FRAMEWORK "LAST")
+
+find_package(Python3Alt 3.7 REQUIRED)
+include_directories(SYSTEM ${NUMPY_INCLUDE_DIRS} ${PYTHON_INCLUDE_DIRS} src)
+
+add_custom_target(arrow_python-all)
+add_custom_target(arrow_python)
+add_custom_target(arrow_python-tests)
+add_dependencies(arrow_python-all arrow_python arrow_python-tests)
+
+set(ARROW_PYTHON_SRCS
+ arrow_to_pandas.cc
+ benchmark.cc
+ common.cc
+ datetime.cc
+ decimal.cc
+ deserialize.cc
+ extension_type.cc
+ gdb.cc
+ helpers.cc
+ inference.cc
+ init.cc
+ io.cc
+ ipc.cc
+ numpy_convert.cc
+ numpy_to_arrow.cc
+ python_to_arrow.cc
+ pyarrow.cc
+ serialize.cc
+ udf.cc)
+
+set_source_files_properties(init.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON
+ SKIP_UNITY_BUILD_INCLUSION ON)
+
+#
+# Arrow vs C PyArrow options
+#
+
+# Check all the options from Arrow and C PyArrow to be in line
+if(PYARROW_WITH_DATASET)
+ find_package(ArrowDataset REQUIRED)
+endif()
+
+if(PYARROW_WITH_PARQUET_ENCRYPTION)
+ if(PARQUET_REQUIRE_ENCRYPTION)
+ list(APPEND ARROW_PYTHON_SRCS parquet_encryption.cc)
+ find_package(Parquet REQUIRED)
+ else()
+ message(FATAL_ERROR "You must build Arrow C++ with
PARQUET_REQUIRE_ENCRYPTION=ON")
+ endif()
+endif()
+
+if(PYARROW_WITH_HDFS)
+ if(NOT ARROW_HDFS)
+ message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
+ endif()
+endif()
+
+# Check for only Arrow C++ options
+if(ARROW_CSV)
+ list(APPEND ARROW_PYTHON_SRCS csv.cc)
+endif()
+
+if(ARROW_FILESYSTEM)
+ list(APPEND ARROW_PYTHON_SRCS filesystem.cc)
+endif()
+
+# Link to arrow dependecies
+if(ARROW_BUILD_SHARED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_shared)
+else()
+ set(THREADS_PREFER_PTHREAD_FLAG ON)
+ find_package(Threads REQUIRED)
+ set(ARROW_PYTHON_DEPENDENCIES arrow_static Threads::Threads)
+endif()
+
+if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR CMAKE_CXX_COMPILER_ID
STREQUAL "Clang")
+ set_property(SOURCE pyarrow.cc
+ APPEND_STRING
+ PROPERTY COMPILE_FLAGS " -Wno-cast-qual ")
+endif()
+
+#
+# Compiler stuff
+#
+
+include(GNUInstallDirs)
+
+# This ensures that things like gnu++11 get passed correctly
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+ set(CMAKE_CXX_STANDARD 11)
+endif()
+
+# We require a C++11 compliant compiler
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# Needed gdb flags
+include(SetupCxxFlags)
+
+#
+# shred/static link libs
+#
+
+set(ARROW_PYTHON_SHARED_LINK_LIBS arrow_shared)
+set(ARROW_PYTHON_SHARED_PRIVATE_LINK_LIBS)
+set(ARROW_PYTHON_STATIC_LINK_LIBS ${PYTHON_OTHER_LIBS})
+
+if(WIN32)
+ list(APPEND ARROW_PYTHON_SHARED_LINK_LIBS ${PYTHON_LIBRARIES}
${PYTHON_OTHER_LIBS})
+endif()
+
+if(PARQUET_REQUIRE_ENCRYPTION AND PYARROW_WITH_PARQUET_ENCRYPTION)
Review Comment:
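It seems that `PARQUET_REQUIRE_ENCRYPTION` is redundant here, because the earlier `if(PYARROW_WITH_PARQUET_ENCRYPTION)` block already fails with `FATAL_ERROR` when `PARQUET_REQUIRE_ENCRYPTION` is off: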
```suggestion
if(PYARROW_WITH_PARQUET_ENCRYPTION)
```
##########
ci/scripts/python_test.sh:
##########
@@ -54,4 +55,11 @@ export PYARROW_TEST_ORC
export PYARROW_TEST_PARQUET
export PYARROW_TEST_S3
+# Testing Arrow Python
+if [ "${ARROW_BUILD_TESTS}" == "ON" ]; then
+ pushd ${test_dir}
+ ctest
Review Comment:
```suggestion
ctest --output-on-failure --timeout 300
```
I would also like to add `--parallel ${n_jobs}`, as `ci/scripts/cpp_test.sh` does.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]