http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index ef874e3..35a1a89 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -47,9 +47,6 @@ endif() # Top level cmake dir if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") - option(PYARROW_BUILD_TESTS - "Build the PyArrow C++ googletest unit tests" - OFF) option(PYARROW_BUILD_PARQUET "Build the PyArrow Parquet integration" OFF) @@ -57,7 +54,7 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") "Build the PyArrow jemalloc integration" OFF) option(PYARROW_BUNDLE_ARROW_CPP - "Bundle the Arrow C++ libraries" + "Bundle the Arrow C++ libraries" OFF) endif() @@ -75,6 +72,8 @@ endif(CCACHE_FOUND) # Compiler flags ############################################################ +include(BuildUtils) +include(CompilerInfo) include(SetupCxxFlags) # Add common flags @@ -86,8 +85,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-omit-frame-pointer") # Suppress Cython warnings set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable") -# Determine compiler version -include(CompilerInfo) if ("${COMPILER_FAMILY}" STREQUAL "clang") # Using Clang with ccache causes a bunch of spurious warnings that are @@ -216,115 +213,8 @@ include_directories(SYSTEM src) ############################################################ -# Testing -############################################################ - -# Add a new test case, with or without an executable that should be built. -# -# REL_TEST_NAME is the name of the test. It may be a single component -# (e.g. monotime-test) or contain additional components (e.g. -# net/net_util-test). Either way, the last component must be a globally -# unique name. -# -# Arguments after the test name will be passed to set_tests_properties(). -function(ADD_PYARROW_TEST REL_TEST_NAME) - if(NO_TESTS) - return() - endif() - get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) - - if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME}.cc) - # This test has a corresponding .cc file, set it up as an executable. - set(TEST_PATH "${EXECUTABLE_OUTPUT_PATH}/${TEST_NAME}") - add_executable(${TEST_NAME} "${REL_TEST_NAME}.cc") - target_link_libraries(${TEST_NAME} ${PYARROW_TEST_LINK_LIBS}) - else() - # No executable, just invoke the test (probably a script) directly. - set(TEST_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${REL_TEST_NAME}) - endif() - - add_test(${TEST_NAME} - ${BUILD_SUPPORT_DIR}/run-test.sh ${TEST_PATH}) - if(ARGN) - set_tests_properties(${TEST_NAME} PROPERTIES ${ARGN}) - endif() -endfunction() - -# A wrapper for add_dependencies() that is compatible with NO_TESTS. -function(ADD_PYARROW_TEST_DEPENDENCIES REL_TEST_NAME) - if(NO_TESTS) - return() - endif() - get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) - - add_dependencies(${TEST_NAME} ${ARGN}) -endfunction() - -enable_testing() - -############################################################ # Dependencies ############################################################ -function(ADD_THIRDPARTY_LIB LIB_NAME) - set(options) - set(one_value_args SHARED_LIB STATIC_LIB) - set(multi_value_args DEPS) - cmake_parse_arguments(ARG "${options}" "${one_value_args}" "${multi_value_args}" ${ARGN}) - if(ARG_UNPARSED_ARGUMENTS) - message(SEND_ERROR "Error: unrecognized arguments: ${ARG_UNPARSED_ARGUMENTS}") - endif() - - if(("${PYARROW_LINK}" STREQUAL "s" AND ARG_STATIC_LIB) OR (NOT ARG_SHARED_LIB)) - if(NOT ARG_STATIC_LIB) - message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}") - endif() - add_library(${LIB_NAME} STATIC IMPORTED) - set_target_properties(${LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") - message(STATUS "Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}") - else() - add_library(${LIB_NAME} SHARED IMPORTED) - set_target_properties(${LIB_NAME} - PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") - message(STATUS "Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}") - endif() - - if(ARG_DEPS) - set_target_properties(${LIB_NAME} - PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}") - endif() - - # Set up an "exported variant" for this thirdparty library (see "Visibility" - # above). It's the same as the real target, just with an "_exported" suffix. - # We prefer the static archive if it exists (as it's akin to an "internal" - # library), but we'll settle for the shared object if we must. - # - # A shared object exported variant will force any "leaf" library that - # transitively depends on it to also depend on it at runtime; this is - # desirable for some libraries (e.g. cyrus_sasl). - set(LIB_NAME_EXPORTED ${LIB_NAME}_exported) - if(ARG_STATIC_LIB) - add_library(${LIB_NAME_EXPORTED} STATIC IMPORTED) - set_target_properties(${LIB_NAME_EXPORTED} - PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") - else() - add_library(${LIB_NAME_EXPORTED} SHARED IMPORTED) - set_target_properties(${LIB_NAME_EXPORTED} - PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") - endif() - if(ARG_DEPS) - set_target_properties(${LIB_NAME_EXPORTED} - PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}") - endif() -endfunction() - -## GMock -if (PYARROW_BUILD_TESTS) - find_package(GTest REQUIRED) - include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) - ADD_THIRDPARTY_LIB(gtest - STATIC_LIB ${GTEST_STATIC_LIB}) -endif() ## Parquet find_package(Parquet) @@ -352,6 +242,8 @@ if (PYARROW_BUNDLE_ARROW_CPP) COPYONLY) SET(ARROW_IPC_SHARED_LIB ${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_ipc${CMAKE_SHARED_LIBRARY_SUFFIX}) + SET(ARROW_PYTHON_SHARED_LIB + ${BUILD_OUTPUT_ROOT_DIRECTORY}/libarrow_python${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() ADD_THIRDPARTY_LIB(arrow @@ -360,66 +252,8 @@ ADD_THIRDPARTY_LIB(arrow_io SHARED_LIB ${ARROW_IO_SHARED_LIB}) ADD_THIRDPARTY_LIB(arrow_ipc SHARED_LIB ${ARROW_IPC_SHARED_LIB}) - -############################################################ -# Linker setup -############################################################ - -set(PYARROW_MIN_TEST_LIBS - pyarrow_test_main - pyarrow) - -set(PYARROW_MIN_TEST_LIBS - pyarrow_test_main - pyarrow - ${PYARROW_BASE_LIBS}) - -if(NOT APPLE AND PYARROW_BUILD_TESTS) - ADD_THIRDPARTY_LIB(python - SHARED_LIB "${PYTHON_LIBRARIES}") - list(APPEND PYARROW_MIN_TEST_LIBS python) -endif() - -set(PYARROW_TEST_LINK_LIBS ${PYARROW_MIN_TEST_LIBS}) - -############################################################ -# "make ctags" target -############################################################ -if (UNIX) - add_custom_target(ctags ctags -R --languages=c++,c --exclude=thirdparty/installed) -endif (UNIX) - -############################################################ -# "make etags" target -############################################################ -if (UNIX) - add_custom_target(tags etags --members --declarations - `find ${CMAKE_CURRENT_SOURCE_DIR}/src - -name \\*.cc -or -name \\*.hh -or -name \\*.cpp -or -name \\*.h -or -name \\*.c -or - -name \\*.f`) - add_custom_target(etags DEPENDS tags) -endif (UNIX) - -############################################################ -# "make cscope" target -############################################################ -if (UNIX) - add_custom_target(cscope find ${CMAKE_CURRENT_SOURCE_DIR} - ( -name \\*.cc -or -name \\*.hh -or -name \\*.cpp -or - -name \\*.h -or -name \\*.c -or -name \\*.f ) - -exec echo \"{}\" \; > cscope.files && cscope -q -b VERBATIM) -endif (UNIX) - -############################################################ -# "make lint" target -############################################################ -if (UNIX) - # Full lint - add_custom_target(lint ${BUILD_SUPPORT_DIR}/cpplint.py - --verbose=2 - --filter=-whitespace/comments,-readability/todo,-build/header_guard - `find ${CMAKE_CURRENT_SOURCE_DIR}/src -name \\*.cc -or -name \\*.h`) -endif (UNIX) +ADD_THIRDPARTY_LIB(arrow_python + SHARED_LIB ${ARROW_PYTHON_SHARED_LIB}) ############################################################ # Subdirectories @@ -429,9 +263,6 @@ if (UNIX) set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE) endif() -add_subdirectory(src/pyarrow) -add_subdirectory(src/pyarrow/util) - set(CYTHON_EXTENSIONS array config @@ -444,19 +275,11 @@ set(CYTHON_EXTENSIONS table ) -set(PYARROW_SRCS - src/pyarrow/common.cc - src/pyarrow/config.cc - src/pyarrow/helpers.cc - src/pyarrow/io.cc - src/pyarrow/adapters/builtin.cc - src/pyarrow/adapters/pandas.cc -) - set(LINK_LIBS - arrow - arrow_io - arrow_ipc + arrow_shared + arrow_io_shared + arrow_ipc_shared + arrow_python_shared ) if (PYARROW_BUILD_PARQUET) @@ -497,24 +320,12 @@ if (PYARROW_BUILD_JEMALLOC) SHARED_LIB ${ARROW_JEMALLOC_SHARED_LIB}) set(LINK_LIBS ${LINK_LIBS} - arrow_jemalloc) + arrow_jemalloc_shared) set(CYTHON_EXTENSIONS ${CYTHON_EXTENSIONS} jemalloc) endif() -add_library(pyarrow SHARED - ${PYARROW_SRCS}) -if (PYARROW_BUNDLE_ARROW_CPP) - set_target_properties(pyarrow PROPERTIES - INSTALL_RPATH "\$ORIGIN") -endif() -target_link_libraries(pyarrow ${LINK_LIBS}) - -if(APPLE) - set_target_properties(pyarrow PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") -endif() - ############################################################ # Setup and build Cython modules ############################################################ @@ -555,5 +366,5 @@ foreach(module ${CYTHON_EXTENSIONS}) set_target_properties(${module_name} PROPERTIES INSTALL_RPATH ${module_install_rpath}) - target_link_libraries(${module_name} pyarrow) + target_link_libraries(${module_name} ${LINK_LIBS}) endforeach(module)
http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/cmake_modules/FindArrow.cmake ---------------------------------------------------------------------- diff --git a/python/cmake_modules/FindArrow.cmake b/python/cmake_modules/FindArrow.cmake index 5d0207d..5030c9c 100644 --- a/python/cmake_modules/FindArrow.cmake +++ b/python/cmake_modules/FindArrow.cmake @@ -57,12 +57,18 @@ find_library(ARROW_JEMALLOC_LIB_PATH NAMES arrow_jemalloc ${ARROW_SEARCH_LIB_PATH} NO_DEFAULT_PATH) +find_library(ARROW_PYTHON_LIB_PATH NAMES arrow_python + PATHS + ${ARROW_SEARCH_LIB_PATH} + NO_DEFAULT_PATH) + if (ARROW_INCLUDE_DIR AND ARROW_LIB_PATH) set(ARROW_FOUND TRUE) set(ARROW_LIB_NAME libarrow) set(ARROW_IO_LIB_NAME libarrow_io) set(ARROW_IPC_LIB_NAME libarrow_ipc) set(ARROW_JEMALLOC_LIB_NAME libarrow_jemalloc) + set(ARROW_PYTHON_LIB_NAME libarrow_python) set(ARROW_LIBS ${ARROW_SEARCH_LIB_PATH}) set(ARROW_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_LIB_NAME}.a) @@ -77,6 +83,9 @@ if (ARROW_INCLUDE_DIR AND ARROW_LIB_PATH) set(ARROW_JEMALLOC_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_JEMALLOC_LIB_NAME}.a) set(ARROW_JEMALLOC_SHARED_LIB ${ARROW_LIBS}/${ARROW_JEMALLOC_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + set(ARROW_PYTHON_STATIC_LIB ${ARROW_SEARCH_LIB_PATH}/${ARROW_PYTHON_LIB_NAME}.a) + set(ARROW_PYTHON_SHARED_LIB ${ARROW_LIBS}/${ARROW_PYTHON_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) + if (NOT Arrow_FIND_QUIETLY) message(STATUS "Found the Arrow core library: ${ARROW_LIB_PATH}") message(STATUS "Found the Arrow IO library: ${ARROW_IO_LIB_PATH}") http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/cmake_modules/FindNumPy.cmake ---------------------------------------------------------------------- diff --git a/python/cmake_modules/FindNumPy.cmake b/python/cmake_modules/FindNumPy.cmake deleted file mode 100644 index 58bb531..0000000 --- a/python/cmake_modules/FindNumPy.cmake +++ /dev/null @@ -1,100 +0,0 @@ -# - Find the NumPy libraries -# This module finds if NumPy is installed, and sets the following variables -# indicating where it is. -# -# TODO: Update to provide the libraries and paths for linking npymath lib. -# -# NUMPY_FOUND - was NumPy found -# NUMPY_VERSION - the version of NumPy found as a string -# NUMPY_VERSION_MAJOR - the major version number of NumPy -# NUMPY_VERSION_MINOR - the minor version number of NumPy -# NUMPY_VERSION_PATCH - the patch version number of NumPy -# NUMPY_VERSION_DECIMAL - e.g. version 1.6.1 is 10601 -# NUMPY_INCLUDE_DIRS - path to the NumPy include files - -#============================================================================ -# Copyright 2012 Continuum Analytics, Inc. -# -# MIT License -# -# Permission is hereby granted, free of charge, to any person obtaining -# a copy of this software and associated documentation files -# (the "Software"), to deal in the Software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of the Software, and to permit -# persons to whom the Software is furnished to do so, subject to -# the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -#============================================================================ - -# Finding NumPy involves calling the Python interpreter -if(NumPy_FIND_REQUIRED) - find_package(PythonInterp REQUIRED) -else() - find_package(PythonInterp) -endif() - -if(NOT PYTHONINTERP_FOUND) - set(NUMPY_FOUND FALSE) - return() -endif() - -execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" - "import numpy as n; print(n.__version__); print(n.get_include());" - RESULT_VARIABLE _NUMPY_SEARCH_SUCCESS - OUTPUT_VARIABLE _NUMPY_VALUES_OUTPUT - ERROR_VARIABLE _NUMPY_ERROR_VALUE - OUTPUT_STRIP_TRAILING_WHITESPACE) - -if(NOT _NUMPY_SEARCH_SUCCESS MATCHES 0) - if(NumPy_FIND_REQUIRED) - message(FATAL_ERROR - "NumPy import failure:\n${_NUMPY_ERROR_VALUE}") - endif() - set(NUMPY_FOUND FALSE) - return() -endif() - -# Convert the process output into a list -string(REGEX REPLACE ";" "\\\\;" _NUMPY_VALUES ${_NUMPY_VALUES_OUTPUT}) -string(REGEX REPLACE "\n" ";" _NUMPY_VALUES ${_NUMPY_VALUES}) -list(GET _NUMPY_VALUES 0 NUMPY_VERSION) -list(GET _NUMPY_VALUES 1 NUMPY_INCLUDE_DIRS) - -string(REGEX MATCH "^[0-9]+\\.[0-9]+\\.[0-9]+" _VER_CHECK "${NUMPY_VERSION}") -if("${_VER_CHECK}" STREQUAL "") - # The output from Python was unexpected. Raise an error always - # here, because we found NumPy, but it appears to be corrupted somehow. - message(FATAL_ERROR - "Requested version and include path from NumPy, got instead:\n${_NUMPY_VALUES_OUTPUT}\n") - return() -endif() - -# Make sure all directory separators are '/' -string(REGEX REPLACE "\\\\" "/" NUMPY_INCLUDE_DIRS ${NUMPY_INCLUDE_DIRS}) - -# Get the major and minor version numbers -string(REGEX REPLACE "\\." ";" _NUMPY_VERSION_LIST ${NUMPY_VERSION}) -list(GET _NUMPY_VERSION_LIST 0 NUMPY_VERSION_MAJOR) -list(GET _NUMPY_VERSION_LIST 1 NUMPY_VERSION_MINOR) -list(GET _NUMPY_VERSION_LIST 2 NUMPY_VERSION_PATCH) -string(REGEX MATCH "[0-9]*" NUMPY_VERSION_PATCH ${NUMPY_VERSION_PATCH}) -math(EXPR NUMPY_VERSION_DECIMAL - "(${NUMPY_VERSION_MAJOR} * 10000) + (${NUMPY_VERSION_MINOR} * 100) + ${NUMPY_VERSION_PATCH}") - -find_package_message(NUMPY - "Found NumPy: version \"${NUMPY_VERSION}\" ${NUMPY_INCLUDE_DIRS}" - "${NUMPY_INCLUDE_DIRS}${NUMPY_VERSION}") - -set(NUMPY_FOUND TRUE) http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/cmake_modules/FindPythonLibsNew.cmake ---------------------------------------------------------------------- diff --git a/python/cmake_modules/FindPythonLibsNew.cmake b/python/cmake_modules/FindPythonLibsNew.cmake deleted file mode 100644 index 1000a95..0000000 --- a/python/cmake_modules/FindPythonLibsNew.cmake +++ /dev/null @@ -1,241 +0,0 @@ -# - Find python libraries -# This module finds the libraries corresponding to the Python interpeter -# FindPythonInterp provides. -# This code sets the following variables: -# -# PYTHONLIBS_FOUND - have the Python libs been found -# PYTHON_PREFIX - path to the Python installation -# PYTHON_LIBRARIES - path to the python library -# PYTHON_INCLUDE_DIRS - path to where Python.h is found -# PYTHON_SITE_PACKAGES - path to installation site-packages -# PYTHON_IS_DEBUG - whether the Python interpreter is a debug build -# -# PYTHON_INCLUDE_PATH - path to where Python.h is found (deprecated) -# -# A function PYTHON_ADD_MODULE(<name> src1 src2 ... srcN) is defined -# to build modules for python. -# -# Thanks to talljimbo for the patch adding the 'LDVERSION' config -# variable usage. - -#============================================================================= -# Copyright 2001-2009 Kitware, Inc. -# Copyright 2012-2014 Continuum Analytics, Inc. -# -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# * Neither the names of Kitware, Inc., the Insight Software Consortium, -# nor the names of their contributors may be used to endorse or promote -# products derived from this software without specific prior written -# permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -# # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#============================================================================= -# (To distribute this file outside of CMake, substitute the full -# License text for the above reference.) - -# Use the Python interpreter to find the libs. -if(PythonLibsNew_FIND_REQUIRED) - find_package(PythonInterp REQUIRED) -else() - find_package(PythonInterp) -endif() - -if(NOT PYTHONINTERP_FOUND) - set(PYTHONLIBS_FOUND FALSE) - return() -endif() - -# According to http://stackoverflow.com/questions/646518/python-how-to-detect-debug-interpreter -# testing whether sys has the gettotalrefcount function is a reliable, -# cross-platform way to detect a CPython debug interpreter. -# -# The library suffix is from the config var LDVERSION sometimes, otherwise -# VERSION. VERSION will typically be like "2.7" on unix, and "27" on windows. -# -# The config var LIBPL is for Linux, and helps on Debian Jessie where the -# addition of multi-arch support shuffled things around. -execute_process(COMMAND "${PYTHON_EXECUTABLE}" "-c" - "from distutils import sysconfig as s;import sys;import struct; -print('.'.join(str(v) for v in sys.version_info)); -print(sys.prefix); -print(s.get_python_inc(plat_specific=True)); -print(s.get_python_lib(plat_specific=True)); -print(s.get_config_var('SO')); -print(hasattr(sys, 'gettotalrefcount')+0); -print(struct.calcsize('@P')); -print(s.get_config_var('LDVERSION') or s.get_config_var('VERSION')); -print(s.get_config_var('LIBPL')); -" - RESULT_VARIABLE _PYTHON_SUCCESS - OUTPUT_VARIABLE _PYTHON_VALUES - ERROR_VARIABLE _PYTHON_ERROR_VALUE - OUTPUT_STRIP_TRAILING_WHITESPACE) - -if(NOT _PYTHON_SUCCESS MATCHES 0) - if(PythonLibsNew_FIND_REQUIRED) - message(FATAL_ERROR - "Python config failure:\n${_PYTHON_ERROR_VALUE}") - endif() - set(PYTHONLIBS_FOUND FALSE) - return() -endif() - -# Convert the process output into a list -string(REGEX REPLACE ";" "\\\\;" _PYTHON_VALUES ${_PYTHON_VALUES}) -string(REGEX REPLACE "\n" ";" _PYTHON_VALUES ${_PYTHON_VALUES}) -list(GET _PYTHON_VALUES 0 _PYTHON_VERSION_LIST) -list(GET _PYTHON_VALUES 1 PYTHON_PREFIX) -list(GET _PYTHON_VALUES 2 PYTHON_INCLUDE_DIR) -list(GET _PYTHON_VALUES 3 PYTHON_SITE_PACKAGES) -list(GET _PYTHON_VALUES 4 PYTHON_MODULE_EXTENSION) -list(GET _PYTHON_VALUES 5 PYTHON_IS_DEBUG) -list(GET _PYTHON_VALUES 6 PYTHON_SIZEOF_VOID_P) -list(GET _PYTHON_VALUES 7 PYTHON_LIBRARY_SUFFIX) -list(GET _PYTHON_VALUES 8 PYTHON_LIBRARY_PATH) - -# Make sure the Python has the same pointer-size as the chosen compiler -# Skip the check on OS X, it doesn't consistently have CMAKE_SIZEOF_VOID_P defined -if((NOT APPLE) AND (NOT "${PYTHON_SIZEOF_VOID_P}" STREQUAL "${CMAKE_SIZEOF_VOID_P}")) - if(PythonLibsNew_FIND_REQUIRED) - math(EXPR _PYTHON_BITS "${PYTHON_SIZEOF_VOID_P} * 8") - math(EXPR _CMAKE_BITS "${CMAKE_SIZEOF_VOID_P} * 8") - message(FATAL_ERROR - "Python config failure: Python is ${_PYTHON_BITS}-bit, " - "chosen compiler is ${_CMAKE_BITS}-bit") - endif() - set(PYTHONLIBS_FOUND FALSE) - return() -endif() - -# The built-in FindPython didn't always give the version numbers -string(REGEX REPLACE "\\." ";" _PYTHON_VERSION_LIST ${_PYTHON_VERSION_LIST}) -list(GET _PYTHON_VERSION_LIST 0 PYTHON_VERSION_MAJOR) -list(GET _PYTHON_VERSION_LIST 1 PYTHON_VERSION_MINOR) -list(GET _PYTHON_VERSION_LIST 2 PYTHON_VERSION_PATCH) - -# Make sure all directory separators are '/' -string(REGEX REPLACE "\\\\" "/" PYTHON_PREFIX ${PYTHON_PREFIX}) -string(REGEX REPLACE "\\\\" "/" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDE_DIR}) -string(REGEX REPLACE "\\\\" "/" PYTHON_SITE_PACKAGES ${PYTHON_SITE_PACKAGES}) - -if(CMAKE_HOST_WIN32) - if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC") - set(PYTHON_LIBRARY - "${PYTHON_PREFIX}/libs/Python${PYTHON_LIBRARY_SUFFIX}.lib") - else() - set(PYTHON_LIBRARY "${PYTHON_PREFIX}/libs/libpython${PYTHON_LIBRARY_SUFFIX}.a") - endif() -elseif(APPLE) - # Seems to require "-undefined dynamic_lookup" instead of linking - # against the .dylib, otherwise it crashes. This flag is added - # below - set(PYTHON_LIBRARY "") - #set(PYTHON_LIBRARY - # "${PYTHON_PREFIX}/lib/libpython${PYTHON_LIBRARY_SUFFIX}.dylib") -else() - if(${PYTHON_SIZEOF_VOID_P} MATCHES 8) - set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib64" "${PYTHON_PREFIX}/lib" "${PYTHON_LIBRARY_PATH}") - else() - set(_PYTHON_LIBS_SEARCH "${PYTHON_PREFIX}/lib" "${PYTHON_LIBRARY_PATH}") - endif() - message(STATUS "Searching for Python libs in ${_PYTHON_LIBS_SEARCH}") - message(STATUS "Looking for python${PYTHON_LIBRARY_SUFFIX}") - # Probably this needs to be more involved. It would be nice if the config - # information the python interpreter itself gave us were more complete. - find_library(PYTHON_LIBRARY - NAMES "python${PYTHON_LIBRARY_SUFFIX}" - PATHS ${_PYTHON_LIBS_SEARCH} - NO_SYSTEM_ENVIRONMENT_PATH) - message(STATUS "Found Python lib ${PYTHON_LIBRARY}") -endif() - -# For backward compatibility, set PYTHON_INCLUDE_PATH, but make it internal. -SET(PYTHON_INCLUDE_PATH "${PYTHON_INCLUDE_DIR}" CACHE INTERNAL - "Path to where Python.h is found (deprecated)") - -MARK_AS_ADVANCED( - PYTHON_LIBRARY - PYTHON_INCLUDE_DIR -) - -# We use PYTHON_INCLUDE_DIR, PYTHON_LIBRARY and PYTHON_DEBUG_LIBRARY for the -# cache entries because they are meant to specify the location of a single -# library. We now set the variables listed by the documentation for this -# module. -SET(PYTHON_INCLUDE_DIRS "${PYTHON_INCLUDE_DIR}") -SET(PYTHON_LIBRARIES "${PYTHON_LIBRARY}") -SET(PYTHON_DEBUG_LIBRARIES "${PYTHON_DEBUG_LIBRARY}") - - -# Don't know how to get to this directory, just doing something simple :P -#INCLUDE(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake) -#FIND_PACKAGE_HANDLE_STANDARD_ARGS(PythonLibs DEFAULT_MSG PYTHON_LIBRARIES PYTHON_INCLUDE_DIRS) -find_package_message(PYTHON - "Found PythonLibs: ${PYTHON_LIBRARY}" - "${PYTHON_EXECUTABLE}${PYTHON_VERSION}") - - -# PYTHON_ADD_MODULE(<name> src1 src2 ... srcN) is used to build modules for python. -FUNCTION(PYTHON_ADD_MODULE _NAME ) - GET_PROPERTY(_TARGET_SUPPORTS_SHARED_LIBS - GLOBAL PROPERTY TARGET_SUPPORTS_SHARED_LIBS) - OPTION(PYTHON_ENABLE_MODULE_${_NAME} "Add module ${_NAME}" TRUE) - OPTION(PYTHON_MODULE_${_NAME}_BUILD_SHARED - "Add module ${_NAME} shared" ${_TARGET_SUPPORTS_SHARED_LIBS}) - - # Mark these options as advanced - MARK_AS_ADVANCED(PYTHON_ENABLE_MODULE_${_NAME} - PYTHON_MODULE_${_NAME}_BUILD_SHARED) - - IF(PYTHON_ENABLE_MODULE_${_NAME}) - IF(PYTHON_MODULE_${_NAME}_BUILD_SHARED) - SET(PY_MODULE_TYPE MODULE) - ELSE(PYTHON_MODULE_${_NAME}_BUILD_SHARED) - SET(PY_MODULE_TYPE STATIC) - SET_PROPERTY(GLOBAL APPEND PROPERTY PY_STATIC_MODULES_LIST ${_NAME}) - ENDIF(PYTHON_MODULE_${_NAME}_BUILD_SHARED) - - SET_PROPERTY(GLOBAL APPEND PROPERTY PY_MODULES_LIST ${_NAME}) - ADD_LIBRARY(${_NAME} ${PY_MODULE_TYPE} ${ARGN}) - IF(APPLE) - # On OS X, linking against the Python libraries causes - # segfaults, so do this dynamic lookup instead. - SET_TARGET_PROPERTIES(${_NAME} PROPERTIES LINK_FLAGS - "-undefined dynamic_lookup") - ELSE() - # In general, we should not link against libpython as we do not embed - # the Python interpreter. The python binary itself can then define where - # the symbols should loaded from. - SET_TARGET_PROPERTIES(${_NAME} PROPERTIES LINK_FLAGS - "-Wl,-undefined,dynamic_lookup") - ENDIF() - IF(PYTHON_MODULE_${_NAME}_BUILD_SHARED) - SET_TARGET_PROPERTIES(${_NAME} PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}") - SET_TARGET_PROPERTIES(${_NAME} PROPERTIES SUFFIX "${PYTHON_MODULE_EXTENSION}") - ELSE() - ENDIF() - - ENDIF(PYTHON_ENABLE_MODULE_${_NAME}) -ENDFUNCTION(PYTHON_ADD_MODULE) http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/pyarrow/config.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/config.pyx b/python/pyarrow/config.pyx index 5ad7cf5..536f278 100644 --- a/python/pyarrow/config.pyx +++ b/python/pyarrow/config.pyx @@ -14,21 +14,21 @@ # distutils: language = c++ # cython: embedsignature = True -cdef extern from 'pyarrow/do_import_numpy.h': +cdef extern from 'arrow/python/do_import_numpy.h': pass -cdef extern from 'pyarrow/numpy_interop.h' namespace 'arrow::py': +cdef extern from 'arrow/python/numpy_interop.h' namespace 'arrow::py': int import_numpy() -cdef extern from 'pyarrow/config.h' namespace 'arrow::py': - void pyarrow_init() - void pyarrow_set_numpy_nan(object o) +cdef extern from 'arrow/python/config.h' namespace 'arrow::py': + void Init() + void set_numpy_nan(object o) import_numpy() -pyarrow_init() +Init() import numpy as np -pyarrow_set_numpy_nan(np.nan) +set_numpy_nan(np.nan) import multiprocessing import os http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/pyarrow/includes/pyarrow.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/includes/pyarrow.pxd b/python/pyarrow/includes/pyarrow.pxd index 3fdbebc..c3fdf4b 100644 --- a/python/pyarrow/includes/pyarrow.pxd +++ b/python/pyarrow/includes/pyarrow.pxd @@ -25,7 +25,7 @@ from pyarrow.includes.libarrow cimport (CArray, CBuffer, CColumn, cimport pyarrow.includes.libarrow_io as arrow_io -cdef extern from "pyarrow/api.h" namespace "arrow::py" nogil: +cdef extern from "arrow/python/api.h" namespace "arrow::py" nogil: shared_ptr[CDataType] GetPrimitiveType(Type type) shared_ptr[CDataType] GetTimestampType(TimeUnit unit) CStatus ConvertPySequence(object obj, CMemoryPool* pool, @@ -53,13 +53,9 @@ cdef extern from "pyarrow/api.h" namespace "arrow::py" nogil: void set_default_memory_pool(CMemoryPool* pool) CMemoryPool* get_memory_pool() - -cdef extern from "pyarrow/common.h" namespace "arrow::py" nogil: cdef cppclass PyBuffer(CBuffer): PyBuffer(object o) - -cdef extern from "pyarrow/io.h" namespace "arrow::py" nogil: cdef cppclass PyReadableFile(arrow_io.RandomAccessFile): PyReadableFile(object fo) http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/setup.py ---------------------------------------------------------------------- diff --git a/python/setup.py b/python/setup.py index 9abf985..dae6cb2 100644 --- a/python/setup.py +++ b/python/setup.py @@ -186,7 +186,7 @@ class build_ext(_build_ext): # a bit hacky build_lib = saved_cwd - # Move the built libpyarrow library to the place expected by the Python + # Move the libraries to the place expected by the Python # build shared_library_prefix = 'lib' if sys.platform == 'darwin': @@ -203,15 +203,16 @@ class build_ext(_build_ext): pass def move_lib(lib_name): - lib_filename = shared_library_prefix + lib_name + shared_library_suffix + lib_filename = (shared_library_prefix + lib_name + + shared_library_suffix) shutil.move(pjoin(self.build_type, lib_filename), pjoin(build_lib, 'pyarrow', lib_filename)) - move_lib("pyarrow") if self.bundle_arrow_cpp: move_lib("arrow") move_lib("arrow_io") move_lib("arrow_ipc") + move_lib("arrow_python") if self.with_jemalloc: move_lib("arrow_jemalloc") if self.with_parquet: @@ -227,14 +228,14 @@ class build_ext(_build_ext): if self._failure_permitted(name): print('Cython module {0} failure permitted'.format(name)) continue - raise RuntimeError('libpyarrow C-extension failed to build:', + raise RuntimeError('pyarrow C-extension failed to build:', os.path.abspath(built_path)) ext_path = pjoin(build_lib, self._get_cmake_ext_path(name)) if os.path.exists(ext_path): os.remove(ext_path) self.mkpath(os.path.dirname(ext_path)) - print('Moving built libpyarrow C-extension', built_path, + print('Moving built C-extension', built_path, 'to build path', ext_path) shutil.move(self.get_ext_built(name), ext_path) self._found_names.append(name) http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/src/pyarrow/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/CMakeLists.txt b/python/src/pyarrow/CMakeLists.txt deleted file mode 100644 index 9e69718..0000000 --- a/python/src/pyarrow/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -####################################### -# Unit tests -####################################### - -ADD_PYARROW_TEST(adapters/pandas-test) http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/src/pyarrow/adapters/builtin.cc ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/adapters/builtin.cc b/python/src/pyarrow/adapters/builtin.cc deleted file mode 100644 index 06e098a..0000000 --- a/python/src/pyarrow/adapters/builtin.cc +++ /dev/null @@ -1,527 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include <Python.h> -#include <datetime.h> -#include <sstream> - -#include "pyarrow/adapters/builtin.h" - -#include "arrow/api.h" -#include "arrow/status.h" - -#include "pyarrow/helpers.h" -#include "pyarrow/util/datetime.h" - -namespace arrow { -namespace py { - -static inline bool IsPyInteger(PyObject* obj) { -#if PYARROW_IS_PY2 - return PyLong_Check(obj) || PyInt_Check(obj); -#else - return PyLong_Check(obj); -#endif -} - -class ScalarVisitor { - public: - ScalarVisitor() - : total_count_(0), - none_count_(0), - bool_count_(0), - int_count_(0), - date_count_(0), - timestamp_count_(0), - float_count_(0), - binary_count_(0), - unicode_count_(0) {} - - void Visit(PyObject* obj) { - ++total_count_; - if (obj == Py_None) { - ++none_count_; - } else if (PyBool_Check(obj)) { - ++bool_count_; - } else if (PyFloat_Check(obj)) { - ++float_count_; - } else if (IsPyInteger(obj)) { - ++int_count_; - } else if (PyDate_CheckExact(obj)) { - ++date_count_; - } else if (PyDateTime_CheckExact(obj)) { - ++timestamp_count_; - } else if (PyBytes_Check(obj)) { - ++binary_count_; - } else if (PyUnicode_Check(obj)) { - ++unicode_count_; - } else { - // TODO(wesm): accumulate error information somewhere - } - } - - std::shared_ptr<DataType> GetType() { - // TODO(wesm): handling mixed-type cases - if (float_count_) { - return float64(); - } else if (int_count_) { - // TODO(wesm): tighter type later - return int64(); - } else if (date_count_) { - return date64(); - } else if (timestamp_count_) { - return timestamp(TimeUnit::MICRO); - } else if (bool_count_) { - return boolean(); - } else if (binary_count_) { - return binary(); - } else if (unicode_count_) { - return utf8(); - } else { - return null(); - } - } - - int64_t total_count() const { return total_count_; } - - private: - int64_t total_count_; - int64_t none_count_; - int64_t bool_count_; - int64_t int_count_; - int64_t date_count_; - int64_t timestamp_count_; - int64_t float_count_; - int64_t binary_count_; - int64_t unicode_count_; - - // Place to accumulate errors - // std::vector<Status> errors_; -}; - -static constexpr int MAX_NESTING_LEVELS = 32; - -class SeqVisitor { - public: - SeqVisitor() : max_nesting_level_(0) { - memset(nesting_histogram_, 0, MAX_NESTING_LEVELS * sizeof(int)); - } - - Status Visit(PyObject* obj, int level = 0) { - Py_ssize_t size = PySequence_Size(obj); - - if (level > max_nesting_level_) { max_nesting_level_ = level; } - - for (int64_t i = 0; i < size; ++i) { - // TODO(wesm): Error checking? - // TODO(wesm): Specialize for PyList_GET_ITEM? - OwnedRef item_ref(PySequence_GetItem(obj, i)); - PyObject* item = item_ref.obj(); - - if (PyList_Check(item)) { - RETURN_NOT_OK(Visit(item, level + 1)); - } else if (PyDict_Check(item)) { - return Status::NotImplemented("No type inference for dicts"); - } else { - // We permit nulls at any level of nesting - if (item == Py_None) { - // TODO - } else { - ++nesting_histogram_[level]; - scalars_.Visit(item); - } - } - } - return Status::OK(); - } - - std::shared_ptr<DataType> GetType() { - if (scalars_.total_count() == 0) { - if (max_nesting_level_ == 0) { - return null(); - } else { - return nullptr; - } - } else { - std::shared_ptr<DataType> result = scalars_.GetType(); - for (int i = 0; i < max_nesting_level_; ++i) { - result = std::make_shared<ListType>(result); - } - return result; - } - } - - Status Validate() const { - if (scalars_.total_count() > 0) { - if (num_nesting_levels() > 1) { - return Status::Invalid("Mixed nesting levels not supported"); - } else if (max_observed_level() < max_nesting_level_) { - return Status::Invalid("Mixed nesting levels not supported"); - } - } - return Status::OK(); - } - - int max_observed_level() const { - int result = 0; - for (int i = 0; i < MAX_NESTING_LEVELS; ++i) { - if (nesting_histogram_[i] > 0) { result = i; } - } - return result; - } - - int num_nesting_levels() const { - int result = 0; - for (int i = 0; i < MAX_NESTING_LEVELS; ++i) { - if (nesting_histogram_[i] > 0) { ++result; } - } - return result; - } - - private: - ScalarVisitor scalars_; - - // Track observed - int max_nesting_level_; - int nesting_histogram_[MAX_NESTING_LEVELS]; -}; - -// Non-exhaustive type inference -Status InferArrowType(PyObject* obj, int64_t* size, std::shared_ptr<DataType>* out_type) { - *size = PySequence_Size(obj); - if (PyErr_Occurred()) { - // Not a sequence - PyErr_Clear(); - return Status::TypeError("Object is not a sequence"); - } - - // For 0-length sequences, refuse to guess - if (*size == 0) { *out_type = null(); } - - SeqVisitor seq_visitor; - RETURN_NOT_OK(seq_visitor.Visit(obj)); - RETURN_NOT_OK(seq_visitor.Validate()); - - *out_type = seq_visitor.GetType(); - - if (*out_type == nullptr) { return Status::TypeError("Unable to determine data type"); } - - return Status::OK(); -} - -// Marshal Python sequence (list, tuple, etc.) to Arrow array -class SeqConverter { - public: - virtual Status Init(const std::shared_ptr<ArrayBuilder>& builder) { - builder_ = builder; - return Status::OK(); - } - - virtual Status AppendData(PyObject* seq) = 0; - - protected: - std::shared_ptr<ArrayBuilder> builder_; -}; - -template <typename BuilderType> -class TypedConverter : public SeqConverter { - public: - Status Init(const std::shared_ptr<ArrayBuilder>& builder) override { - builder_ = builder; - typed_builder_ = static_cast<BuilderType*>(builder.get()); - return Status::OK(); - } - - protected: - BuilderType* typed_builder_; -}; - -class BoolConverter : public TypedConverter<BooleanBuilder> { - public: - Status AppendData(PyObject* seq) override { - Py_ssize_t size = PySequence_Size(seq); - RETURN_NOT_OK(typed_builder_->Reserve(size)); - for (int64_t i = 0; i < size; ++i) { - OwnedRef item(PySequence_GetItem(seq, i)); - if (item.obj() == Py_None) { - typed_builder_->AppendNull(); - } else { - if (item.obj() == Py_True) { - typed_builder_->Append(true); - } else { - typed_builder_->Append(false); - } - } - } - return Status::OK(); - } -}; - -class Int64Converter : public TypedConverter<Int64Builder> { - public: - Status AppendData(PyObject* seq) override { - int64_t val; - Py_ssize_t size = PySequence_Size(seq); - RETURN_NOT_OK(typed_builder_->Reserve(size)); - for (int64_t i = 0; i < size; ++i) { - OwnedRef item(PySequence_GetItem(seq, i)); - if (item.obj() == Py_None) { - typed_builder_->AppendNull(); - } else { - val = PyLong_AsLongLong(item.obj()); - RETURN_IF_PYERROR(); - typed_builder_->Append(val); - } - } - return Status::OK(); - } -}; - -class DateConverter : public TypedConverter<Date64Builder> { - public: - Status AppendData(PyObject* seq) override { - Py_ssize_t size = PySequence_Size(seq); - RETURN_NOT_OK(typed_builder_->Reserve(size)); - for (int64_t i = 0; i < size; ++i) { - OwnedRef item(PySequence_GetItem(seq, i)); - if (item.obj() == Py_None) { - typed_builder_->AppendNull(); - } else { - PyDateTime_Date* pydate = reinterpret_cast<PyDateTime_Date*>(item.obj()); - typed_builder_->Append(PyDate_to_ms(pydate)); - } - } - return Status::OK(); - } -}; - -class TimestampConverter : public TypedConverter<TimestampBuilder> { - public: - Status AppendData(PyObject* seq) override { - Py_ssize_t size = PySequence_Size(seq); - RETURN_NOT_OK(typed_builder_->Reserve(size)); - for (int64_t i = 0; i < size; ++i) { - OwnedRef item(PySequence_GetItem(seq, i)); - if (item.obj() == Py_None) { - typed_builder_->AppendNull(); - } else { - PyDateTime_DateTime* pydatetime = - reinterpret_cast<PyDateTime_DateTime*>(item.obj()); - struct tm datetime = {0}; - datetime.tm_year = PyDateTime_GET_YEAR(pydatetime) - 1900; - datetime.tm_mon = PyDateTime_GET_MONTH(pydatetime) - 1; - datetime.tm_mday = PyDateTime_GET_DAY(pydatetime); - datetime.tm_hour = PyDateTime_DATE_GET_HOUR(pydatetime); - datetime.tm_min = PyDateTime_DATE_GET_MINUTE(pydatetime); - datetime.tm_sec = PyDateTime_DATE_GET_SECOND(pydatetime); - int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime); - RETURN_IF_PYERROR(); - struct tm epoch = {0}; - epoch.tm_year = 70; - epoch.tm_mday = 1; - // Microseconds since the epoch - int64_t val = lrint(difftime(mktime(&datetime), mktime(&epoch))) * 1000000 + us; - typed_builder_->Append(val); - } - } - return Status::OK(); - } -}; - -class DoubleConverter : public TypedConverter<DoubleBuilder> { - public: - Status AppendData(PyObject* seq) override { - double val; - Py_ssize_t size = PySequence_Size(seq); - RETURN_NOT_OK(typed_builder_->Reserve(size)); - for (int64_t i = 0; i < size; ++i) { - OwnedRef item(PySequence_GetItem(seq, i)); - if (item.obj() == Py_None) { - typed_builder_->AppendNull(); - } else { - val = PyFloat_AsDouble(item.obj()); - RETURN_IF_PYERROR(); - typed_builder_->Append(val); - } - } - return Status::OK(); - } -}; - -class BytesConverter : public TypedConverter<BinaryBuilder> { - public: - Status AppendData(PyObject* seq) override { - PyObject* item; - PyObject* bytes_obj; - OwnedRef tmp; - const char* bytes; - int64_t length; - Py_ssize_t size = PySequence_Size(seq); - for (int64_t i = 0; i < size; ++i) { - item = PySequence_GetItem(seq, i); - OwnedRef holder(item); - - if (item == Py_None) { - RETURN_NOT_OK(typed_builder_->AppendNull()); - continue; - } else if (PyUnicode_Check(item)) { - tmp.reset(PyUnicode_AsUTF8String(item)); - RETURN_IF_PYERROR(); - bytes_obj = tmp.obj(); - } else if (PyBytes_Check(item)) { - bytes_obj = item; - } else { - return Status::TypeError("Non-string value encountered"); - } - // No error checking - length = PyBytes_GET_SIZE(bytes_obj); - bytes = PyBytes_AS_STRING(bytes_obj); - RETURN_NOT_OK(typed_builder_->Append(bytes, length)); - } - return Status::OK(); - } -}; - -class UTF8Converter : public TypedConverter<StringBuilder> { - public: - Status AppendData(PyObject* seq) override { - PyObject* item; - PyObject* bytes_obj; - OwnedRef tmp; - const char* bytes; - int64_t length; - Py_ssize_t size = PySequence_Size(seq); - for (int64_t i = 0; i < size; ++i) { - item = PySequence_GetItem(seq, i); - OwnedRef holder(item); - - if (item == Py_None) { - RETURN_NOT_OK(typed_builder_->AppendNull()); - continue; - } else if (!PyUnicode_Check(item)) { - return Status::TypeError("Non-unicode value encountered"); - } - tmp.reset(PyUnicode_AsUTF8String(item)); - RETURN_IF_PYERROR(); - bytes_obj = tmp.obj(); - - // No error checking - length = PyBytes_GET_SIZE(bytes_obj); - bytes = PyBytes_AS_STRING(bytes_obj); - RETURN_NOT_OK(typed_builder_->Append(bytes, length)); - } - return Status::OK(); - } -}; - -class ListConverter : public TypedConverter<ListBuilder> { - public: - Status Init(const std::shared_ptr<ArrayBuilder>& builder) override; - - Status AppendData(PyObject* seq) override { - Py_ssize_t size = PySequence_Size(seq); - for (int64_t i = 0; i < size; ++i) { - OwnedRef item(PySequence_GetItem(seq, i)); - if (item.obj() == Py_None) { - RETURN_NOT_OK(typed_builder_->AppendNull()); - } else { - typed_builder_->Append(); - RETURN_NOT_OK(value_converter_->AppendData(item.obj())); - } - } - return Status::OK(); - } - - protected: - std::shared_ptr<SeqConverter> value_converter_; -}; - -// Dynamic constructor for sequence converters -std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type) { - switch (type->type) { - case Type::BOOL: - return std::make_shared<BoolConverter>(); - case Type::INT64: - return std::make_shared<Int64Converter>(); - case Type::DATE64: - return std::make_shared<DateConverter>(); - case Type::TIMESTAMP: - return std::make_shared<TimestampConverter>(); - case Type::DOUBLE: - return std::make_shared<DoubleConverter>(); - case Type::BINARY: - return std::make_shared<BytesConverter>(); - case Type::STRING: - return std::make_shared<UTF8Converter>(); - case Type::LIST: - return std::make_shared<ListConverter>(); - case Type::STRUCT: - default: - return nullptr; - break; - } -} - -Status ListConverter::Init(const std::shared_ptr<ArrayBuilder>& builder) { - builder_ = builder; - typed_builder_ = static_cast<ListBuilder*>(builder.get()); - - value_converter_ = - GetConverter(static_cast<ListType*>(builder->type().get())->value_type()); - if (value_converter_ == nullptr) { - return Status::NotImplemented("value type not implemented"); - } - - value_converter_->Init(typed_builder_->value_builder()); - return Status::OK(); -} - -Status AppendPySequence(PyObject* obj, const std::shared_ptr<DataType>& type, - const std::shared_ptr<ArrayBuilder>& builder) { - std::shared_ptr<SeqConverter> converter = GetConverter(type); - if (converter == nullptr) { - std::stringstream ss; - ss << "No type converter implemented for " << type->ToString(); - return Status::NotImplemented(ss.str()); - } - converter->Init(builder); - - return converter->AppendData(obj); -} - -Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out) { - std::shared_ptr<DataType> type; - int64_t size; - PyDateTime_IMPORT; - RETURN_NOT_OK(InferArrowType(obj, &size, &type)); - - // Handle NA / NullType case - if (type->type == Type::NA) { - out->reset(new NullArray(size)); - return Status::OK(); - } - - // Give the sequence converter an array builder - std::shared_ptr<ArrayBuilder> builder; - RETURN_NOT_OK(MakeBuilder(pool, type, &builder)); - RETURN_NOT_OK(AppendPySequence(obj, type, builder)); - - return builder->Finish(out); -} - -} // namespace py -} // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/src/pyarrow/adapters/builtin.h ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/adapters/builtin.h b/python/src/pyarrow/adapters/builtin.h deleted file mode 100644 index 2d45e67..0000000 --- a/python/src/pyarrow/adapters/builtin.h +++ /dev/null @@ -1,54 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Functions for converting between CPython built-in data structures and Arrow -// data structures - -#ifndef PYARROW_ADAPTERS_BUILTIN_H -#define PYARROW_ADAPTERS_BUILTIN_H - -#include <Python.h> - -#include <memory> - -#include <arrow/type.h> - -#include "arrow/util/visibility.h" - -#include "pyarrow/common.h" - -namespace arrow { - -class Array; -class Status; - -namespace py { - -ARROW_EXPORT arrow::Status InferArrowType( - PyObject* obj, int64_t* size, std::shared_ptr<arrow::DataType>* out_type); - -ARROW_EXPORT arrow::Status AppendPySequence(PyObject* obj, - const std::shared_ptr<arrow::DataType>& type, - const std::shared_ptr<arrow::ArrayBuilder>& builder); - -ARROW_EXPORT -Status ConvertPySequence(PyObject* obj, MemoryPool* pool, std::shared_ptr<Array>* out); - -} // namespace py -} // namespace arrow - -#endif // PYARROW_ADAPTERS_BUILTIN_H http://git-wip-us.apache.org/repos/asf/arrow/blob/3aac4ade/python/src/pyarrow/adapters/pandas-test.cc ---------------------------------------------------------------------- diff --git a/python/src/pyarrow/adapters/pandas-test.cc b/python/src/pyarrow/adapters/pandas-test.cc deleted file mode 100644 index e694e79..0000000 --- a/python/src/pyarrow/adapters/pandas-test.cc +++ /dev/null @@ -1,64 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "gtest/gtest.h" - -#include <cstdint> -#include <memory> -#include <string> -#include <vector> - -#include "arrow/array.h" -#include "arrow/builder.h" -#include "arrow/schema.h" -#include "arrow/table.h" -#include "arrow/test-util.h" -#include "arrow/type.h" -#include "pyarrow/adapters/pandas.h" - -namespace arrow { -namespace py { - -TEST(PandasConversionTest, TestObjectBlockWriteFails) { - StringBuilder builder; - const char value[] = {'\xf1', '\0'}; - - for (int i = 0; i < 1000; ++i) { - builder.Append(value, strlen(value)); - } - - std::shared_ptr<Array> arr; - ASSERT_OK(builder.Finish(&arr)); - - auto f1 = field("f1", utf8()); - auto f2 = field("f2", utf8()); - auto f3 = field("f3", utf8()); - std::vector<std::shared_ptr<Field>> fields = {f1, f2, f3}; - std::vector<std::shared_ptr<Column>> cols = {std::make_shared<Column>(f1, arr), - std::make_shared<Column>(f2, arr), std::make_shared<Column>(f3, arr)}; - - auto schema = std::make_shared<Schema>(fields); - auto table = std::make_shared<Table>("", schema, cols); - - PyObject* out; - Py_BEGIN_ALLOW_THREADS; - ASSERT_RAISES(UnknownError, ConvertTableToPandas(table, 2, &out)); - Py_END_ALLOW_THREADS; -} - -} // namespace py -} // namespace arrow