Repository: arrow Updated Branches: refs/heads/master f0f1ca60d -> ef579ca7e
ARROW-1123: Make jemalloc the default allocator Change-Id: Ib03392431851773df3b59b2c9a4d9a7bd672d2cb Author: Uwe L. Korn <uw...@xhochy.com> Closes #761 from xhochy/ARROW-1123 and squashes the following commits: 2c75f56d [Uwe L. Korn] Use shared jemalloc always if available 2205586f [Uwe L. Korn] Run parquet tests in manylinux1 build 97f77d08 [Uwe L. Korn] Add pthread to static dependencies 7d01e9eb [Uwe L. Korn] Cpplint bf478f1c [Uwe L. Korn] Fix allocator 4fbc7bac [Uwe L. Korn] Correct small allocations c3bacc05 [Uwe L. Korn] Revert "Also link static libs to librt" 44f0cfc5 [Uwe L. Korn] Also link static libs to librt bac694df [Uwe L. Korn] Also link static libs to librt 35212bc0 [Uwe L. Korn] Remove obsolete import check 4b714a14 [Uwe L. Korn] Don't force optional that is no longer there e004150c [Uwe L. Korn] Only search the for pthread library, not the headers e65d0d1d [Uwe L. Korn] ARROW-1123: Make jemalloc the default allocator Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/ef579ca7 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/ef579ca7 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/ef579ca7 Branch: refs/heads/master Commit: ef579ca7eb0a99ff32ae8eed8fc3127c7ef4110b Parents: f0f1ca6 Author: Uwe L. Korn <uw...@xhochy.com> Authored: Thu Jun 22 09:33:20 2017 -0400 Committer: Wes McKinney <wes.mckin...@twosigma.com> Committed: Thu Jun 22 09:33:20 2017 -0400 ---------------------------------------------------------------------- ci/travis_script_python.sh | 3 +- cpp/CMakeLists.txt | 74 ++++++++++-- cpp/cmake_modules/BuildUtils.cmake | 17 ++- cpp/src/arrow/allocator-test.cc | 2 +- cpp/src/arrow/jemalloc/CMakeLists.txt | 120 ------------------- cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in | 30 ----- .../jemalloc/jemalloc-builder-benchmark.cc | 47 -------- .../arrow/jemalloc/jemalloc-memory_pool-test.cc | 51 -------- cpp/src/arrow/jemalloc/memory_pool.cc | 76 ------------ cpp/src/arrow/jemalloc/memory_pool.h | 57 --------- cpp/src/arrow/jemalloc/symbols.map | 30 ----- cpp/src/arrow/memory_pool.cc | 32 ++++- python/CMakeLists.txt | 17 --- python/cmake_modules/FindArrow.cmake | 10 -- python/doc/source/api.rst | 1 - python/doc/source/development.rst | 5 +- python/doc/source/index.rst | 6 - python/doc/source/jemalloc.rst | 47 -------- python/manylinux1/Dockerfile-x86_64 | 10 +- python/manylinux1/Dockerfile-x86_64_base | 3 +- python/manylinux1/build_arrow.sh | 6 +- python/pyarrow/__init__.py | 9 -- python/pyarrow/_jemalloc.pyx | 28 ----- python/pyarrow/includes/libarrow_jemalloc.pxd | 27 ----- python/pyarrow/tests/test_jemalloc.py | 67 ----------- python/setup.py | 11 -- site/README.md | 4 +- 27 files changed, 117 insertions(+), 673 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/ci/travis_script_python.sh ---------------------------------------------------------------------- diff --git a/ci/travis_script_python.sh b/ci/travis_script_python.sh index 904db52..6cc760f 100755 --- a/ci/travis_script_python.sh +++ b/ci/travis_script_python.sh @@ -106,10 +106,9 @@ python_version_tests() { # Other stuff pip install pip install -r requirements.txt - python setup.py build_ext --inplace --with-parquet --with-jemalloc + python setup.py build_ext --inplace --with-parquet python -c "import pyarrow.parquet" - python -c "import pyarrow._jemalloc" python -m pytest -vv -r sxX pyarrow --parquet http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/cpp/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 42e1f69..962891a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -438,6 +438,14 @@ if (DEFINED ENV{GFLAGS_HOME}) endif() # ---------------------------------------------------------------------- +# Find pthreads + +if (NOT MSVC) + find_library(PTHREAD_LIBRARY pthread) + message(STATUS "Found pthread: ${PTHREAD_LIBRARY}") +endif() + +# ---------------------------------------------------------------------- # Add Boost dependencies (code adapted from Apache Kudu (incubating)) set(Boost_DEBUG TRUE) @@ -756,7 +764,8 @@ if (ARROW_JEMALLOC) include_directories(SYSTEM ${JEMALLOC_INCLUDE_DIR}) ADD_THIRDPARTY_LIB(jemalloc STATIC_LIB ${JEMALLOC_STATIC_LIB} - SHARED_LIB ${JEMALLOC_SHARED_LIB}) + SHARED_LIB ${JEMALLOC_SHARED_LIB} + DEPS ${PTHREAD_LIBRARY}) endif() ## Google PerfTools @@ -803,20 +812,16 @@ include_directories(SYSTEM "${HADOOP_HOME}/include") # Linker setup ############################################################ set(ARROW_MIN_TEST_LIBS + ${ARROW_STATIC_LINK_LIBS} arrow_static gtest gtest_main ${ARROW_BASE_LIBS}) -if (APPLE) +if(NOT MSVC) set(ARROW_MIN_TEST_LIBS ${ARROW_MIN_TEST_LIBS} ${CMAKE_DL_LIBS}) -elseif(NOT MSVC) - set(ARROW_MIN_TEST_LIBS - ${ARROW_MIN_TEST_LIBS} - pthread - ${CMAKE_DL_LIBS}) endif() set(ARROW_TEST_LINK_LIBS ${ARROW_MIN_TEST_LIBS}) @@ -938,8 +943,55 @@ set(ARROW_STATIC_PRIVATE_LINK_LIBS if (NOT MSVC) set(ARROW_LINK_LIBS ${ARROW_LINK_LIBS} - ${CMAKE_DL_LIBS} - pthread) + ${CMAKE_DL_LIBS}) +endif() + +if (ARROW_JEMALLOC) + add_definitions(-DARROW_JEMALLOC) + # In the case that jemalloc is only available as a shared library also use it to + # link it in the static requirements. In contrast to other libraries we try in + # most cases to use the system provided version of jemalloc to better align with + # other potential users of jemalloc. + if (JEMALLOC_STATIC_LIB AND NOT ARROW_JEMALLOC_USE_SHARED) + set(ARROW_JEMALLOC_STATIC_LINK_LIBS jemalloc_static) + else() + set(ARROW_JEMALLOC_STATIC_LINK_LIBS jemalloc_shared) + endif() + + if (NOT APPLE) + set(ARROW_JEMALLOC_STATIC_LINK_LIBS ${ARROW_JEMALLOC_STATIC_LINK_LIBS} ${PTHREAD_LIBRARY} rt) + endif() + + if (ARROW_JEMALLOC_USE_SHARED) + set(ARROW_JEMALLOC_SHARED_LINK_LIBS jemalloc_shared) + else() + if (CMAKE_COMPILER_IS_GNUCXX) + set(ARROW_JEMALLOC_SHARED_LINK_LIBS + jemalloc_static + # For glibc <2.17 we need to link to librt. + # As we compile with --as-needed by default, the linker will omit this + # dependency if not required. + ${PTHREAD_LIBRARY} + rt + ) + else() + set(ARROW_JEMALLOC_SHARED_LINK_LIBS + jemalloc_static + ) + endif() + endif() + set(ARROW_SHARED_PRIVATE_LINK_LIBS + ${ARROW_SHARED_PRIVATE_LINK_LIBS} + ${ARROW_JEMALLOC_SHARED_LINK_LIBS}) + set(ARROW_STATIC_LINK_LIBS + ${ARROW_STATIC_LINK_LIBS} + ${ARROW_JEMALLOC_STATIC_LINK_LIBS}) +elseif (NOT MSVC) + # We need to separate this as otherwise CMake would mess with the library + # linking order. + set(ARROW_LINK_LIBS + ${ARROW_LINK_LIBS} + ${PTHREAD_LIBRARY}) endif() if(RAPIDJSON_VENDORED) @@ -1021,10 +1073,6 @@ ADD_ARROW_LIB(arrow add_subdirectory(src/arrow/util) -if(ARROW_JEMALLOC) - add_subdirectory(src/arrow/jemalloc) -endif() - if(ARROW_PYTHON) find_package(PythonLibsNew REQUIRED) find_package(NumPy REQUIRED) http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/cpp/cmake_modules/BuildUtils.cmake ---------------------------------------------------------------------- diff --git a/cpp/cmake_modules/BuildUtils.cmake b/cpp/cmake_modules/BuildUtils.cmake index db83efe..6b2be41 100644 --- a/cpp/cmake_modules/BuildUtils.cmake +++ b/cpp/cmake_modules/BuildUtils.cmake @@ -46,6 +46,10 @@ function(ADD_THIRDPARTY_LIB LIB_NAME) set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") endif() + if(ARG_DEPS) + set_target_properties(${AUG_LIB_NAME} + PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}") + endif() message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}") elseif(ARG_STATIC_LIB) add_library(${LIB_NAME} STATIC IMPORTED) @@ -55,6 +59,10 @@ function(ADD_THIRDPARTY_LIB LIB_NAME) add_library(${AUG_LIB_NAME} STATIC IMPORTED) set_target_properties(${AUG_LIB_NAME} PROPERTIES IMPORTED_LOCATION "${ARG_STATIC_LIB}") + if(ARG_DEPS) + set_target_properties(${AUG_LIB_NAME} + PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}") + endif() message("Added static library dependency ${LIB_NAME}: ${ARG_STATIC_LIB}") elseif(ARG_SHARED_LIB) add_library(${LIB_NAME} SHARED IMPORTED) @@ -72,14 +80,13 @@ function(ADD_THIRDPARTY_LIB LIB_NAME) PROPERTIES IMPORTED_LOCATION "${ARG_SHARED_LIB}") endif() message("Added shared library dependency ${LIB_NAME}: ${ARG_SHARED_LIB}") + if(ARG_DEPS) + set_target_properties(${AUG_LIB_NAME} + PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}") + endif() else() message(FATAL_ERROR "No static or shared library provided for ${LIB_NAME}") endif() - - if(ARG_DEPS) - set_target_properties(${LIB_NAME} - PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES "${ARG_DEPS}") - endif() endfunction() function(ADD_ARROW_LIB LIB_NAME) http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/cpp/src/arrow/allocator-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/allocator-test.cc b/cpp/src/arrow/allocator-test.cc index 811ef5a..5a4e98d 100644 --- a/cpp/src/arrow/allocator-test.cc +++ b/cpp/src/arrow/allocator-test.cc @@ -33,7 +33,7 @@ TEST(stl_allocator, MemoryTracking) { ASSERT_EQ(0, pool->bytes_allocated()); } -#if !(defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER)) +#if !(defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) || defined(ARROW_JEMALLOC)) TEST(stl_allocator, TestOOM) { stl_allocator<uint64_t> alloc; http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/cpp/src/arrow/jemalloc/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/jemalloc/CMakeLists.txt b/cpp/src/arrow/jemalloc/CMakeLists.txt deleted file mode 100644 index 7b627ac..0000000 --- a/cpp/src/arrow/jemalloc/CMakeLists.txt +++ /dev/null @@ -1,120 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# ---------------------------------------------------------------------- -# arrow_jemalloc : Arrow jemalloc-based allocator - -include_directories(SYSTEM "{JEMALLOC_INCLUDE_DIR}") - -# In the case that jemalloc is only available as a shared library also use it to -# link it in the static requirements. In contrast to other libraries we try in -# most cases to use the system provided version of jemalloc to better align with -# other potential users of jemalloc. -if (JEMALLOC_STATIC_LIB) - set(ARROW_JEMALLOC_STATIC_LINK_LIBS - arrow_static - jemalloc_static - ) -else() - set(ARROW_JEMALLOC_STATIC_LINK_LIBS - arrow_static - jemalloc_shared - ) -endif() - -if (NOT APPLE) - set(ARROW_JEMALLOC_STATIC_LINK_LIBS ${ARROW_JEMALLOC_STATIC_LINK_LIBS} pthread) -endif() - -if (ARROW_JEMALLOC_USE_SHARED) - set(ARROW_JEMALLOC_SHARED_LINK_LIBS - arrow_shared - jemalloc_shared - ) -else() - if (CMAKE_COMPILER_IS_GNUCXX) - set(ARROW_JEMALLOC_SHARED_LINK_LIBS - arrow_shared - jemalloc_static - # For glibc <2.17 we need to link to librt. - # As we compile with --as-needed by default, the linker will omit this - # dependency if not required. - rt - ) - else() - set(ARROW_JEMALLOC_SHARED_LINK_LIBS - arrow_shared - jemalloc_static - ) - endif() -endif() - - -if (ARROW_BUILD_STATIC) - set(ARROW_JEMALLOC_TEST_LINK_LIBS - ${ARROW_JEMALLOC_STATIC_LINK_LIBS} - arrow_jemalloc_static) -else() - set(ARROW_JEMALLOC_TEST_LINK_LIBS - arrow_jemalloc_shared) -endif() - -set(ARROW_JEMALLOC_SRCS - memory_pool.cc -) - -if(NOT APPLE) - # Localize thirdparty symbols using a linker version script. This hides them - # from the client application. The OS X linker does not support the - # version-script option. - set(ARROW_JEMALLOC_LINK_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map") -endif() - -if (JEMALLOC_VENDORED) - set(JEMALLOC_DEPENDENCIES jemalloc_ep) -endif() - -ADD_ARROW_LIB(arrow_jemalloc - SOURCES ${ARROW_JEMALLOC_SRCS} - DEPENDENCIES ${JEMALLOC_DEPENDENCIES} - SHARED_LINK_FLAGS ${ARROW_JEMALLOC_LINK_FLAGS} - SHARED_LINK_LIBS ${ARROW_JEMALLOC_SHARED_LINK_LIBS} - SHARED_PRIVATE_LINK_LIBS ${ARROW_JEMALLOC_SHARED_PRIVATE_LINK_LIBS} - STATIC_LINK_LIBS ${ARROW_JEMALLOC_STATIC_LINK_LIBS} - STATIC_PRIVATE_LINK_LIBS ${ARROW_JEMALLOC_STATIC_PRIVATE_LINK_LIBS} -) - -ADD_ARROW_TEST(jemalloc-memory_pool-test) -ARROW_TEST_LINK_LIBRARIES(jemalloc-memory_pool-test - ${ARROW_JEMALLOC_TEST_LINK_LIBS}) - -ADD_ARROW_BENCHMARK(jemalloc-builder-benchmark) -ARROW_BENCHMARK_LINK_LIBRARIES(jemalloc-builder-benchmark - ${ARROW_JEMALLOC_TEST_LINK_LIBS}) - -# Headers: top level -install(FILES - memory_pool.h - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/jemalloc") - -# pkg-config support -configure_file(arrow-jemalloc.pc.in - "${CMAKE_CURRENT_BINARY_DIR}/arrow-jemalloc.pc" - @ONLY) -install( - FILES "${CMAKE_CURRENT_BINARY_DIR}/arrow-jemalloc.pc" - DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig/") http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in b/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in deleted file mode 100644 index 8e946d1..0000000 --- a/cpp/src/arrow/jemalloc/arrow-jemalloc.pc.in +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -prefix=@CMAKE_INSTALL_PREFIX@ -libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ -includedir=${prefix}/include - -so_version=@ARROW_SO_VERSION@ -abi_version=@ARROW_ABI_VERSION@ - -Name: Apache Arrow jemalloc-based allocator -Description: jemalloc allocator for Arrow. -Version: @ARROW_VERSION@ -Libs: -L${libdir} -larrow_jemalloc -Cflags: -I${includedir} -Requires: arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc b/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc deleted file mode 100644 index d69c304..0000000 --- a/cpp/src/arrow/jemalloc/jemalloc-builder-benchmark.cc +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "benchmark/benchmark.h" - -#include "arrow/builder.h" -#include "arrow/jemalloc/memory_pool.h" -#include "arrow/test-util.h" - -namespace arrow { - -constexpr int64_t kFinalSize = 256; - -static void BM_BuildPrimitiveArrayNoNulls( - benchmark::State& state) { // NOLINT non-const reference - // 2 MiB block - std::vector<int64_t> data(256 * 1024, 100); - while (state.KeepRunning()) { - Int64Builder builder(jemalloc::MemoryPool::default_pool(), arrow::int64()); - for (int i = 0; i < kFinalSize; i++) { - // Build up an array of 512 MiB in size - builder.Append(data.data(), data.size(), nullptr); - } - std::shared_ptr<Array> out; - builder.Finish(&out); - } - state.SetBytesProcessed( - state.iterations() * data.size() * sizeof(int64_t) * kFinalSize); -} - -BENCHMARK(BM_BuildPrimitiveArrayNoNulls)->Repetitions(5)->Unit(benchmark::kMillisecond); - -} // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc b/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc deleted file mode 100644 index a8448ab..0000000 --- a/cpp/src/arrow/jemalloc/jemalloc-memory_pool-test.cc +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include <cstdint> -#include <limits> - -#include "gtest/gtest.h" - -#include "arrow/jemalloc/memory_pool.h" -#include "arrow/memory_pool-test.h" - -namespace arrow { -namespace jemalloc { -namespace test { - -class TestJemallocMemoryPool : public ::arrow::test::TestMemoryPoolBase { - public: - ::arrow::MemoryPool* memory_pool() override { - return ::arrow::jemalloc::MemoryPool::default_pool(); - } -}; - -TEST_F(TestJemallocMemoryPool, MemoryTracking) { - this->TestMemoryTracking(); -} - -TEST_F(TestJemallocMemoryPool, OOM) { - this->TestOOM(); -} - -TEST_F(TestJemallocMemoryPool, Reallocate) { - this->TestReallocate(); -} - -} // namespace test -} // namespace jemalloc -} // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/cpp/src/arrow/jemalloc/memory_pool.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/jemalloc/memory_pool.cc b/cpp/src/arrow/jemalloc/memory_pool.cc deleted file mode 100644 index f7a1446..0000000 --- a/cpp/src/arrow/jemalloc/memory_pool.cc +++ /dev/null @@ -1,76 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include "arrow/jemalloc/memory_pool.h" - -#include <sstream> - -// Needed to support jemalloc 3 and 4 -#define JEMALLOC_MANGLE -#include <jemalloc/jemalloc.h> - -#include "arrow/status.h" - -constexpr size_t kAlignment = 64; - -namespace arrow { -namespace jemalloc { - -MemoryPool* MemoryPool::default_pool() { - static MemoryPool pool; - return &pool; -} - -MemoryPool::MemoryPool() : allocated_size_(0) {} - -MemoryPool::~MemoryPool() {} - -Status MemoryPool::Allocate(int64_t size, uint8_t** out) { - *out = reinterpret_cast<uint8_t*>(mallocx(size, MALLOCX_ALIGN(kAlignment))); - if (*out == NULL) { - std::stringstream ss; - ss << "malloc of size " << size << " failed"; - return Status::OutOfMemory(ss.str()); - } - allocated_size_ += size; - return Status::OK(); -} - -Status MemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) { - *ptr = reinterpret_cast<uint8_t*>(rallocx(*ptr, new_size, MALLOCX_ALIGN(kAlignment))); - if (*ptr == NULL) { - std::stringstream ss; - ss << "realloc of size " << new_size << " failed"; - return Status::OutOfMemory(ss.str()); - } - - allocated_size_ += new_size - old_size; - - return Status::OK(); -} - -void MemoryPool::Free(uint8_t* buffer, int64_t size) { - allocated_size_ -= size; - dallocx(buffer, MALLOCX_ALIGN(kAlignment)); -} - -int64_t MemoryPool::bytes_allocated() const { - return allocated_size_.load(); -} - -} // namespace jemalloc -} // namespace arrow http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/cpp/src/arrow/jemalloc/memory_pool.h ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/jemalloc/memory_pool.h b/cpp/src/arrow/jemalloc/memory_pool.h deleted file mode 100644 index 0d32b46..0000000 --- a/cpp/src/arrow/jemalloc/memory_pool.h +++ /dev/null @@ -1,57 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Public API for the jemalloc-based allocator - -#ifndef ARROW_JEMALLOC_MEMORY_POOL_H -#define ARROW_JEMALLOC_MEMORY_POOL_H - -#include "arrow/memory_pool.h" - -#include <atomic> - -namespace arrow { - -class Status; - -namespace jemalloc { - -class ARROW_EXPORT MemoryPool : public ::arrow::MemoryPool { - public: - static MemoryPool* default_pool(); - - MemoryPool(MemoryPool const&) = delete; - MemoryPool& operator=(MemoryPool const&) = delete; - - virtual ~MemoryPool(); - - Status Allocate(int64_t size, uint8_t** out) override; - Status Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) override; - void Free(uint8_t* buffer, int64_t size) override; - - int64_t bytes_allocated() const override; - - private: - MemoryPool(); - - std::atomic<int64_t> allocated_size_; -}; - -} // namespace jemalloc -} // namespace arrow - -#endif // ARROW_JEMALLOC_MEMORY_POOL_H http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/cpp/src/arrow/jemalloc/symbols.map ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/jemalloc/symbols.map b/cpp/src/arrow/jemalloc/symbols.map deleted file mode 100644 index 1e87cae..0000000 --- a/cpp/src/arrow/jemalloc/symbols.map +++ /dev/null @@ -1,30 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. See accompanying LICENSE file. - -{ - # Symbols marked as 'local' are not exported by the DSO and thus may not - # be used by client applications. - local: - # devtoolset / static-libstdc++ symbols - __cxa_*; - - extern "C++" { - # boost - boost::*; - - # devtoolset or -static-libstdc++ - the Red Hat devtoolset statically - # links c++11 symbols into binaries so that the result may be executed on - # a system with an older libstdc++ which doesn't include the necessary - # c++11 symbols. - std::*; - }; -}; http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/cpp/src/arrow/memory_pool.cc ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 7992f22..e7de5c4 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -27,14 +27,21 @@ #include "arrow/status.h" #include "arrow/util/logging.h" +#ifdef ARROW_JEMALLOC +// Needed to support jemalloc 3 and 4 +#define JEMALLOC_MANGLE +#include <jemalloc/jemalloc.h> +#endif + namespace arrow { +constexpr size_t kAlignment = 64; + namespace { // Allocate memory according to the alignment requirements for Arrow // (as of May 2016 64 bytes) Status AllocateAligned(int64_t size, uint8_t** out) { - // TODO(emkornfield) find something compatible with windows - constexpr size_t kAlignment = 64; +// TODO(emkornfield) find something compatible with windows #ifdef _MSC_VER // Special code path for MSVC *out = @@ -44,6 +51,14 @@ Status AllocateAligned(int64_t size, uint8_t** out) { ss << "malloc of size " << size << " failed"; return Status::OutOfMemory(ss.str()); } +#elif defined(ARROW_JEMALLOC) + *out = reinterpret_cast<uint8_t*>(mallocx( + std::max(static_cast<size_t>(size), kAlignment), MALLOCX_ALIGN(kAlignment))); + if (*out == NULL) { + std::stringstream ss; + ss << "malloc of size " << size << " failed"; + return Status::OutOfMemory(ss.str()); + } #else const int result = posix_memalign( reinterpret_cast<void**>(out), kAlignment, static_cast<size_t>(size)); @@ -87,6 +102,14 @@ Status DefaultMemoryPool::Allocate(int64_t size, uint8_t** out) { } Status DefaultMemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t** ptr) { +#ifdef ARROW_JEMALLOC + *ptr = reinterpret_cast<uint8_t*>(rallocx(*ptr, new_size, MALLOCX_ALIGN(kAlignment))); + if (*ptr == NULL) { + std::stringstream ss; + ss << "realloc of size " << new_size << " failed"; + return Status::OutOfMemory(ss.str()); + } +#else // Note: We cannot use realloc() here as it doesn't guarantee alignment. // Allocate new chunk @@ -98,8 +121,9 @@ Status DefaultMemoryPool::Reallocate(int64_t old_size, int64_t new_size, uint8_t _aligned_free(*ptr); #else std::free(*ptr); -#endif +#endif // defined(_MSC_VER) *ptr = out; +#endif // defined(ARROW_JEMALLOC) bytes_allocated_ += new_size - old_size; { @@ -118,6 +142,8 @@ void DefaultMemoryPool::Free(uint8_t* buffer, int64_t size) { DCHECK_GE(bytes_allocated_, size); #ifdef _MSC_VER _aligned_free(buffer); +#elif defined(ARROW_JEMALLOC) + dallocx(buffer, MALLOCX_ALIGN(kAlignment)); #else std::free(buffer); #endif http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index f35326a..224147d 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -51,9 +51,6 @@ if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") option(PYARROW_BUILD_PARQUET "Build the PyArrow Parquet integration" OFF) - option(PYARROW_BUILD_JEMALLOC - "Build the PyArrow jemalloc integration" - OFF) option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF) @@ -336,20 +333,6 @@ if (PYARROW_BUILD_PARQUET) _parquet) endif() -if (PYARROW_BUILD_JEMALLOC) - if (PYARROW_BUNDLE_ARROW_CPP) - bundle_arrow_lib(ARROW_JEMALLOC_SHARED_LIB) - endif() - ADD_THIRDPARTY_LIB(arrow_jemalloc - SHARED_LIB ${ARROW_JEMALLOC_SHARED_LIB}) - set(LINK_LIBS - ${LINK_LIBS} - arrow_jemalloc_shared) - set(CYTHON_EXTENSIONS - ${CYTHON_EXTENSIONS} - _jemalloc) -endif() - ############################################################ # Setup and build Cython modules ############################################################ http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/cmake_modules/FindArrow.cmake ---------------------------------------------------------------------- diff --git a/python/cmake_modules/FindArrow.cmake b/python/cmake_modules/FindArrow.cmake index 4c8ed3d..c893a1a 100644 --- a/python/cmake_modules/FindArrow.cmake +++ b/python/cmake_modules/FindArrow.cmake @@ -61,11 +61,6 @@ find_library(ARROW_LIB_PATH NAMES arrow NO_DEFAULT_PATH) get_filename_component(ARROW_LIBS ${ARROW_LIB_PATH} DIRECTORY) -find_library(ARROW_JEMALLOC_LIB_PATH NAMES arrow_jemalloc - PATHS - ${ARROW_SEARCH_LIB_PATH} - NO_DEFAULT_PATH) - find_library(ARROW_PYTHON_LIB_PATH NAMES arrow_python PATHS ${ARROW_SEARCH_LIB_PATH} @@ -104,10 +99,8 @@ if (ARROW_INCLUDE_DIR AND ARROW_LIBS) else() set(ARROW_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/lib${ARROW_LIB_NAME}.a) set(ARROW_PYTHON_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/lib${ARROW_PYTHON_LIB_NAME}.a) - set(ARROW_JEMALLOC_STATIC_LIB ${ARROW_PYTHON_LIB_PATH}/libarrow_jemalloc.a) set(ARROW_SHARED_LIB ${ARROW_LIBS}/lib${ARROW_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) - set(ARROW_JEMALLOC_SHARED_LIB ${ARROW_LIBS}/libarrow_jemalloc${CMAKE_SHARED_LIBRARY_SUFFIX}) set(ARROW_PYTHON_SHARED_LIB ${ARROW_LIBS}/lib${ARROW_PYTHON_LIB_NAME}${CMAKE_SHARED_LIBRARY_SUFFIX}) endif() endif() @@ -116,7 +109,6 @@ if (ARROW_FOUND) if (NOT Arrow_FIND_QUIETLY) message(STATUS "Found the Arrow core library: ${ARROW_LIB_PATH}") message(STATUS "Found the Arrow Python library: ${ARROW_PYTHON_LIB_PATH}") - message(STATUS "Found the Arrow jemalloc library: ${ARROW_JEMALLOC_LIB_PATH}") endif () else () if (NOT Arrow_FIND_QUIETLY) @@ -138,6 +130,4 @@ mark_as_advanced( ARROW_SHARED_LIB ARROW_PYTHON_STATIC_LIB ARROW_PYTHON_SHARED_LIB - ARROW_JEMALLOC_STATIC_LIB - ARROW_JEMALLOC_SHARED_LIB ) http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/doc/source/api.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/api.rst b/python/doc/source/api.rst index c145935..4810a31 100644 --- a/python/doc/source/api.rst +++ b/python/doc/source/api.rst @@ -194,7 +194,6 @@ Memory Pools MemoryPool default_memory_pool - jemalloc_memory_pool total_allocated_bytes set_memory_pool http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/doc/source/development.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/development.rst b/python/doc/source/development.rst index 910418c..2063ba8 100644 --- a/python/doc/source/development.rst +++ b/python/doc/source/development.rst @@ -141,7 +141,7 @@ Now, build pyarrow: cd arrow/python python setup.py build_ext --build-type=$ARROW_BUILD_TYPE \ - --with-parquet --with-jemalloc --inplace + --with-parquet --inplace If you did not build parquet-cpp, you can omit ``--with-parquet``. @@ -162,7 +162,6 @@ You should be able to run the unit tests with: pyarrow/tests/test_hdfs.py sssssssssssssss pyarrow/tests/test_io.py .................. pyarrow/tests/test_ipc.py ........ - pyarrow/tests/test_jemalloc.py ss pyarrow/tests/test_parquet.py .................... pyarrow/tests/test_scalars.py .......... pyarrow/tests/test_schema.py ......... @@ -176,7 +175,7 @@ You can build a wheel by running: .. code-block:: shell python setup.py build_ext --build-type=$ARROW_BUILD_TYPE \ - --with-parquet --with-jemalloc --bundle-arrow-cpp bdist_wheel + --with-parquet --bundle-arrow-cpp bdist_wheel Again, if you did not build parquet-cpp, you should omit ``--with-parquet``. http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/doc/source/index.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/index.rst b/python/doc/source/index.rst index 4bfbe44..a12853c 100644 --- a/python/doc/source/index.rst +++ b/python/doc/source/index.rst @@ -44,9 +44,3 @@ structures. parquet api getting_involved - -.. toctree:: - :maxdepth: 2 - :caption: Additional Features - - jemalloc MemoryPool <jemalloc.rst> http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/doc/source/jemalloc.rst ---------------------------------------------------------------------- diff --git a/python/doc/source/jemalloc.rst b/python/doc/source/jemalloc.rst deleted file mode 100644 index 9389dcb..0000000 --- a/python/doc/source/jemalloc.rst +++ /dev/null @@ -1,47 +0,0 @@ -.. Licensed to the Apache Software Foundation (ASF) under one -.. or more contributor license agreements. See the NOTICE file -.. distributed with this work for additional information -.. regarding copyright ownership. The ASF licenses this file -.. to you under the Apache License, Version 2.0 (the -.. "License"); you may not use this file except in compliance -.. with the License. You may obtain a copy of the License at - -.. http://www.apache.org/licenses/LICENSE-2.0 - -.. Unless required by applicable law or agreed to in writing, -.. software distributed under the License is distributed on an -.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -.. KIND, either express or implied. See the License for the -.. specific language governing permissions and limitations -.. under the License. - -jemalloc MemoryPool -=================== - -Arrow's default :class:`~pyarrow.MemoryPool` uses the system's allocator -through the POSIX APIs. Although this already provides aligned allocation, the -POSIX interface doesn't support aligned reallocation. The default reallocation -strategy is to allocate a new region, copy over the old data and free the -previous region. Using `jemalloc <http://jemalloc.net/>`_ we can simply extend -the existing memory allocation to the requested size. While this may still be -linear in the size of allocated memory, it is magnitudes faster as only the page -mapping in the kernel is touched, not the actual data. - -The jemalloc-based allocator is not enabled by default to allow the use of the -system allocator and/or other allocators like ``tcmalloc``. You can either -explicitly make it the default allocator or pass it only to single operations. - -.. code:: python - - import pyarrow as pa - - jemalloc_pool = pyarrow.jemalloc_memory_pool() - - # Explicitly use jemalloc for allocating memory for an Arrow Table object - array = pa.Array.from_pylist([1, 2, 3], memory_pool=jemalloc_pool) - - # Set the global pool - pyarrow.set_memory_pool(jemalloc_pool) - # This operation has no explicit MemoryPool specified and will thus will - # also use jemalloc for its allocations. - array = pa.Array.from_pylist([1, 2, 3]) http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/manylinux1/Dockerfile-x86_64 ---------------------------------------------------------------------- diff --git a/python/manylinux1/Dockerfile-x86_64 b/python/manylinux1/Dockerfile-x86_64 index 11e66f7..13919a2 100644 --- a/python/manylinux1/Dockerfile-x86_64 +++ b/python/manylinux1/Dockerfile-x86_64 @@ -9,17 +9,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. See accompanying LICENSE file. -FROM quay.io/xhochy/arrow_manylinux1_x86_64_base:ARROW-1024 +FROM quay.io/xhochy/arrow_manylinux1_x86_64_base:latest ADD arrow /arrow WORKDIR /arrow/cpp RUN mkdir build-plain WORKDIR /arrow/cpp/build-plain -RUN cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/arrow-dist -DARROW_BUILD_TESTS=OFF -DARROW_BUILD_SHARED=ON -DARROW_BOOST_USE_SHARED=OFF -DARROW_JEMALLOC=ON -DARROW_RPATH_ORIGIN=ON -DARROW_JEMALLOC_USE_SHARED=OFF .. -RUN make -j5 install +RUN cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/arrow-dist -DARROW_BUILD_TESTS=OFF -DARROW_BUILD_SHARED=ON -DARROW_BOOST_USE_SHARED=OFF -DARROW_JEMALLOC=ON -DARROW_RPATH_ORIGIN=ON -DARROW_JEMALLOC_USE_SHARED=OFF .. +RUN ninja install WORKDIR / RUN git clone https://github.com/apache/parquet-cpp.git WORKDIR /parquet-cpp -RUN ARROW_HOME=/arrow-dist cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/arrow-dist -DPARQUET_BUILD_TESTS=OFF -DPARQUET_BOOST_USE_SHARED=OFF . -RUN make -j5 install +RUN ARROW_HOME=/arrow-dist cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/arrow-dist -DPARQUET_BUILD_TESTS=OFF -DPARQUET_BOOST_USE_SHARED=OFF -GNinja . +RUN ninja install http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/manylinux1/Dockerfile-x86_64_base ---------------------------------------------------------------------- diff --git a/python/manylinux1/Dockerfile-x86_64_base b/python/manylinux1/Dockerfile-x86_64_base index 2ae7e0f..44a9888 100644 --- a/python/manylinux1/Dockerfile-x86_64_base +++ b/python/manylinux1/Dockerfile-x86_64_base @@ -25,8 +25,9 @@ RUN /build_jemalloc.sh WORKDIR / # Install cmake manylinux1 package -RUN /opt/python/cp35-cp35m/bin/pip install cmake +RUN /opt/python/cp35-cp35m/bin/pip install cmake ninja RUN ln -s /opt/python/cp35-cp35m/bin/cmake /usr/bin/cmake +RUN ln -s /opt/python/cp35-cp35m/bin/ninja /usr/bin/ninja ADD scripts/build_gtest.sh / RUN /build_gtest.sh http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/manylinux1/build_arrow.sh ---------------------------------------------------------------------- diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh index 4565e59..8c6bda9 100755 --- a/python/manylinux1/build_arrow.sh +++ b/python/manylinux1/build_arrow.sh @@ -35,7 +35,6 @@ cd /arrow/python # PyArrow build configuration export PYARROW_BUILD_TYPE='release' export PYARROW_WITH_PARQUET=1 -export PYARROW_WITH_JEMALLOC=1 export PYARROW_BUNDLE_ARROW_CPP=1 # Need as otherwise arrow_io is sometimes not linked export LDFLAGS="-Wl,--no-as-needed" @@ -60,13 +59,12 @@ for PYTHON in ${PYTHON_VERSIONS}; do # Clear output directory rm -rf dist/ echo "=== (${PYTHON}) Building wheel ===" - PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py build_ext --inplace --with-parquet --with-jemalloc --bundle-arrow-cpp + PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py build_ext --inplace --with-parquet --bundle-arrow-cpp PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER setup.py bdist_wheel echo "=== (${PYTHON}) Test the existence of optional modules ===" $PIPI_IO -r requirements.txt PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER -c "import pyarrow.parquet" - PATH="$PATH:$(cpython_path $PYTHON)/bin" $PYTHON_INTERPRETER -c "import pyarrow._jemalloc" echo "=== (${PYTHON}) Tag the wheel with manylinux1 ===" mkdir -p repaired_wheels/ @@ -75,7 +73,7 @@ for PYTHON in ${PYTHON_VERSIONS}; do echo "=== (${PYTHON}) Testing manylinux1 wheel ===" source /venv-test-${PYTHON}/bin/activate pip install repaired_wheels/*.whl - py.test /venv-test-${PYTHON}/lib/*/site-packages/pyarrow + py.test --parquet /venv-test-${PYTHON}/lib/*/site-packages/pyarrow deactivate mv repaired_wheels/*.whl /io/dist http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/pyarrow/__init__.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index 0f34121..237d44f 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -90,15 +90,6 @@ from pyarrow.lib import (ArrowException, ArrowTypeError) -def jemalloc_memory_pool(): - """ - Returns a jemalloc-based memory allocator, which can be passed to - pyarrow.set_memory_pool - """ - from pyarrow._jemalloc import default_pool - return default_pool() - - from pyarrow.filesystem import Filesystem, HdfsClient, LocalFilesystem from pyarrow.ipc import (RecordBatchFileReader, RecordBatchFileWriter, http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/pyarrow/_jemalloc.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/_jemalloc.pyx b/python/pyarrow/_jemalloc.pyx deleted file mode 100644 index 6f00c9d..0000000 --- a/python/pyarrow/_jemalloc.pyx +++ /dev/null @@ -1,28 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True - -from pyarrow.includes.libarrow_jemalloc cimport CJemallocMemoryPool -from pyarrow.lib cimport MemoryPool - -def default_pool(): - cdef MemoryPool pool = MemoryPool() - pool.init(CJemallocMemoryPool.default_pool()) - return pool http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/pyarrow/includes/libarrow_jemalloc.pxd ---------------------------------------------------------------------- diff --git a/python/pyarrow/includes/libarrow_jemalloc.pxd b/python/pyarrow/includes/libarrow_jemalloc.pxd deleted file mode 100644 index 0609d19..0000000 --- a/python/pyarrow/includes/libarrow_jemalloc.pxd +++ /dev/null @@ -1,27 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -# distutils: language = c++ - -from pyarrow.includes.common cimport * -from pyarrow.includes.libarrow cimport * - -cdef extern from "arrow/jemalloc/memory_pool.h" namespace "arrow::jemalloc" nogil: - cdef cppclass CJemallocMemoryPool" arrow::jemalloc::MemoryPool": - int64_t bytes_allocated() - @staticmethod - CMemoryPool* default_pool() http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/pyarrow/tests/test_jemalloc.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_jemalloc.py b/python/pyarrow/tests/test_jemalloc.py deleted file mode 100644 index 50eb74a..0000000 --- a/python/pyarrow/tests/test_jemalloc.py +++ /dev/null @@ -1,67 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -import gc -import pytest - -import pyarrow as pa - - -try: - pa.jemalloc_memory_pool() - HAVE_JEMALLOC = True -except ImportError: - HAVE_JEMALLOC = False - - -jemalloc = pytest.mark.skipif(not HAVE_JEMALLOC, - reason='jemalloc support not built') - - -@jemalloc -def test_different_memory_pool(): - gc.collect() - bytes_before_default = pa.total_allocated_bytes() - bytes_before_jemalloc = pa.jemalloc_memory_pool().bytes_allocated() - - # it works - array = pa.array([1, None, 3, None], # noqa - memory_pool=pa.jemalloc_memory_pool()) - gc.collect() - assert pa.total_allocated_bytes() == bytes_before_default - assert (pa.jemalloc_memory_pool().bytes_allocated() > - bytes_before_jemalloc) - - -@jemalloc -def test_default_memory_pool(): - gc.collect() - bytes_before_default = pa.total_allocated_bytes() - bytes_before_jemalloc = pa.jemalloc_memory_pool().bytes_allocated() - - old_memory_pool = pa.default_memory_pool() - pa.set_memory_pool(pa.jemalloc_memory_pool()) - - array = pa.array([1, None, 3, None]) # noqa - - pa.set_memory_pool(old_memory_pool) - gc.collect() - - assert pa.total_allocated_bytes() == bytes_before_default - - assert (pa.jemalloc_memory_pool().bytes_allocated() > - bytes_before_jemalloc) http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/python/setup.py ---------------------------------------------------------------------- diff --git a/python/setup.py b/python/setup.py index eddf88a..1ea57ae 100644 --- a/python/setup.py +++ b/python/setup.py @@ -82,7 +82,6 @@ class build_ext(_build_ext): user_options = ([('extra-cmake-args=', None, 'extra arguments for CMake'), ('build-type=', None, 'build type (debug or release)'), ('with-parquet', None, 'build the Parquet extension'), - ('with-jemalloc', None, 'build the jemalloc extension'), ('bundle-arrow-cpp', None, 'bundle the Arrow C++ libraries')] + _build_ext.user_options) @@ -100,14 +99,11 @@ class build_ext(_build_ext): self.with_parquet = strtobool( os.environ.get('PYARROW_WITH_PARQUET', '0')) - self.with_jemalloc = strtobool( - os.environ.get('PYARROW_WITH_JEMALLOC', '0')) self.bundle_arrow_cpp = strtobool( os.environ.get('PYARROW_BUNDLE_ARROW_CPP', '0')) CYTHON_MODULE_NAMES = [ 'lib', - '_jemalloc', '_parquet'] def _run_cmake(self): @@ -143,9 +139,6 @@ class build_ext(_build_ext): if self.with_parquet: cmake_options.append('-DPYARROW_BUILD_PARQUET=on') - if self.with_jemalloc: - cmake_options.append('-DPYARROW_BUILD_JEMALLOC=on') - if self.bundle_arrow_cpp: cmake_options.append('-DPYARROW_BUNDLE_ARROW_CPP=ON') # ARROW-1090: work around CMake rough edges @@ -249,8 +242,6 @@ class build_ext(_build_ext): shutil.move(pjoin(build_prefix, 'include'), pjoin(build_lib, 'pyarrow')) move_lib("arrow") move_lib("arrow_python") - if self.with_jemalloc: - move_lib("arrow_jemalloc") if self.with_parquet: move_lib("parquet") @@ -284,8 +275,6 @@ class build_ext(_build_ext): def _failure_permitted(self, name): if name == '_parquet' and not self.with_parquet: return True - if name == '_jemalloc' and not self.with_jemalloc: - return True return False def _get_inplace_dir(self): http://git-wip-us.apache.org/repos/asf/arrow/blob/ef579ca7/site/README.md ---------------------------------------------------------------------- diff --git a/site/README.md b/site/README.md index aeebaa1..0e052c8 100644 --- a/site/README.md +++ b/site/README.md @@ -73,11 +73,11 @@ rsync -r html/ ../../site/asf-site/docs/cpp #### Python -First, build PyArrow with all optional extensions (Apache Parquet, jemalloc). +First, build PyArrow with all optional extensions (Apache Parquet). ``` cd ../python -python setup.py build_ext --inplace --with-parquet --with-jemalloc +python setup.py build_ext --inplace --with-parquet python setup.py build_sphinx -s doc/source rsync -r doc/_build/html/ ../site/asf-site/docs/python/ ```