This is an automated email from the ASF dual-hosted git repository. kszucs pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push: new e8cc48b ARROW-4372: [C++] Embed precompiled bitcode in the gandiva library e8cc48b is described below commit e8cc48b455251f4a8b6a8174d019669fe4a92d20 Author: Krisztián Szűcs <szucs.kriszt...@gmail.com> AuthorDate: Thu Feb 21 16:33:26 2019 +0100 ARROW-4372: [C++] Embed precompiled bitcode in the gandiva library We were not running the pyarrow tests after installing the manylinux wheels, which can lead to uncaught issues, like: https://travis-ci.org/kszucs/crossbow/builds/484284104 Author: Krisztián Szűcs <szucs.kriszt...@gmail.com> Closes #3484 from kszucs/manylinux_tests and squashes the following commits: 3b1da30e <Krisztián Szűcs> use sudo c573a564 <Krisztián Szűcs> use env variables insude the container fd5e3fea <Krisztián Szűcs> use latest docker image tag d5531d95 <Krisztián Szűcs> test imports inside the wheel container 1aa19f18 <Krisztián Szűcs> reenable travis builds b3994962 <Krisztián Szűcs> test py27mu and py36m wheels 71233c7d <Krisztián Szűcs> test 2.7,16 wheel 2372f3d7 <Krisztián Szűcs> fix requirements path; disable other CI tests 3e4ec2a2 <Krisztián Szűcs> unterminated llvm:MemoryBuffer; fix check_import.py path 7c88d61b <Krisztián Szűcs> only build python 3.6 wheel 18c54883 <Krisztián Szűcs> install wheel from dist dir 0bb07a7e <Krisztián Szűcs> better bash split 54fc653b <Krisztián Szűcs> don't export d3cb058b <Krisztián Szűcs> fix wheel building script 0d29b311 <Krisztián Szűcs> remove not existing resources from gandiva's pom 5d75adb4 <Krisztián Szűcs> initialize jni loader 09d829a4 <Krisztián Szűcs> build wheels for a single python distribution at a time; adjust travis and crossbow scripts 79abc0e8 <Krisztián Szűcs> mark .cc file as generated af78be26 <Krisztián Szűcs> don't bundle irhelpers in the jar a88cd370 <Krisztián Szűcs> cmake format 7deb3593 <Krisztián Szűcs> fix REGEX; remove byteCodeFilePath from java configuration object fa195294 <Krisztián Szűcs> properly construct llvm:StringRef 5841dcdd <Krisztián Szűcs> remove commented code 42391b13 <Krisztián Szűcs> don't pass precompiled bitcode all around the constructors d480c83c <Krisztián Szűcs> use const string ref for now b0b11175 <Krisztián Szűcs> conda llvmdev 169f43ac <Krisztián Szűcs> build gandiva in cpp docker image cb696251 <Krisztián Szűcs> silent maven download msgs 19200c30 <Krisztián Szűcs> don't run wheel tests twice; cmake format f2205d01 <Krisztián Szűcs> gandiva jni dbf5b1cf <Krisztián Szűcs> embed precompiled bitcode as char array; load precompiled IR from string 00d98e0d <Krisztián Szűcs> try to bundle bytecode files 97fe94b2 <Krisztián Szűcs> fix requirements-test.txt path 86e7e5bc <Krisztián Szűcs> run pyarrow tests in manylinux CI build --- .travis.yml | 2 + ci/docker_build_cpp.sh | 2 + ci/travis_script_gandiva_java.sh | 2 + ci/travis_script_manylinux.sh | 57 +++++-- cpp/Dockerfile | 1 + cpp/src/gandiva/CMakeLists.txt | 19 +-- cpp/src/gandiva/configuration.cc | 5 +- cpp/src/gandiva/configuration.h | 24 +-- cpp/src/gandiva/engine.cc | 19 ++- cpp/src/gandiva/engine.h | 5 +- cpp/src/gandiva/jni/config_builder.cc | 8 - cpp/src/gandiva/jni/env_helper.h | 3 - cpp/src/gandiva/jni/jni_common.cc | 6 - cpp/src/gandiva/precompiled/CMakeLists.txt | 26 ++- ...c_file_path.cc.in => precompiled_bitcode.cc.in} | 7 +- cpp/src/gandiva/tests/projector_test.cc | 15 -- cpp/src/gandiva/tests/test_util.h | 3 +- dev/tasks/python-wheels/linux-test.sh | 4 +- dev/tasks/python-wheels/travis.linux.yml | 6 +- java/gandiva/pom.xml | 14 +- .../gandiva/evaluator/ConfigurationBuilder.java | 12 +- .../apache/arrow/gandiva/evaluator/JniLoader.java | 21 +-- python/manylinux1/build_arrow.sh | 181 ++++++++++----------- 23 files changed, 207 insertions(+), 235 deletions(-) diff --git a/.travis.yml b/.travis.yml index 60c4446..7458ab6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -225,6 +225,8 @@ matrix: - $TRAVIS_BUILD_DIR/ci/travis_script_python.sh 3.6 - name: "[manylinux1] Python" language: cpp + env: + - PYTHON_VERSIONS="2.7,32 3.6,16" before_script: - if [ $ARROW_CI_PYTHON_AFFECTED == "1" ]; then docker pull quay.io/xhochy/arrow_manylinux1_x86_64_base:llvm-7-manylinux1; fi script: diff --git a/ci/docker_build_cpp.sh b/ci/docker_build_cpp.sh index 450dc87..5bf6094 100755 --- a/ci/docker_build_cpp.sh +++ b/ci/docker_build_cpp.sh @@ -34,6 +34,8 @@ cmake -GNinja \ -DARROW_PARQUET=${ARROW_PARQUET:-ON} \ -DARROW_HDFS=${ARROW_HDFS:-OFF} \ -DARROW_PYTHON=${ARROW_PYTHON:-OFF} \ + -DARROW_GANDIVA=${ARROW_GANDIVA:-OFF} \ + -DARROW_GANDIVA_JAVA=${ARROW_GANDIVA_JAVA:-OFF} \ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ -DARROW_BUILD_UTILITIES=${ARROW_BUILD_UTILITIES:-ON} \ -DARROW_INSTALL_NAME_RPATH=${ARROW_INSTALL_NAME_RPATH:-ON} \ diff --git a/ci/travis_script_gandiva_java.sh b/ci/travis_script_gandiva_java.sh index 387be9a..9d5871f 100755 --- a/ci/travis_script_gandiva_java.sh +++ b/ci/travis_script_gandiva_java.sh @@ -22,6 +22,8 @@ set -e source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh JAVA_DIR=${TRAVIS_BUILD_DIR}/java +export MAVEN_OPTS="-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn" + pushd $JAVA_DIR # build with gandiva profile diff --git a/ci/travis_script_manylinux.sh b/ci/travis_script_manylinux.sh index 8e8086a..fa02c82 100755 --- a/ci/travis_script_manylinux.sh +++ b/ci/travis_script_manylinux.sh @@ -20,23 +20,58 @@ set -ex -pushd python/manylinux1 -docker run --shm-size=2g --rm -e PYARROW_PARALLEL=3 -v $PWD:/io -v $PWD/../../:/arrow quay.io/xhochy/arrow_manylinux1_x86_64_base:llvm-7-manylinux1 /io/build_arrow.sh - # Testing for https://issues.apache.org/jira/browse/ARROW-2657 # These tests cannot be run inside of the docker container, since TensorFlow # does not run on manylinux1 source $TRAVIS_BUILD_DIR/ci/travis_env_common.sh - source $TRAVIS_BUILD_DIR/ci/travis_install_conda.sh -PYTHON_VERSION=3.6 -CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION +pushd python/manylinux1 + +cat << EOF > check_imports.py +import sys +import pyarrow +import pyarrow.orc +import pyarrow.parquet +import pyarrow.plasma +import tensorflow + +if sys.version_info.major > 2: + import pyarrow.gandiva +EOF + +for PYTHON_TUPLE in ${PYTHON_VERSIONS}; do + IFS="," read PYTHON_VERSION UNICODE_WIDTH <<< $PYTHON_TUPLE + + # cleanup the artifact directory, docker writes it as root + sudo rm -rf dist + + # build the wheels + docker run --shm-size=2g --rm \ + -e PYARROW_PARALLEL=3 \ + -e PYTHON_VERSION=$PYTHON_VERSION \ + -e UNICODE_WIDTH=$UNICODE_WIDTH \ + -v $PWD:/io \ + -v $PWD/../../:/arrow \ + quay.io/xhochy/arrow_manylinux1_x86_64_base:latest \ + /io/build_arrow.sh + + # create a testing conda environment + CONDA_ENV_DIR=$TRAVIS_BUILD_DIR/pyarrow-test-$PYTHON_VERSION + conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION + conda activate $CONDA_ENV_DIR + + # install the produced wheels + pip install tensorflow + pip install dist/*.whl + + # Test optional dependencies and the presence of tensorflow + python check_imports.py -conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION -conda activate $CONDA_ENV_DIR + # Install test dependencies and run pyarrow tests + pip install -r ../requirements-test.txt + pytest --pyargs pyarrow +done -pip install -q tensorflow -pip install "dist/`ls dist/ | grep cp36`" -python -c "import pyarrow; import tensorflow" +popd diff --git a/cpp/Dockerfile b/cpp/Dockerfile index 17d332d..b570bf6 100644 --- a/cpp/Dockerfile +++ b/cpp/Dockerfile @@ -48,6 +48,7 @@ RUN arrow/ci/docker_install_conda.sh && \ ENV CC=gcc \ CXX=g++ \ + ARROW_GANDIVA=OFF \ ARROW_BUILD_TESTS=ON \ ARROW_BUILD_TOOLCHAIN=$CONDA_PREFIX \ ARROW_HOME=$CONDA_PREFIX \ diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt index 068a21e..9d03ef1 100644 --- a/cpp/src/gandiva/CMakeLists.txt +++ b/cpp/src/gandiva/CMakeLists.txt @@ -27,17 +27,14 @@ add_dependencies(gandiva-all gandiva gandiva-tests gandiva-benchmarks) find_package(LLVM) -# Set the path where the byte-code files will be installed. -set(GANDIVA_BC_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}/gandiva) +# Set the path where the bitcode file generated, see precompiled/CMakeLists.txt +set(GANDIVA_PRECOMPILED_BC_PATH "${CMAKE_CURRENT_BINARY_DIR}/irhelpers.bc") +set(GANDIVA_PRECOMPILED_CC_PATH "${CMAKE_CURRENT_BINARY_DIR}/precompiled_bitcode.cc") +set(GANDIVA_PRECOMPILED_CC_IN_PATH + "${CMAKE_CURRENT_SOURCE_DIR}/precompiled_bitcode.cc.in") -set(GANDIVA_BC_FILE_NAME irhelpers.bc) -set(GANDIVA_BC_INSTALL_PATH "${GANDIVA_BC_INSTALL_DIR}/${GANDIVA_BC_FILE_NAME}") -set(GANDIVA_BC_OUTPUT_PATH "${CMAKE_CURRENT_BINARY_DIR}/${GANDIVA_BC_FILE_NAME}") -install(FILES ${GANDIVA_BC_OUTPUT_PATH} DESTINATION ${GANDIVA_BC_INSTALL_DIR}) - -set(BC_FILE_PATH_CC "${CMAKE_CURRENT_BINARY_DIR}/bc_file_path.cc") -configure_file(bc_file_path.cc.in ${BC_FILE_PATH_CC}) -add_definitions(-DGANDIVA_BYTE_COMPILE_FILE_PATH="${GANDIVA_BC_OUTPUT_PATH}") +# add_arrow_lib will look for this not yet existing file, so flag as generated +set_source_files_properties(${GANDIVA_PRECOMPILED_CC_PATH} PROPERTIES GENERATED TRUE) set(SRC_FILES annotator.cc @@ -73,7 +70,7 @@ set(SRC_FILES selection_vector.cc tree_expr_builder.cc to_date_holder.cc - ${BC_FILE_PATH_CC}) + ${GANDIVA_PRECOMPILED_CC_PATH}) set(GANDIVA_SHARED_PRIVATE_LINK_LIBS arrow_shared LLVM::LLVM_INTERFACE ${RE2_LIBRARY}) diff --git a/cpp/src/gandiva/configuration.cc b/cpp/src/gandiva/configuration.cc index df1a13d..0c6a33f 100644 --- a/cpp/src/gandiva/configuration.cc +++ b/cpp/src/gandiva/configuration.cc @@ -25,12 +25,11 @@ const std::shared_ptr<Configuration> ConfigurationBuilder::default_configuration InitDefaultConfig(); std::size_t Configuration::Hash() const { - boost::hash<std::string> string_hash; - return string_hash(byte_code_file_path_); + return 0; // dummy for now, no configuration properties yet } bool Configuration::operator==(const Configuration& other) const { - return other.byte_code_file_path() == byte_code_file_path(); + return true; // always true, no configuration properties yet } bool Configuration::operator!=(const Configuration& other) const { diff --git a/cpp/src/gandiva/configuration.h b/cpp/src/gandiva/configuration.h index 480a95e..7fb0d63 100644 --- a/cpp/src/gandiva/configuration.h +++ b/cpp/src/gandiva/configuration.h @@ -26,9 +26,6 @@ namespace gandiva { -GANDIVA_EXPORT -extern const char kByteCodeFilePath[]; - class ConfigurationBuilder; /// \brief runtime config for gandiva /// @@ -38,17 +35,9 @@ class GANDIVA_EXPORT Configuration { public: friend class ConfigurationBuilder; - const std::string& byte_code_file_path() const { return byte_code_file_path_; } - std::size_t Hash() const; bool operator==(const Configuration& other) const; bool operator!=(const Configuration& other) const; - - private: - explicit Configuration(const std::string& byte_code_file_path) - : byte_code_file_path_(byte_code_file_path) {} - - const std::string byte_code_file_path_; }; /// \brief configuration builder for gandiva @@ -57,15 +46,8 @@ class GANDIVA_EXPORT Configuration { /// to override specific values and build a custom instance class GANDIVA_EXPORT ConfigurationBuilder { public: - ConfigurationBuilder() : byte_code_file_path_(kByteCodeFilePath) {} - - ConfigurationBuilder& set_byte_code_file_path(const std::string& byte_code_file_path) { - byte_code_file_path_ = byte_code_file_path; - return *this; - } - std::shared_ptr<Configuration> build() { - std::shared_ptr<Configuration> configuration(new Configuration(byte_code_file_path_)); + std::shared_ptr<Configuration> configuration(new Configuration()); return configuration; } @@ -74,10 +56,8 @@ class GANDIVA_EXPORT ConfigurationBuilder { } private: - std::string byte_code_file_path_; - static std::shared_ptr<Configuration> InitDefaultConfig() { - std::shared_ptr<Configuration> configuration(new Configuration(kByteCodeFilePath)); + std::shared_ptr<Configuration> configuration(new Configuration()); return configuration; } diff --git a/cpp/src/gandiva/engine.cc b/cpp/src/gandiva/engine.cc index c816ec7..22805bb 100644 --- a/cpp/src/gandiva/engine.cc +++ b/cpp/src/gandiva/engine.cc @@ -66,6 +66,9 @@ namespace gandiva { +extern const char kPrecompiledBitcode[]; +extern const size_t kPrecompiledBitcodeSize; + std::once_flag init_once_flag; bool Engine::init_once_done_ = false; @@ -114,7 +117,7 @@ Status Engine::Make(std::shared_ptr<Configuration> config, // Add mappings for functions that can be accessed from LLVM/IR module. engine_obj->AddGlobalMappings(); - auto status = engine_obj->LoadPreCompiledIRFiles(config->byte_code_file_path()); + auto status = engine_obj->LoadPreCompiledIR(); ARROW_RETURN_NOT_OK(status); // Add decimal functions @@ -126,14 +129,16 @@ Status Engine::Make(std::shared_ptr<Configuration> config, } // Handling for pre-compiled IR libraries. -Status Engine::LoadPreCompiledIRFiles(const std::string& byte_code_file_path) { +Status Engine::LoadPreCompiledIR() { + auto bitcode = llvm::StringRef(kPrecompiledBitcode, kPrecompiledBitcodeSize); + /// Read from file into memory buffer. llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> buffer_or_error = - llvm::MemoryBuffer::getFile(byte_code_file_path); - ARROW_RETURN_IF( - !buffer_or_error, - Status::CodeGenError("Could not load module from IR ", byte_code_file_path, ": ", - buffer_or_error.getError().message())); + llvm::MemoryBuffer::getMemBuffer(bitcode, "precompiled", false); + + ARROW_RETURN_IF(!buffer_or_error, + Status::CodeGenError("Could not load module from IR: ", + buffer_or_error.getError().message())); std::unique_ptr<llvm::MemoryBuffer> buffer = move(buffer_or_error.get()); diff --git a/cpp/src/gandiva/engine.h b/cpp/src/gandiva/engine.h index 7a976d5..3b46f62 100644 --- a/cpp/src/gandiva/engine.h +++ b/cpp/src/gandiva/engine.h @@ -79,8 +79,9 @@ class GANDIVA_EXPORT Engine { llvm::ExecutionEngine& execution_engine() { return *execution_engine_.get(); } - /// load pre-compiled IR modules and merge them into the main module. - Status LoadPreCompiledIRFiles(const std::string& byte_code_file_path); + /// load pre-compiled IR modules from precompiled_bitcode.cc and merge them into + /// the main module. + Status LoadPreCompiledIR(); // Create and add mappings for cpp functions that can be accessed from LLVM. void AddGlobalMappings(); diff --git a/cpp/src/gandiva/jni/config_builder.cc b/cpp/src/gandiva/jni/config_builder.cc index b224279..f2694bb 100644 --- a/cpp/src/gandiva/jni/config_builder.cc +++ b/cpp/src/gandiva/jni/config_builder.cc @@ -34,16 +34,8 @@ using gandiva::ConfigurationBuilder; JNIEXPORT jlong JNICALL Java_org_apache_arrow_gandiva_evaluator_ConfigurationBuilder_buildConfigInstance( JNIEnv* env, jobject configuration) { - jstring byte_code_file_path = - (jstring)env->CallObjectMethod(configuration, byte_code_accessor_method_id_, 0); ConfigurationBuilder configuration_builder; - if (byte_code_file_path != nullptr) { - const char* byte_code_file_path_cpp = env->GetStringUTFChars(byte_code_file_path, 0); - configuration_builder.set_byte_code_file_path(byte_code_file_path_cpp); - env->ReleaseStringUTFChars(byte_code_file_path, byte_code_file_path_cpp); - } std::shared_ptr<Configuration> config = configuration_builder.build(); - env->DeleteLocalRef(byte_code_file_path); return ConfigHolder::MapInsert(config); } diff --git a/cpp/src/gandiva/jni/env_helper.h b/cpp/src/gandiva/jni/env_helper.h index c1bfbaa..1b56304 100644 --- a/cpp/src/gandiva/jni/env_helper.h +++ b/cpp/src/gandiva/jni/env_helper.h @@ -23,7 +23,4 @@ // class references extern jclass configuration_builder_class_; -// method references -extern jmethodID byte_code_accessor_method_id_; - #endif // ENV_HELPER_H diff --git a/cpp/src/gandiva/jni/jni_common.cc b/cpp/src/gandiva/jni/jni_common.cc index 339b0cd..1ba3aad 100644 --- a/cpp/src/gandiva/jni/jni_common.cc +++ b/cpp/src/gandiva/jni/jni_common.cc @@ -68,7 +68,6 @@ static jint JNI_VERSION = JNI_VERSION_1_6; // extern refs - initialized for other modules. jclass configuration_builder_class_; -jmethodID byte_code_accessor_method_id_; // refs for self. static jclass gandiva_exception_; @@ -93,11 +92,6 @@ jint JNI_OnLoad(JavaVM* vm, void* reserved) { gandiva_exception_ = (jclass)env->NewGlobalRef(localExceptionClass); env->DeleteLocalRef(localExceptionClass); - const char method_name[] = "getByteCodeFilePath"; - const char return_type[] = "()Ljava/lang/String;"; - byte_code_accessor_method_id_ = - env->GetMethodID(configuration_builder_class_, method_name, return_type); - env->ExceptionDescribe(); return JNI_VERSION; diff --git a/cpp/src/gandiva/precompiled/CMakeLists.txt b/cpp/src/gandiva/precompiled/CMakeLists.txt index 8e2f498..1249000 100644 --- a/cpp/src/gandiva/precompiled/CMakeLists.txt +++ b/cpp/src/gandiva/precompiled/CMakeLists.txt @@ -67,12 +67,32 @@ foreach(SRC_FILE ${PRECOMPILED_SRCS}) endforeach() # link all of the bitcode files into a single bitcode file. -add_custom_command(OUTPUT ${GANDIVA_BC_OUTPUT_PATH} - COMMAND ${LLVM_LINK_EXECUTABLE} -o ${GANDIVA_BC_OUTPUT_PATH} +add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_BC_PATH} + COMMAND ${LLVM_LINK_EXECUTABLE} -o ${GANDIVA_PRECOMPILED_BC_PATH} ${BC_FILES} DEPENDS ${BC_FILES}) -add_custom_target(precompiled ALL DEPENDS ${GANDIVA_BC_OUTPUT_PATH}) +# write a cmake script to replace precompiled bitcode file's content into a .cc file +file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/WritePrecompiledCC.cmake" "\ + file(READ \"${GANDIVA_PRECOMPILED_BC_PATH}\" HEXCONTENT HEX) + string(REGEX REPLACE + \"([0-9a-f][0-9a-f])\" + \"'\\\\\\\\x\\\\1',\" + GANDIVA_PRECOMPILED_BITCODE_CHARS + \"\${HEXCONTENT}\") + configure_file(${GANDIVA_PRECOMPILED_CC_IN_PATH} + ${GANDIVA_PRECOMPILED_CC_PATH}) +") + +# add the previous command to the execution chain +add_custom_command(OUTPUT ${GANDIVA_PRECOMPILED_CC_PATH} + COMMAND ${CMAKE_COMMAND} -P + "${CMAKE_CURRENT_BINARY_DIR}/WritePrecompiledCC.cmake" + DEPENDS ${GANDIVA_PRECOMPILED_CC_IN_PATH} + ${GANDIVA_PRECOMPILED_BC_PATH}) + +add_custom_target(precompiled ALL + DEPENDS ${GANDIVA_PRECOMPILED_BC_PATH} ${GANDIVA_PRECOMPILED_CC_PATH}) function(add_precompiled_unit_test REL_TEST_NAME) get_filename_component(TEST_NAME ${REL_TEST_NAME} NAME_WE) diff --git a/cpp/src/gandiva/bc_file_path.cc.in b/cpp/src/gandiva/precompiled_bitcode.cc.in similarity index 79% rename from cpp/src/gandiva/bc_file_path.cc.in rename to cpp/src/gandiva/precompiled_bitcode.cc.in index 54e81ca..1d5985a 100644 --- a/cpp/src/gandiva/bc_file_path.cc.in +++ b/cpp/src/gandiva/precompiled_bitcode.cc.in @@ -15,9 +15,12 @@ // specific language governing permissions and limitations // under the License. +#include <string> + namespace gandiva { -// Path to the byte-code file. -extern const char kByteCodeFilePath[] = "${GANDIVA_BC_INSTALL_PATH}"; +// Content of precompiled bitcode file. +extern const char kPrecompiledBitcode[] = { ${GANDIVA_PRECOMPILED_BITCODE_CHARS} }; +extern const size_t kPrecompiledBitcodeSize = sizeof(kPrecompiledBitcode); } // namespace gandiva diff --git a/cpp/src/gandiva/tests/projector_test.cc b/cpp/src/gandiva/tests/projector_test.cc index ba0e632..8c0b9dd 100644 --- a/cpp/src/gandiva/tests/projector_test.cc +++ b/cpp/src/gandiva/tests/projector_test.cc @@ -84,21 +84,6 @@ TEST_F(TestProjector, TestProjectCache) { &cached_projector); ASSERT_OK(status); EXPECT_EQ(cached_projector, projector); - - // if configuration is different, should return a new projector. - - // build a new path by replacing the first '/' with '//' - std::string alt_path(GANDIVA_BYTE_COMPILE_FILE_PATH); - auto pos = alt_path.find('/', 0); - EXPECT_NE(pos, std::string::npos); - alt_path.replace(pos, 1, "//"); - auto other_configuration = - ConfigurationBuilder().set_byte_code_file_path(alt_path).build(); - std::shared_ptr<Projector> should_be_new_projector2; - status = Projector::Make(schema, {sum_expr, sub_expr}, other_configuration, - &should_be_new_projector2); - ASSERT_OK(status); - EXPECT_NE(projector, should_be_new_projector2); } TEST_F(TestProjector, TestProjectCacheFieldNames) { diff --git a/cpp/src/gandiva/tests/test_util.h b/cpp/src/gandiva/tests/test_util.h index af16412..e841e6a 100644 --- a/cpp/src/gandiva/tests/test_util.h +++ b/cpp/src/gandiva/tests/test_util.h @@ -89,8 +89,7 @@ static ArrayPtr MakeArrowTypeArray(const std::shared_ptr<arrow::DataType>& type, std::shared_ptr<Configuration> TestConfiguration() { auto builder = ConfigurationBuilder(); - builder.set_byte_code_file_path(GANDIVA_BYTE_COMPILE_FILE_PATH); - return builder.build(); + return builder.DefaultConfiguration(); } } // namespace gandiva diff --git a/dev/tasks/python-wheels/linux-test.sh b/dev/tasks/python-wheels/linux-test.sh index 45efdb0..dcd77eb 100755 --- a/dev/tasks/python-wheels/linux-test.sh +++ b/dev/tasks/python-wheels/linux-test.sh @@ -24,17 +24,17 @@ pip install /arrow/python/manylinux1/dist/*.whl python --version # Test optional dependencies -command=" +python -c " import sys import pyarrow import pyarrow.orc import pyarrow.parquet import pyarrow.plasma +import tensorflow if sys.version_info.major > 2: import pyarrow.gandiva " -python -c "$command" # Run pyarrow tests pip install -r /arrow/python/requirements-test.txt diff --git a/dev/tasks/python-wheels/travis.linux.yml b/dev/tasks/python-wheels/travis.linux.yml index b5cbc65..240c523 100644 --- a/dev/tasks/python-wheels/travis.linux.yml +++ b/dev/tasks/python-wheels/travis.linux.yml @@ -41,10 +41,12 @@ script: - pushd arrow/python/manylinux1 - docker run --shm-size=2g -e SETUPTOOLS_SCM_PRETEND_VERSION={{ arrow.no_rc_version }} - -e PYTHON_VERSIONS="{{ python_version }},{{ unicode_width }}" + -e PYTHON_VERSION="{{ python_version }}" + -e UNICODE_WIDTH="{{ unicode_width }}" -v $PWD:/io -v $PWD/../../:/arrow - quay.io/xhochy/arrow_manylinux1_x86_64_base:latest /io/build_arrow.sh + quay.io/xhochy/arrow_manylinux1_x86_64_base:llvm-7-manylinux1 + /io/build_arrow.sh - popd # test on multiple distributions diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index a090153..3469c9f 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -29,7 +29,7 @@ <protobuf.version>2.5.0</protobuf.version> <dep.guava.version>18.0</dep.guava.version> <checkstyle.failOnViolation>true</checkstyle.failOnViolation> - <gandiva.cpp.build.dir>../../cpp/debug/debug</gandiva.cpp.build.dir> + <gandiva.cpp.build.dir>../../cpp/build/debug</gandiva.cpp.build.dir> </properties> <dependencies> <dependency> @@ -126,18 +126,6 @@ <include>**/libgandiva_jni.*</include> </includes> </resource> - <resource> - <directory>${gandiva.cpp.build.dir}</directory> - <includes> - <include>**/libgandiva_helpers.*</include> - </includes> - </resource> - <resource> - <directory>${gandiva.cpp.build.dir}/../src/gandiva</directory> - <includes> - <include>irhelpers.bc</include> - </includes> - </resource> </resources> <extensions> diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java index 46deee9..f0e8c0d 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/ConfigurationBuilder.java @@ -22,18 +22,8 @@ package org.apache.arrow.gandiva.evaluator; */ public class ConfigurationBuilder { - private String byteCodeFilePath = ""; - - public ConfigurationBuilder withByteCodeFilePath(final String byteCodeFilePath) { - this.byteCodeFilePath = byteCodeFilePath; - return this; - } - - public String getByteCodeFilePath() { - return byteCodeFilePath; - } - public native long buildConfigInstance(); public native void releaseConfigInstance(long configId); + } diff --git a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java index ccb5307..b6174d7 100644 --- a/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java +++ b/java/gandiva/src/main/java/org/apache/arrow/gandiva/evaluator/JniLoader.java @@ -32,16 +32,13 @@ import org.apache.arrow.gandiva.exceptions.GandivaException; */ class JniLoader { private static final String LIBRARY_NAME = "gandiva_jni"; - private static final String IRHELPERS_BC = "irhelpers.bc"; private static volatile JniLoader INSTANCE; private static volatile long defaultConfiguration = 0L; - private final String byteCodeFilePath; private final JniWrapper wrapper; - private JniLoader(String byteCodeFilePath) { - this.byteCodeFilePath = byteCodeFilePath; + private JniLoader() { this.wrapper = new JniWrapper(); } @@ -60,8 +57,7 @@ class JniLoader { try { String tempDir = System.getProperty("java.io.tmpdir"); loadGandivaLibraryFromJar(tempDir); - File byteCodeFile = moveFileFromJarToTemp(tempDir, IRHELPERS_BC); - return new JniLoader(byteCodeFile.getAbsolutePath()); + return new JniLoader(); } catch (IOException ioException) { throw new GandivaException("unable to create native instance", ioException); } @@ -108,13 +104,6 @@ class JniLoader { } /** - * Returns the byte code file path extracted from jar. - */ - public String getByteCodeFilePath() { - return byteCodeFilePath; - } - - /** * Returns the jni wrapper. */ JniWrapper getWrapper() throws GandivaException { @@ -130,11 +119,9 @@ class JniLoader { if (defaultConfiguration == 0L) { synchronized (ConfigurationBuilder.class) { if (defaultConfiguration == 0L) { - String defaultByteCodeFilePath = JniLoader.getInstance().getByteCodeFilePath(); - + JniLoader.getInstance(); // setup defaultConfiguration = new ConfigurationBuilder() - .withByteCodeFilePath(defaultByteCodeFilePath) - .buildConfigInstance(); + .buildConfigInstance(); } } } diff --git a/python/manylinux1/build_arrow.sh b/python/manylinux1/build_arrow.sh index 75537a6..e0475bb 100755 --- a/python/manylinux1/build_arrow.sh +++ b/python/manylinux1/build_arrow.sh @@ -23,9 +23,6 @@ # Build upon the scripts in https://github.com/matthew-brett/manylinux-builds # * Copyright (c) 2013-2016, Matt Terry and Matthew Brett (BSD 2-clause) -# Build different python versions with various unicode widths -PYTHON_VERSIONS="${PYTHON_VERSIONS:-2.7,16 2.7,32 3.5,16 3.6,16 3.7,16}" - source /multibuild/manylinux_utils.sh # Quit on failure @@ -48,95 +45,89 @@ export PYARROW_CMAKE_OPTIONS='-DTHRIFT_HOME=/usr -DBoost_NAMESPACE=arrow_boost - # Ensure the target directory exists mkdir -p /io/dist -for PYTHON_TUPLE in ${PYTHON_VERSIONS}; do - IFS="," - set -- $PYTHON_TUPLE; - PYTHON=$1 - U_WIDTH=$2 - CPYTHON_PATH="$(cpython_path $PYTHON ${U_WIDTH})" - PYTHON_INTERPRETER="${CPYTHON_PATH}/bin/python" - PIP="${CPYTHON_PATH}/bin/pip" - PATH="$PATH:${CPYTHON_PATH}" - - if [ $PYTHON != "2.7" ]; then - # Gandiva is not supported on Python 2.7 - export PYARROW_WITH_GANDIVA=1 - export BUILD_ARROW_GANDIVA=ON - else - export PYARROW_WITH_GANDIVA=0 - export BUILD_ARROW_GANDIVA=OFF - fi - - echo "=== (${PYTHON}) Building Arrow C++ libraries ===" - ARROW_BUILD_DIR=/tmp/build-PY${PYTHON}-${U_WIDTH} - mkdir -p "${ARROW_BUILD_DIR}" - pushd "${ARROW_BUILD_DIR}" - PATH="${CPYTHON_PATH}/bin:$PATH" cmake -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_INSTALL_PREFIX=/arrow-dist \ - -DCMAKE_INSTALL_LIBDIR=lib \ - -DARROW_BUILD_TESTS=OFF \ - -DARROW_BUILD_SHARED=ON \ - -DARROW_BOOST_USE_SHARED=ON \ - -DARROW_GANDIVA_PC_CXX_FLAGS="-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2;-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2/x86_64-CentOS-linux/" \ - -DARROW_JEMALLOC=ON \ - -DARROW_RPATH_ORIGIN=ON \ - -DARROW_PYTHON=ON \ - -DARROW_PARQUET=ON \ - -DPythonInterp_FIND_VERSION=${PYTHON} \ - -DARROW_PLASMA=ON \ - -DARROW_TENSORFLOW=ON \ - -DARROW_ORC=ON \ - -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \ - -DARROW_GANDIVA_JAVA=OFF \ - -DBoost_NAMESPACE=arrow_boost \ - -DBOOST_ROOT=/arrow_boost_dist \ - -GNinja /arrow/cpp - ninja install - popd - - # Check that we don't expose any unwanted symbols - /io/scripts/check_arrow_visibility.sh - - echo "=== (${PYTHON}) Install the wheel build dependencies ===" - $PIP install -r requirements-wheel.txt - - # Clear output directory - rm -rf dist/ - echo "=== (${PYTHON}) Building wheel ===" - # Remove build directory to ensure CMake gets a clean run - rm -rf build/ - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py build_ext \ - --inplace \ - --bundle-arrow-cpp \ - --bundle-boost \ - --boost-namespace=arrow_boost - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py bdist_wheel - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py sdist - - echo "=== (${PYTHON}) Tag the wheel with manylinux1 ===" - mkdir -p repaired_wheels/ - auditwheel -v repair -L . dist/pyarrow-*.whl -w repaired_wheels/ - - echo "=== (${PYTHON}) Testing manylinux1 wheel ===" - source /venv-test-${PYTHON}-${U_WIDTH}/bin/activate - pip install repaired_wheels/*.whl - - if [ $PYTHON != "2.7" ]; then - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.gandiva" - fi - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.orc" - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.parquet" - PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER -c "import pyarrow.plasma" - - echo "=== (${PYTHON}) Install modules required for testing ===" - pip install -r requirements-test.txt - - # The TensorFlow test will be skipped here, since TensorFlow is not - # manylinux1 compatible; however, the wheels will support TensorFlow on - # a TensorFlow compatible system - py.test -v -r sxX --durations=15 --parquet ${VIRTUAL_ENV}/lib/*/site-packages/pyarrow - deactivate - - mv repaired_wheels/*.whl /io/dist - mv dist/*.tar.gz /io/dist -done +# Must pass PYTHON_VERSION and UNICODE_WIDTH env variables +# possible values are: 2.7,16 2.7,32 3.5,16 3.6,16 3.7,16 + +CPYTHON_PATH="$(cpython_path ${PYTHON_VERSION} ${UNICODE_WIDTH})" +PYTHON_INTERPRETER="${CPYTHON_PATH}/bin/python" +PIP="${CPYTHON_PATH}/bin/pip" +PATH="${PATH}:${CPYTHON_PATH}" + +if [ ${PYTHON_VERSION} != "2.7" ]; then + # Gandiva is not supported on Python 2.7 + export PYARROW_WITH_GANDIVA=1 + export BUILD_ARROW_GANDIVA=ON +else + export PYARROW_WITH_GANDIVA=0 + export BUILD_ARROW_GANDIVA=OFF +fi + +echo "=== (${PYTHON_VERSION}) Building Arrow C++ libraries ===" +ARROW_BUILD_DIR=/tmp/build-PY${PYTHON_VERSION}-${UNICODE_WIDTH} +mkdir -p "${ARROW_BUILD_DIR}" +pushd "${ARROW_BUILD_DIR}" +PATH="${CPYTHON_PATH}/bin:${PATH}" cmake -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_INSTALL_PREFIX=/arrow-dist \ + -DCMAKE_INSTALL_LIBDIR=lib \ + -DARROW_BUILD_TESTS=OFF \ + -DARROW_BUILD_SHARED=ON \ + -DARROW_BOOST_USE_SHARED=ON \ + -DARROW_GANDIVA_PC_CXX_FLAGS="-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2;-isystem;/opt/rh/devtoolset-2/root/usr/include/c++/4.8.2/x86_64-CentOS-linux/" \ + -DARROW_JEMALLOC=ON \ + -DARROW_RPATH_ORIGIN=ON \ + -DARROW_PYTHON=ON \ + -DARROW_PARQUET=ON \ + -DPythonInterp_FIND_VERSION=${PYTHON_VERSION} \ + -DARROW_PLASMA=ON \ + -DARROW_TENSORFLOW=ON \ + -DARROW_ORC=ON \ + -DARROW_GANDIVA=${BUILD_ARROW_GANDIVA} \ + -DARROW_GANDIVA_JAVA=OFF \ + -DBoost_NAMESPACE=arrow_boost \ + -DBOOST_ROOT=/arrow_boost_dist \ + -GNinja /arrow/cpp +ninja install +popd + +# Check that we don't expose any unwanted symbols +/io/scripts/check_arrow_visibility.sh + +echo "=== (${PYTHON_VERSION}) Install the wheel build dependencies ===" +$PIP install -r requirements-wheel.txt + +# Clear output directory +rm -rf dist/ +echo "=== (${PYTHON_VERSION}) Building wheel ===" +# Remove build directory to ensure CMake gets a clean run +rm -rf build/ +PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py build_ext \ + --inplace \ + --bundle-arrow-cpp \ + --bundle-boost \ + --boost-namespace=arrow_boost +PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py bdist_wheel +PATH="$PATH:${CPYTHON_PATH}/bin" $PYTHON_INTERPRETER setup.py sdist + +echo "=== (${PYTHON_VERSION}) Tag the wheel with manylinux1 ===" +mkdir -p repaired_wheels/ +auditwheel -v repair -L . dist/pyarrow-*.whl -w repaired_wheels/ + +# Install the built wheels +$PIP install repaired_wheels/*.whl + +# Test that the modules are importable +$PYTHON_INTERPRETER -c " +import sys +import pyarrow +import pyarrow.orc +import pyarrow.parquet +import pyarrow.plasma + +if sys.version_info.major > 2: + import pyarrow.gandiva +" + +# More thorough testing happens outsite of the build to prevent +# packaging issues like ARROW-4372 +mv dist/*.tar.gz /io/dist +mv repaired_wheels/*.whl /io/dist