Repository: arrow Updated Branches: refs/heads/master ff6c6e0f9 -> 8841bc071
ARROW-1281: [C++/Python] Add Docker setup for testing HDFS IO in C++ and Python We aren't testing this in Travis CI because spinning up an HDFS cluster is a bit heavy weight, but this will at least enable us to do easier ongoing validation that this functionality is working properly. Author: Wes McKinney <[email protected]> Closes #895 from wesm/ARROW-1281 and squashes the following commits: a96e1665 [Wes McKinney] Fix header 4effee78 [Wes McKinney] Fix license header d12eea48 [Wes McKinney] Fix license headers 591e7c6b [Wes McKinney] Add Python tests bbbd8c10 [Wes McKinney] Docker HDFS testing scripts, use hdfs-client.xml from Apache HAWQ (incubating) Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/8841bc07 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/8841bc07 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/8841bc07 Branch: refs/heads/master Commit: 8841bc071b1d0a3eff2592af5ca9b5591ed9e5c5 Parents: ff6c6e0 Author: Wes McKinney <[email protected]> Authored: Fri Jul 28 10:30:57 2017 -0400 Committer: Wes McKinney <[email protected]> Committed: Fri Jul 28 10:30:57 2017 -0400 ---------------------------------------------------------------------- python/testing/README.md | 26 ++ python/testing/functions.sh | 100 ++++++ python/testing/hdfs/Dockerfile | 50 +++ python/testing/hdfs/libhdfs3-hdfs-client.xml | 332 +++++++++++++++++++ python/testing/hdfs/restart_docker_container.sh | 38 +++ python/testing/hdfs/run_tests.sh | 41 +++ python/testing/set_env_common.sh | 70 ++++ python/testing/setup_toolchain.sh | 65 ++++ python/testing/test_hdfs.sh | 25 ++ 9 files changed, 747 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/8841bc07/python/testing/README.md ---------------------------------------------------------------------- diff --git a/python/testing/README.md b/python/testing/README.md new file mode 
100644 index 0000000..07970a2 --- /dev/null +++ b/python/testing/README.md @@ -0,0 +1,26 @@ +<!--- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> + +# Testing tools for odds and ends + +## Testing HDFS file interface + +```shell +./test_hdfs.sh +``` \ No newline at end of file http://git-wip-us.apache.org/repos/asf/arrow/blob/8841bc07/python/testing/functions.sh ---------------------------------------------------------------------- diff --git a/python/testing/functions.sh b/python/testing/functions.sh new file mode 100644 index 0000000..6bc342b --- /dev/null +++ b/python/testing/functions.sh @@ -0,0 +1,100 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +use_gcc() { + export CC=gcc-4.9 + export CXX=g++-4.9 +} + +use_clang() { + export CC=clang-4.0 + export CXX=clang++-4.0 +} + +bootstrap_python_env() { + PYTHON_VERSION=$1 + CONDA_ENV_DIR=$BUILD_DIR/pyarrow-test-$PYTHON_VERSION + + conda create -y -q -p $CONDA_ENV_DIR python=$PYTHON_VERSION cmake curl + source activate $CONDA_ENV_DIR + + python --version + which python + + # faster builds, please + conda install -y -q nomkl pip numpy pandas cython +} + +build_pyarrow() { + # Other stuff pip install + pushd $ARROW_PYTHON_DIR + pip install -r requirements.txt + python setup.py build_ext --with-parquet --with-plasma \ + install --single-version-externally-managed --record=record.text + popd + + python -c "import pyarrow.parquet" + python -c "import pyarrow.plasma" + + export PYARROW_PATH=$CONDA_PREFIX/lib/python$PYTHON_VERSION/site-packages/pyarrow +} + +build_arrow() { + mkdir -p $ARROW_CPP_BUILD_DIR + pushd $ARROW_CPP_BUILD_DIR + + cmake -GNinja \ + -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ + -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ + -DARROW_NO_DEPRECATED_API=ON \ + -DARROW_PYTHON=ON \ + -DARROW_PLASMA=ON \ + -DARROW_BOOST_USE_SHARED=off \ + $ARROW_CPP_DIR + + ninja + ninja install + popd +} + +build_parquet() { + PARQUET_DIR=$BUILD_DIR/parquet + mkdir -p $PARQUET_DIR + + git clone https://github.com/apache/parquet-cpp.git $PARQUET_DIR + + pushd $PARQUET_DIR + mkdir build-dir + cd build-dir + + cmake \ + -GNinja \ + -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ + -DCMAKE_INSTALL_PREFIX=$PARQUET_HOME \ + -DPARQUET_BOOST_USE_SHARED=off \ + -DPARQUET_BUILD_BENCHMARKS=off \ + 
-DPARQUET_BUILD_EXECUTABLES=off \ + -DPARQUET_BUILD_TESTS=off \ + .. + + ninja + ninja install + + popd +} http://git-wip-us.apache.org/repos/asf/arrow/blob/8841bc07/python/testing/hdfs/Dockerfile ---------------------------------------------------------------------- diff --git a/python/testing/hdfs/Dockerfile b/python/testing/hdfs/Dockerfile new file mode 100644 index 0000000..9735513 --- /dev/null +++ b/python/testing/hdfs/Dockerfile @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# TODO Replace this with a complete clean image build +FROM cpcloud86/impala:metastore + +USER root + +RUN apt-add-repository -y ppa:ubuntu-toolchain-r/test && \ + apt-get update && \ + apt-get install -y \ + gcc-4.9 \ + g++-4.9 \ + build-essential \ + autotools-dev \ + autoconf \ + gtk-doc-tools \ + autoconf-archive \ + libgirepository1.0-dev \ + libtool \ + libjemalloc-dev \ + ccache \ + valgrind \ + gdb + +RUN wget -O - http://llvm.org/apt/llvm-snapshot.gpg.key|sudo apt-key add - && \ + apt-add-repository -y \ + "deb http://llvm.org/apt/trusty/ llvm-toolchain-trusty-4.0 main" && \ + apt-get update && \ + apt-get install -y clang-4.0 clang-format-4.0 clang-tidy-4.0 + +USER ubuntu + +RUN wget -O /tmp/miniconda.sh https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ + bash /tmp/miniconda.sh -b -p /home/ubuntu/miniconda && \ + rm /tmp/miniconda.sh http://git-wip-us.apache.org/repos/asf/arrow/blob/8841bc07/python/testing/hdfs/libhdfs3-hdfs-client.xml ---------------------------------------------------------------------- diff --git a/python/testing/hdfs/libhdfs3-hdfs-client.xml b/python/testing/hdfs/libhdfs3-hdfs-client.xml new file mode 100644 index 0000000..f929929 --- /dev/null +++ b/python/testing/hdfs/libhdfs3-hdfs-client.xml @@ -0,0 +1,332 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<!-- From Apache HAWQ (incubating) --> + +<configuration> + +<!-- KDC + <property> + <name>hadoop.security.authentication</name> + <value>kerberos</value> + </property> +KDC --> + +<!-- HA + <property> + <name>dfs.nameservices</name> + <value>phdcluster</value> + </property> + + <property> + <name>dfs.ha.namenodes.phdcluster</name> + <value>nn1,nn2</value> + </property> + + <property> + <name>dfs.namenode.rpc-address.phdcluster.nn1</name> + <value>mdw:9000</value> + </property> + + <property> + <name>dfs.namenode.rpc-address.phdcluster.nn2</name> + <value>smdw:9000</value> + </property> + + <property> + <name>dfs.namenode.http-address.phdcluster.nn1</name> + <value>mdw:50070</value> + </property> + + <property> + <name>dfs.namenode.http-address.phdcluster.nn2</name> + <value>smdw:50070</value> + </property> + +HA --> + + <!-- RPC client configuration --> + <property> + <name>rpc.client.timeout</name> + <value>3600000</value> + <description> + timeout interval of a RPC invocation in millisecond. default is 3600000. + </description> + </property> + <property> + <name>rpc.client.connect.tcpnodelay</name> + <value>true</value> + <description> + whether set socket TCP_NODELAY to true when connect to RPC server. default is true. + </description> + </property> + + <property> + <name>rpc.client.max.idle</name> + <value>10000</value> + <description> + the max idle time of a RPC connection in millisecond. default is 10000. 
+ </description> + </property> + + <property> + <name>rpc.client.ping.interval</name> + <value>10000</value> + <description> + the interval which the RPC client send a heart beat to server. 0 means disable, default is 10000. + </description> + </property> + + <property> + <name>rpc.client.connect.timeout</name> + <value>600000</value> + <description> + the timeout interval in millisecond when the RPC client is trying to setup the connection. default is 600000. + </description> + </property> + + <property> + <name>rpc.client.connect.retry</name> + <value>10</value> + <description> + the max retry times if the RPC client fail to setup the connection to server. default is 10. + </description> + </property> + + <property> + <name>rpc.client.read.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the RPC client is trying to read from server. default is 3600000. + </description> + </property> + + <property> + <name>rpc.client.write.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the RPC client is trying to write to server. default is 3600000. + </description> + </property> + + <property> + <name>rpc.client.socket.linger.timeout</name> + <value>-1</value> + <description> + set value to socket SO_LINGER when connect to RPC server. -1 means default OS value. default is -1. + </description> + </property> + + <!-- dfs client configuration --> + <property> + <name>dfs.client.read.shortcircuit</name> + <value>false</value> + <description> + whether reading block file bypass datanode if the block and the client are on the same node. default is true. + </description> + </property> + + <property> + <name>dfs.default.replica</name> + <value>1</value> + <description> + the default number of replica. default is 3. 
+ </description> + </property> + + <property> + <name>dfs.prefetchsize</name> + <value>10</value> + <description> + the default number of blocks which information will be prefetched. default is 10. + </description> + </property> + + <property> + <name>dfs.client.failover.max.attempts</name> + <value>15</value> + <description> + if multiply namenodes are configured, it is the max retry times when the dfs client try to issue a RPC call. default is 15. + </description> + </property> + + <property> + <name>dfs.default.blocksize</name> + <value>134217728</value> + <description> + default block size. default is 134217728. + </description> + </property> + + <property> + <name>dfs.client.log.severity</name> + <value>INFO</value> + <description> + the minimal log severity level, valid values include FATAL, ERROR, INFO, DEBUG1, DEBUG2, DEBUG3. default is INFO. + </description> + </property> + + <!-- input client configuration --> + <property> + <name>input.connect.timeout</name> + <value>600000</value> + <description> + the timeout interval in millisecond when the input stream is trying to setup the connection to datanode. default is 600000. + </description> + </property> + + <property> + <name>input.read.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the input stream is trying to read from datanode. default is 3600000. + </description> + </property> + + <property> + <name>input.write.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the input stream is trying to write to datanode. default is 3600000. + </description> + </property> + + <property> + <name>input.localread.default.buffersize</name> + <value>2097152</value> + <description> + number of bytes of the buffer which is used to hold the data from block file and verify checksum. + it is only used when "dfs.client.read.shortcircuit" is set to true. default is 1048576. 
+ </description> + </property> + + <property> + <name>input.localread.blockinfo.cachesize</name> + <value>1000</value> + <description> + the size of block file path information cache. default is 1000. + </description> + </property> + + <property> + <name>input.read.getblockinfo.retry</name> + <value>3</value> + <description> + the max retry times when the client fail to get block information from namenode. default is 3. + </description> + </property> + + <!-- output client configuration --> + <property> + <name>output.replace-datanode-on-failure</name> + <value>false</value> + <description> + whether the client add new datanode into pipeline if the number of nodes in pipeline is less the specified number of replicas. default is false. + </description> + </property> + + <property> + <name>output.default.chunksize</name> + <value>512</value> + <description> + the number of bytes of a chunk in pipeline. default is 512. + </description> + </property> + + <property> + <name>output.default.packetsize</name> + <value>65536</value> + <description> + the number of bytes of a packet in pipeline. default is 65536. + </description> + </property> + + <property> + <name>output.default.write.retry</name> + <value>10</value> + <description> + the max retry times when the client fail to setup the pipeline. default is 10. + </description> + </property> + + <property> + <name>output.connect.timeout</name> + <value>600000</value> + <description> + the timeout interval in millisecond when the output stream is trying to setup the connection to datanode. default is 600000. + </description> + </property> + + <property> + <name>output.read.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the output stream is trying to read from datanode. default is 3600000. 
+ </description> + </property> + + <property> + <name>output.write.timeout</name> + <value>3600000</value> + <description> + the timeout interval in millisecond when the output stream is trying to write to datanode. default is 3600000. + </description> + </property> + + <property> + <name>output.packetpool.size</name> + <value>1024</value> + <description> + the max number of packets in a file's packet pool. default is 1024. + </description> + </property> + + <property> + <name>output.close.timeout</name> + <value>900000</value> + <description> + the timeout interval in millisecond when close an output stream. default is 900000. + </description> + </property> + + <property> + <name>dfs.domain.socket.path</name> + <value>/var/lib/hadoop-hdfs/dn_socket</value> + <description> + Optional. This is a path to a UNIX domain socket that will be used for + communication between the DataNode and local HDFS clients. + If the string "_PORT" is present in this path, it will be replaced by the + TCP port of the DataNode. + </description> + </property> + + <property> + <name>dfs.client.use.legacy.blockreader.local</name> + <value>false</value> + <description> + Legacy short-circuit reader implementation based on HDFS-2246 is used + if this configuration parameter is true. + This is for the platforms other than Linux + where the new implementation based on HDFS-347 is not available. + </description> + </property> + +</configuration> http://git-wip-us.apache.org/repos/asf/arrow/blob/8841bc07/python/testing/hdfs/restart_docker_container.sh ---------------------------------------------------------------------- diff --git a/python/testing/hdfs/restart_docker_container.sh b/python/testing/hdfs/restart_docker_container.sh new file mode 100644 index 0000000..15076cc --- /dev/null +++ b/python/testing/hdfs/restart_docker_container.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +export ARROW_TEST_NN_HOST=arrow-hdfs +export ARROW_TEST_IMPALA_HOST=$ARROW_TEST_NN_HOST +export ARROW_TEST_IMPALA_PORT=21050 +export ARROW_TEST_WEBHDFS_PORT=50070 +export ARROW_TEST_WEBHDFS_USER=ubuntu + +docker stop $ARROW_TEST_NN_HOST +docker rm $ARROW_TEST_NN_HOST + +docker run -d -it --name $ARROW_TEST_NN_HOST \ + -v $PWD:/io \ + --hostname $ARROW_TEST_NN_HOST \ + --shm-size=2gb \ + -p $ARROW_TEST_WEBHDFS_PORT -p $ARROW_TEST_IMPALA_PORT \ + arrow-hdfs-test + +while ! docker exec $ARROW_TEST_NN_HOST impala-shell -q 'SELECT VERSION()'; do + sleep 1 +done http://git-wip-us.apache.org/repos/asf/arrow/blob/8841bc07/python/testing/hdfs/run_tests.sh ---------------------------------------------------------------------- diff --git a/python/testing/hdfs/run_tests.sh b/python/testing/hdfs/run_tests.sh new file mode 100755 index 0000000..e0d36df --- /dev/null +++ b/python/testing/hdfs/run_tests.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +HERE=$(cd `dirname "${BASH_SOURCE[0]:-$0}"` && pwd) + +source $HERE/../set_env_common.sh +source $HERE/../setup_toolchain.sh +source $HERE/../functions.sh + +git clone https://github.com/apache/arrow.git $ARROW_CHECKOUT + +use_clang + +bootstrap_python_env 3.6 + +build_arrow +build_parquet + +build_pyarrow + +$ARROW_CPP_BUILD_DIR/debug/io-hdfs-test + +python -m pytest -vv -r sxX -s $PYARROW_PATH --parquet --hdfs http://git-wip-us.apache.org/repos/asf/arrow/blob/8841bc07/python/testing/set_env_common.sh ---------------------------------------------------------------------- diff --git a/python/testing/set_env_common.sh b/python/testing/set_env_common.sh new file mode 100644 index 0000000..00251f9 --- /dev/null +++ b/python/testing/set_env_common.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +export MINICONDA=$HOME/miniconda +export CPP_TOOLCHAIN=$HOME/cpp-toolchain + +export PATH="$MINICONDA/bin:$PATH" +export CONDA_PKGS_DIRS=$HOME/.conda_packages + +export ARROW_CHECKOUT=$HOME/arrow +export BUILD_DIR=$ARROW_CHECKOUT + +export BUILD_OS_NAME=linux +export BUILD_TYPE=debug + +export ARROW_CPP_DIR=$BUILD_DIR/cpp +export ARROW_PYTHON_DIR=$BUILD_DIR/python +export ARROW_C_GLIB_DIR=$BUILD_DIR/c_glib +export ARROW_JAVA_DIR=${BUILD_DIR}/java +export ARROW_JS_DIR=${BUILD_DIR}/js +export ARROW_INTEGRATION_DIR=$BUILD_DIR/integration + +export CPP_BUILD_DIR=$BUILD_DIR/cpp-build + +export ARROW_CPP_INSTALL=$BUILD_DIR/cpp-install +export ARROW_CPP_BUILD_DIR=$BUILD_DIR/cpp-build +export ARROW_C_GLIB_INSTALL=$BUILD_DIR/c-glib-install + +export ARROW_BUILD_TOOLCHAIN=$CPP_TOOLCHAIN +export PARQUET_BUILD_TOOLCHAIN=$CPP_TOOLCHAIN + +export BOOST_ROOT=$CPP_TOOLCHAIN +export PATH=$CPP_TOOLCHAIN/bin:$PATH +export LD_LIBRARY_PATH=$CPP_TOOLCHAIN/lib:$LD_LIBRARY_PATH + +export VALGRIND="valgrind --tool=memcheck" + +export ARROW_HOME=$CPP_TOOLCHAIN +export PARQUET_HOME=$CPP_TOOLCHAIN + +# Arrow test variables + +export JAVA_HOME=/usr/lib/jvm/java-7-oracle +export HADOOP_HOME=/usr/lib/hadoop +export CLASSPATH=`$HADOOP_HOME/bin/hadoop classpath --glob` +export HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$HADOOP_HOME/lib/native" +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native/ + +export ARROW_HDFS_TEST_HOST=arrow-hdfs +export ARROW_HDFS_TEST_PORT=9000 +export ARROW_HDFS_TEST_USER=ubuntu +export ARROW_LIBHDFS_DIR=/usr/lib + +export 
LIBHDFS3_CONF=/io/hdfs/libhdfs3-hdfs-client.xml http://git-wip-us.apache.org/repos/asf/arrow/blob/8841bc07/python/testing/setup_toolchain.sh ---------------------------------------------------------------------- diff --git a/python/testing/setup_toolchain.sh b/python/testing/setup_toolchain.sh new file mode 100644 index 0000000..c3837b4 --- /dev/null +++ b/python/testing/setup_toolchain.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +set -e + +export PATH="$MINICONDA/bin:$PATH" +conda update -y -q conda +conda config --set auto_update_conda false +conda info -a + +conda config --set show_channel_urls True + +# Help with SSL timeouts to S3 +conda config --set remote_connect_timeout_secs 12 + +conda config --add channels https://repo.continuum.io/pkgs/free +conda config --add channels conda-forge +conda info -a + +# faster builds, please +conda install -y nomkl + +conda install -y conda-build jinja2 anaconda-client cmake curl + +# Set up C++ toolchain +conda create -y -q -p $CPP_TOOLCHAIN python=3.6 \ + jemalloc=4.4.0 \ + nomkl \ + boost-cpp \ + rapidjson \ + flatbuffers \ + gflags \ + lz4-c \ + snappy \ + zstd \ + brotli \ + zlib \ + git \ + cmake \ + curl \ + thrift-cpp \ + libhdfs3 \ + ninja + +if [ $BUILD_OS_NAME == "osx" ]; then + brew update > /dev/null + brew install jemalloc + brew install ccache +fi http://git-wip-us.apache.org/repos/asf/arrow/blob/8841bc07/python/testing/test_hdfs.sh ---------------------------------------------------------------------- diff --git a/python/testing/test_hdfs.sh b/python/testing/test_hdfs.sh new file mode 100755 index 0000000..016e54a --- /dev/null +++ b/python/testing/test_hdfs.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +set -ex + +docker build -t arrow-hdfs-test -f hdfs/Dockerfile . +bash hdfs/restart_docker_container.sh +docker exec -it arrow-hdfs /io/hdfs/run_tests.sh +docker stop arrow-hdfs
