This is an automated email from the ASF dual-hosted git repository.
kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 76979c9 ARROW-4420: [INTEGRATION] Make spark integration test pass
and test against spark's master branch
76979c9 is described below
commit 76979c924ef64da8a6fa2f5133e681cf0649c057
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Mon Feb 18 22:43:44 2019 +0100
ARROW-4420: [INTEGRATION] Make spark integration test pass and test against
spark's master branch
[kszucs/crossbow/build-439](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=439)
was failing due to a spark bug; the follow-up build
[kszucs/crossbow/build-440](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=440) passes.
Author: Krisztián Szűcs <[email protected]>
Closes #3534 from kszucs/ARROW-4420 and squashes the following commits:
5dad1a9f <Krisztián Szűcs> silent Slf4jMavenTransferListener
cbb15677 <Krisztián Szűcs> try to silent maven
677bafa2 <Krisztián Szűcs> build spark's master
b6f7c13b <Krisztián Szűcs> remove blank test command
d87f098c <Krisztián Szűcs> use tar.gz
6ab52805 <Krisztián Szűcs> build master
---
ci/docker_build_java.sh | 4 +++-
docker-compose.yml | 4 ++--
integration/spark/2.4.0.patch | 24 ------------------------
integration/spark/Dockerfile | 17 +++++------------
integration/spark/runtest.sh | 25 +++++++++++++++++--------
5 files changed, 27 insertions(+), 47 deletions(-)
diff --git a/ci/docker_build_java.sh b/ci/docker_build_java.sh
index 0cbd00f..f3dd3f1 100755
--- a/ci/docker_build_java.sh
+++ b/ci/docker_build_java.sh
@@ -18,6 +18,8 @@
set -e
+export
MAVEN_OPTS="-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+
# /arrow/java is read-only
mkdir -p /build/java
@@ -28,5 +30,5 @@ pushd /arrow
popd
pushd $arrow_src/java
- mvn -DskipTests -Drat.skip=true install
+ mvn -B -DskipTests -Drat.skip=true install
popd
diff --git a/docker-compose.yml b/docker-compose.yml
index b322d9b..926e3b5 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -327,9 +327,9 @@ services:
# docker-compose build python
# docker-compose build spark-integration
# docker-compose run spark-integration
- image: arrow:spark-${SPARK_VERSION:-2.4.0}
+ image: arrow:spark-${SPARK_VERSION:-master}
environment:
- - SPARK_VERSION=${SPARK_VERSION:-2.4.0}
+ - SPARK_VERSION=${SPARK_VERSION:-master}
build:
context: .
dockerfile: integration/spark/Dockerfile
diff --git a/integration/spark/2.4.0.patch b/integration/spark/2.4.0.patch
deleted file mode 100644
index 7992e01..0000000
--- a/integration/spark/2.4.0.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-diff --git a/pom.xml b/pom.xml
-index f0e5ed9c56..b30d4d61d6 100644
---- a/pom.xml
-+++ b/pom.xml
-@@ -2092,0 +2093,2 @@
-+ <arg>-Xmax-classfile-name</arg>
-+ <arg>128</arg>
diff --git a/integration/spark/Dockerfile b/integration/spark/Dockerfile
index 5c28cca..36b6479 100644
--- a/integration/spark/Dockerfile
+++ b/integration/spark/Dockerfile
@@ -30,18 +30,11 @@ RUN apt-get update -q -y && \
mv /apache-maven-$MAVEN_VERSION /usr/local/maven
# installing specific version of spark
-ARG SPARK_VERSION=2.4.0
-RUN mkdir /spark && \
- cd /spark && \
- wget -q -O spark.tar.gz
https://github.com/apache/spark/archive/v$SPARK_VERSION.tar.gz && \
- tar -xzf spark.tar.gz && \
- rm spark.tar.gz
-
-# patching spark is required in order to:
-# - avoid too long filenames error
https://issues.apache.org/jira/browse/SPARK-4820
-ADD integration/spark/$SPARK_VERSION.patch
/arrow/integration/spark/$SPARK_VERSION.patch
-RUN cd /spark/spark-$SPARK_VERSION && \
- patch -p1 < /arrow/integration/spark/$SPARK_VERSION.patch
+ARG SPARK_VERSION=master
+RUN wget -q -O /tmp/spark.tar.gz
https://github.com/apache/spark/archive/$SPARK_VERSION.tar.gz && \
+ mkdir /spark && \
+ tar -xzf /tmp/spark.tar.gz -C /spark --strip-components=1 && \
+ rm /tmp/spark.tar.gz
# build cpp with tests
ENV CC=gcc \
diff --git a/integration/spark/runtest.sh b/integration/spark/runtest.sh
index 173f69e..593683b 100755
--- a/integration/spark/runtest.sh
+++ b/integration/spark/runtest.sh
@@ -18,8 +18,6 @@
# exit on any error
set -e
-SPARK_VERSION=${SPARK_VERSION:-2.4.0}
-
# rsynced source directory to build java libs
arrow_src=/build/java/arrow
@@ -27,15 +25,16 @@ pushd $arrow_src/java
ARROW_VERSION=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate
-Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'`
popd
-MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m
-Dorg.slf4j.simpleLogger.defaultLogLevel=warn"
+export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m
-Dorg.slf4j.simpleLogger.defaultLogLevel=warn"
+export MAVEN_OPTS="${MAVEN_OPTS}
-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
# build Spark with Arrow
-pushd /spark/spark-${SPARK_VERSION}
+pushd /spark
# update Spark pom with the Arrow version just installed and build Spark,
need package phase for pyspark
echo "Building Spark with Arrow $ARROW_VERSION"
- mvn -q versions:set-property -Dproperty=arrow.version
-DnewVersion=$ARROW_VERSION
+ mvn versions:set-property -Dproperty=arrow.version
-DnewVersion=$ARROW_VERSION
- build/mvn -DskipTests package -pl sql/core -pl assembly -am
+ build/mvn -B -DskipTests package -pl sql/core -pl assembly -am
SPARK_SCALA_TESTS=(
"org.apache.spark.sql.execution.arrow"
@@ -45,9 +44,19 @@ pushd /spark/spark-${SPARK_VERSION}
(echo "Testing Spark:"; IFS=$'\n'; echo "${SPARK_SCALA_TESTS[*]}")
# TODO: should be able to only build spark-sql tests with adding "-pl
sql/core" but not currently working
- build/mvn -Dtest=none -DwildcardSuites=$(IFS=,; echo
"${SPARK_SCALA_TESTS[*]}") test
+ build/mvn -B -Dtest=none -DwildcardSuites=$(IFS=,; echo
"${SPARK_SCALA_TESTS[*]}") test
# Run pyarrow related Python tests only
echo "Testing PySpark:"
- python/run-tests --modules pyspark-sql
+
+ SPARK_PYTHON_TESTS=(
+ "pyspark.sql.tests.test_arrow"
+ "pyspark.sql.tests.test_pandas_udf"
+ "pyspark.sql.tests.test_pandas_udf_scalar"
+ "pyspark.sql.tests.test_pandas_udf_grouped_agg"
+ "pyspark.sql.tests.test_pandas_udf_grouped_map"
+ "pyspark.sql.tests.test_pandas_udf_window")
+
+ (echo "Testing PySpark:"; IFS=$'\n'; echo "${SPARK_PYTHON_TESTS[*]}")
+ python/run-tests --testnames "$(IFS=,; echo "${SPARK_PYTHON_TESTS[*]}")"
popd