This is an automated email from the ASF dual-hosted git repository.

kszucs pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 76979c9  ARROW-4420: [INTEGRATION] Make spark integration test pass and test against spark's master branch
76979c9 is described below

commit 76979c924ef64da8a6fa2f5133e681cf0649c057
Author: Krisztián Szűcs <[email protected]>
AuthorDate: Mon Feb 18 22:43:44 2019 +0100

    ARROW-4420: [INTEGRATION] Make spark integration test pass and test against spark's master branch
    
    
    [kszucs/crossbow/build-439](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=439) was failing due to a spark bug
    
[kszucs/crossbow/build-440](https://github.com/kszucs/crossbow/branches/all?utf8=%E2%9C%93&query=440)
    
    Author: Krisztián Szűcs <[email protected]>
    
    Closes #3534 from kszucs/ARROW-4420 and squashes the following commits:
    
    5dad1a9f <Krisztián Szűcs> silent Slf4jMavenTransferListener
    cbb15677 <Krisztián Szűcs> try to silent maven
    677bafa2 <Krisztián Szűcs> build spark's master
    b6f7c13b <Krisztián Szűcs>  remove blank test command
    d87f098c <Krisztián Szűcs>  use tar.gz
    6ab52805 <Krisztián Szűcs> build master
---
 ci/docker_build_java.sh       |  4 +++-
 docker-compose.yml            |  4 ++--
 integration/spark/2.4.0.patch | 24 ------------------------
 integration/spark/Dockerfile  | 17 +++++------------
 integration/spark/runtest.sh  | 25 +++++++++++++++++--------
 5 files changed, 27 insertions(+), 47 deletions(-)

diff --git a/ci/docker_build_java.sh b/ci/docker_build_java.sh
index 0cbd00f..f3dd3f1 100755
--- a/ci/docker_build_java.sh
+++ b/ci/docker_build_java.sh
@@ -18,6 +18,8 @@
 
 set -e
 
+export MAVEN_OPTS="-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+
 # /arrow/java is read-only
 mkdir -p /build/java
 
@@ -28,5 +30,5 @@ pushd /arrow
 popd
 
 pushd $arrow_src/java
-  mvn -DskipTests -Drat.skip=true install
+  mvn -B -DskipTests -Drat.skip=true install
 popd
diff --git a/docker-compose.yml b/docker-compose.yml
index b322d9b..926e3b5 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -327,9 +327,9 @@ services:
     #   docker-compose build python
     #   docker-compose build spark-integration
     #   docker-compose run spark-integration
-    image: arrow:spark-${SPARK_VERSION:-2.4.0}
+    image: arrow:spark-${SPARK_VERSION:-master}
     environment:
-      - SPARK_VERSION=${SPARK_VERSION:-2.4.0}
+      - SPARK_VERSION=${SPARK_VERSION:-master}
     build:
       context: .
       dockerfile: integration/spark/Dockerfile
diff --git a/integration/spark/2.4.0.patch b/integration/spark/2.4.0.patch
deleted file mode 100644
index 7992e01..0000000
--- a/integration/spark/2.4.0.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-diff --git a/pom.xml b/pom.xml
-index f0e5ed9c56..b30d4d61d6 100644
---- a/pom.xml
-+++ b/pom.xml
-@@ -2092,0 +2093,2 @@
-+              <arg>-Xmax-classfile-name</arg>
-+              <arg>128</arg>
diff --git a/integration/spark/Dockerfile b/integration/spark/Dockerfile
index 5c28cca..36b6479 100644
--- a/integration/spark/Dockerfile
+++ b/integration/spark/Dockerfile
@@ -30,18 +30,11 @@ RUN apt-get update -q -y && \
     mv /apache-maven-$MAVEN_VERSION /usr/local/maven
 
 # installing specific version of spark
-ARG SPARK_VERSION=2.4.0
-RUN mkdir /spark && \
-    cd /spark && \
-    wget -q -O spark.tar.gz https://github.com/apache/spark/archive/v$SPARK_VERSION.tar.gz && \
-    tar -xzf spark.tar.gz && \
-    rm spark.tar.gz
-
-# patching spark is required in order to:
-# - avoid too long filenames error https://issues.apache.org/jira/browse/SPARK-4820
-ADD integration/spark/$SPARK_VERSION.patch /arrow/integration/spark/$SPARK_VERSION.patch
-RUN cd /spark/spark-$SPARK_VERSION && \
-    patch -p1 < /arrow/integration/spark/$SPARK_VERSION.patch
+ARG SPARK_VERSION=master
+RUN wget -q -O /tmp/spark.tar.gz https://github.com/apache/spark/archive/$SPARK_VERSION.tar.gz && \
+    mkdir /spark && \
+    tar -xzf /tmp/spark.tar.gz -C /spark --strip-components=1 && \
+    rm /tmp/spark.tar.gz
 
 # build cpp with tests
 ENV CC=gcc \
diff --git a/integration/spark/runtest.sh b/integration/spark/runtest.sh
index 173f69e..593683b 100755
--- a/integration/spark/runtest.sh
+++ b/integration/spark/runtest.sh
@@ -18,8 +18,6 @@
 # exit on any error
 set -e
 
-SPARK_VERSION=${SPARK_VERSION:-2.4.0}
-
 # rsynced source directory to build java libs
 arrow_src=/build/java/arrow
 
@@ -27,15 +25,16 @@ pushd $arrow_src/java
  ARROW_VERSION=`mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | sed -n -e '/^\[.*\]/ !{ /^[0-9]/ { p; q } }'`
 popd
 
-MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -Dorg.slf4j.simpleLogger.defaultLogLevel=warn"
+export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m -Dorg.slf4j.simpleLogger.defaultLogLevel=warn"
+export MAVEN_OPTS="${MAVEN_OPTS} -Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
 
 # build Spark with Arrow
-pushd /spark/spark-${SPARK_VERSION}
+pushd /spark
   # update Spark pom with the Arrow version just installed and build Spark, need package phase for pyspark
   echo "Building Spark with Arrow $ARROW_VERSION"
-  mvn -q versions:set-property -Dproperty=arrow.version -DnewVersion=$ARROW_VERSION
+  mvn versions:set-property -Dproperty=arrow.version -DnewVersion=$ARROW_VERSION
 
-  build/mvn -DskipTests package -pl sql/core -pl assembly -am
+  build/mvn -B -DskipTests package -pl sql/core -pl assembly -am
 
   SPARK_SCALA_TESTS=(
     "org.apache.spark.sql.execution.arrow"
@@ -45,9 +44,19 @@ pushd /spark/spark-${SPARK_VERSION}
   (echo "Testing Spark:"; IFS=$'\n'; echo "${SPARK_SCALA_TESTS[*]}")
 
   # TODO: should be able to only build spark-sql tests with adding "-pl sql/core" but not currently working
-  build/mvn -Dtest=none -DwildcardSuites=$(IFS=,; echo "${SPARK_SCALA_TESTS[*]}") test
+  build/mvn -B -Dtest=none -DwildcardSuites=$(IFS=,; echo "${SPARK_SCALA_TESTS[*]}") test
 
   # Run pyarrow related Python tests only
   echo "Testing PySpark:"
-  python/run-tests --modules pyspark-sql
+
+  SPARK_PYTHON_TESTS=(
+    "pyspark.sql.tests.test_arrow"
+    "pyspark.sql.tests.test_pandas_udf"
+    "pyspark.sql.tests.test_pandas_udf_scalar"
+    "pyspark.sql.tests.test_pandas_udf_grouped_agg"
+    "pyspark.sql.tests.test_pandas_udf_grouped_map"
+    "pyspark.sql.tests.test_pandas_udf_window")
+
+  (echo "Testing PySpark:"; IFS=$'\n'; echo "${SPARK_PYTHON_TESTS[*]}")
+  python/run-tests --testnames "$(IFS=,; echo "${SPARK_PYTHON_TESTS[*]}")"
 popd

Reply via email to