Repository: incubator-zeppelin Updated Branches: refs/heads/master a7a7bdb68 -> 67e0fd554
ZEPPELIN-783 fix CI failure on Spark download ### What is this PR for? Improve CI by hard-ending spark download failures that are responsible for recent CI red on `master`. ### What type of PR is it? Bug Fix | Hot Fix ### Todos - [x] cleanup on spark download attempts - [x] leverage Travis CI [caching](https://docs.travis-ci.com/user/caching) for spark and pyspark binaries under `.spark-dist` ### What is the Jira issue? [ZEPPELIN-783](https://issues.apache.org/jira/browse/ZEPPELIN-783) ### How should this be tested? CI must be green ### Questions: * Do the license files need an update? No * Are there breaking changes for older versions? No * Does this need documentation? No Author: Alexander Bezzubov <[email protected]> Closes #810 from bzz/ZEPPELIN-783-fix-ci-spark-download and squashes the following commits: 9d59646 [Alexander Bezzubov] ZEPPELIN-783: consistent download timeout b6310f0 [Alexander Bezzubov] ZEPPELIN-783: add debug info: download, Zeppelin config 5d0eb2d [Alexander Bezzubov] ZEPPELIN-783: pyspark&spark cache under .spark-dist, but unpack to root d4ef96d [Alexander Bezzubov] ZEPPELIN-783: exclude .spark-dist cache from RAT 388d76b [Alexander Bezzubov] ZEPPELIN-783: backport from Spark download to start/stop scripts fa8b516 [Alexander Bezzubov] ZEPPELIN-783: reconcile CI-time and build-time Spark download locations 542a305 [Alexander Bezzubov] ZEPPELIN-783: use TravisCI caching for reliable Spark download bd1d5e2 [Alexander Bezzubov] ZEPPELIN-783: add cleanup on download failure b413743 [Alexander Bezzubov] ZEPPELIN-783: refactoring - extract SPARK_ARCHIVE var 346e075 [Alexander Bezzubov] ZEPPELIN-783: upd shell style Project: http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/commit/67e0fd55 Tree: http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/tree/67e0fd55 Diff: http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/diff/67e0fd55 Branch: refs/heads/master 
Commit: 67e0fd554f33c547d80cfff9e80940977b4b2b29 Parents: a7a7bdb Author: Alexander Bezzubov <[email protected]> Authored: Sat Apr 2 15:54:55 2016 +0900 Committer: Alexander Bezzubov <[email protected]> Committed: Sat Apr 2 17:12:56 2016 +0900 ---------------------------------------------------------------------- .gitignore | 1 + .travis.yml | 5 +++ pom.xml | 11 +++++++ spark-dependencies/pom.xml | 33 +++++++++++++++---- testing/downloadSpark.sh | 69 +++++++++++++++++++++++++-------------- testing/startSparkCluster.sh | 19 ++++++----- testing/stopSparkCluster.sh | 11 ++++--- zeppelin-server/pom.xml | 2 +- zeppelin-web/pom.xml | 2 +- 9 files changed, 107 insertions(+), 46 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/.gitignore ---------------------------------------------------------------------- diff --git a/.gitignore b/.gitignore index ad473ed..9dd02a6 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ spark/derby.log spark/metastore_db spark-1.*-bin-hadoop* +.spark-dist zeppelin-server/derby.log lens/lens-cli-hist.log http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/.travis.yml ---------------------------------------------------------------------- diff --git a/.travis.yml b/.travis.yml index 2ef9025..72b748e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,6 +16,9 @@ language: java sudo: false +cache: + directories: + - .spark-dist matrix: include: @@ -48,6 +51,7 @@ matrix: env: TEST_SELENIUM="true" SPARK_VER="1.6.0" HADOOP_VER="2.3" PROFILE="-Pspark-1.6 -Phadoop-2.3 -Ppyspark" BUILD_FLAG="package -DskipTests" TEST_FLAG="verify" TEST_PROJECTS="-pl zeppelin-interpreter,zeppelin-zengine,zeppelin-server,zeppelin-display,spark-dependencies,spark -Dtest=org.apache.zeppelin.AbstractFunctionalSuite -DfailIfNoTests=false" before_install: + - "ls -la .spark-dist" - "export DISPLAY=:99.0" - "sh -e /etc/init.d/xvfb start" @@ -58,6 
+62,7 @@ before_script: - travis_retry ./testing/downloadSpark.sh $SPARK_VER $HADOOP_VER - ./testing/startSparkCluster.sh $SPARK_VER $HADOOP_VER - echo "export SPARK_HOME=`pwd`/spark-$SPARK_VER-bin-hadoop$HADOOP_VER" > conf/zeppelin-env.sh + - tail conf/zeppelin-env.sh script: - mvn $TEST_FLAG $PROFILE -B $TEST_PROJECTS http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 5a14040..e5f7d9a 100755 --- a/pom.xml +++ b/pom.xml @@ -241,6 +241,7 @@ <groupId>org.apache.rat</groupId> <artifactId>apache-rat-plugin</artifactId> </plugin> + <plugin> <artifactId>maven-compiler-plugin</artifactId> <version>3.1</version> @@ -249,6 +250,7 @@ <target>1.7</target> </configuration> </plugin> + <!-- Test coverage plugin --> <plugin> <groupId>org.codehaus.mojo</groupId> @@ -270,6 +272,7 @@ </execution> </executions> </plugin> + <!-- Checkstyle plugin --> <plugin> <groupId>org.apache.maven.plugins</groupId> @@ -488,6 +491,7 @@ <exclude>conf/notebook-authorization.json</exclude> <exclude>conf/zeppelin-env.sh</exclude> <exclude>spark-*-bin*/**</exclude> + <exclude>.spark-dist/**</exclude> <!-- bundled from bootstrap --> <exclude>docs/assets/themes/zeppelin/bootstrap/**</exclude> @@ -640,6 +644,13 @@ </lifecycleMappingMetadata> </configuration> </plugin> + + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-antrun-plugin</artifactId> + <version>1.7</version> + </plugin> + </plugins> </pluginManagement> </build> http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/spark-dependencies/pom.xml ---------------------------------------------------------------------- diff --git a/spark-dependencies/pom.xml b/spark-dependencies/pom.xml index 819de93..02d24e2 100644 --- a/spark-dependencies/pom.xml +++ b/spark-dependencies/pom.xml @@ -50,7 +50,11 @@ <akka.group>org.spark-project.akka</akka.group> 
<akka.version>2.3.4-spark</akka.version> - <spark.download.url>http://archive.apache.org/dist/spark/spark-${spark.version}/spark-${spark.version}.tgz</spark.download.url> + <spark.archive>spark-${spark.version}</spark.archive> + <spark.download.url> + http://archive.apache.org/dist/spark/${spark.archive}/${spark.archive}.tgz + </spark.download.url> + <spark.dist.cache>${project.build.directory}/../../.spark-dist</spark.dist.cache> <py4j.version>0.8.2.1</py4j.version> </properties> @@ -787,12 +791,12 @@ </goals> <configuration> <url>${spark.download.url}</url> - <unpack>true</unpack> - <outputDirectory>${project.build.directory}/spark-dist</outputDirectory> + <outputDirectory>${spark.dist.cache}</outputDirectory> </configuration> </execution> </executions> </plugin> + <plugin> <artifactId>maven-clean-plugin</artifactId> <configuration> @@ -806,13 +810,28 @@ </filesets> </configuration> </plugin> + <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-antrun-plugin</artifactId> - <version>1.7</version> <executions> <execution> - <id>download-and-zip-pyspark-files</id> + <id>unzip-pyspark-files</id> + <phase>validate</phase> + <goals> + <goal>run</goal> + </goals> + <configuration> + <target> + <untar src="${spark.dist.cache}/${spark.archive}.tgz" + dest="${project.build.directory}/spark-dist" + compression="gzip"/> + </target> + </configuration> + </execution> + + <execution> + <id>zip-pyspark-files</id> <phase>generate-resources</phase> <goals> <goal>run</goal> @@ -821,9 +840,9 @@ <target> <delete dir="../interpreter/spark/pyspark"/> <copy todir="../interpreter/spark/pyspark" - file="${project.build.directory}/spark-dist/spark-${spark.version}/python/lib/py4j-${py4j.version}-src.zip"/> + file="${project.build.directory}/spark-dist/${spark.archive}/python/lib/py4j-${py4j.version}-src.zip"/> <zip destfile="${project.build.directory}/../../interpreter/spark/pyspark/pyspark.zip" - 
basedir="${project.build.directory}/spark-dist/spark-${spark.version}/python" + basedir="${project.build.directory}/spark-dist/${spark.archive}/python" includes="pyspark/*.py,pyspark/**/*.py"/> </target> </configuration> http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/testing/downloadSpark.sh ---------------------------------------------------------------------- diff --git a/testing/downloadSpark.sh b/testing/downloadSpark.sh index 7c907fc..d12580f 100755 --- a/testing/downloadSpark.sh +++ b/testing/downloadSpark.sh @@ -17,7 +17,7 @@ # -if [ $# -ne 2 ]; then +if [[ "$#" -ne 2 ]]; then echo "usage) $0 [spark version] [hadoop version]" echo " eg) $0 1.3.1 2.6" exit 1 @@ -26,10 +26,10 @@ fi SPARK_VERSION="${1}" HADOOP_VERSION="${2}" -echo ${SPARK_VERSION} | grep "^1.[123].[0-9]" > /dev/null -if [ $? -eq 0 ]; then +echo "${SPARK_VERSION}" | grep "^1.[123].[0-9]" > /dev/null +if [[ "$?" -eq 0 ]]; then echo "${SPARK_VERSION}" | grep "^1.[12].[0-9]" > /dev/null - if [ $? -eq 0 ]; then + if [[ "$?" -eq 0 ]]; then SPARK_VER_RANGE="<=1.2" else SPARK_VER_RANGE="<=1.3" @@ -40,31 +40,52 @@ fi set -xe -FWDIR=$(dirname "${BASH_SOURCE-$0}") +TIMEOUT_SEC=590 +FWDIR="$(dirname "${BASH_SOURCE-$0}")" ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)" -export SPARK_HOME=${ZEPPELIN_HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} + +SPARK_CACHE=".spark-dist" +SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" +export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}" echo "SPARK_HOME is ${SPARK_HOME}" -if [ ! 
-d "${SPARK_HOME}" ]; then - if [ "${SPARK_VER_RANGE}" == "<=1.2" ]; then - # spark 1.1.x and spark 1.2.x can be downloaded from archive - STARTTIME=`date +%s` - timeout -s KILL 300 wget -q http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz - ENDTIME=`date +%s` - DOWNLOADTIME=$((ENDTIME-STARTTIME)) - else - # spark 1.3.x and later can be downloaded from mirror - # get download address from mirror - MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz?asjson=1") - PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g') - PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g') +if [[ ! -d "${SPARK_HOME}" ]]; then + mkdir -p "${SPARK_CACHE}" + cd "${SPARK_CACHE}" + if [[ ! -f "${SPARK_ARCHIVE}.tgz" ]]; then + pwd + ls -la . + echo "${SPARK_CACHE} does not have ${SPARK_ARCHIVE} downloading ..." 
+ # download archive if not cached + if [[ "${SPARK_VER_RANGE}" == "<=1.2" ]]; then + # spark 1.1.x and spark 1.2.x can be downloaded from archive + STARTTIME=`date +%s` + timeout -s KILL "${TIMEOUT_SEC}" wget -q "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz" + ENDTIME=`date +%s` + DOWNLOADTIME="$((ENDTIME-STARTTIME))" + else + # spark 1.3.x and later can be downloaded from mirror + # get download address from mirror + MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1") + + PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g') + PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g') + + STARTTIME=`date +%s` + timeout -s KILL "${TIMEOUT_SEC}" wget -q "${PREFFERED}${PATHINFO}" + ENDTIME=`date +%s` + DOWNLOADTIME="$((ENDTIME-STARTTIME))" + fi + fi - STARTTIME=`date +%s` - timeout -s KILL 590 wget -q "${PREFFERED}${PATHINFO}" - ENDTIME=`date +%s` - DOWNLOADTIME=$((ENDTIME-STARTTIME)) + # extract archive in un-cached root, clean-up on failure + cp "${SPARK_ARCHIVE}.tgz" .. + cd .. + if ! 
tar zxf "${SPARK_ARCHIVE}.tgz" ; then + echo "Unable to extract ${SPARK_ARCHIVE}.tgz" >&2 + rm -rf "${SPARK_ARCHIVE}" + rm -f "${SPARK_ARCHIVE}.tgz" fi - tar zxf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz fi set +xe http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/testing/startSparkCluster.sh ---------------------------------------------------------------------- diff --git a/testing/startSparkCluster.sh b/testing/startSparkCluster.sh index e47edc1..dc7613d 100755 --- a/testing/startSparkCluster.sh +++ b/testing/startSparkCluster.sh @@ -17,7 +17,7 @@ # -if [ $# -ne 2 ]; then +if [[ "$#" -ne 2 ]]; then echo "usage) $0 [spark version] [hadoop version]" echo " eg) $0 1.3.1 2.6" exit 1 @@ -26,10 +26,10 @@ fi SPARK_VERSION="${1}" HADOOP_VERSION="${2}" -echo ${SPARK_VERSION} | grep "^1.[123].[0-9]" > /dev/null -if [ $? -eq 0 ]; then +echo "${SPARK_VERSION}" | grep "^1.[123].[0-9]" > /dev/null +if [[ "$?" -eq 0 ]]; then echo "${SPARK_VERSION}" | grep "^1.[12].[0-9]" > /dev/null - if [ $? -eq 0 ]; then + if [[ "$?" -eq 0 ]]; then SPARK_VER_RANGE="<=1.2" else SPARK_VER_RANGE="<=1.3" @@ -38,17 +38,18 @@ else SPARK_VER_RANGE=">1.3" fi - set -xe -FWDIR=$(dirname "${BASH_SOURCE-$0}") +FWDIR="$(dirname "${BASH_SOURCE-$0}")" ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)" -export SPARK_HOME=${ZEPPELIN_HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} + +SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" +export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}" echo "SPARK_HOME is ${SPARK_HOME}" # create PID dir. 
test case detect pid file so they can select active spark home dir for test -mkdir -p ${SPARK_HOME}/run -export SPARK_PID_DIR=${SPARK_HOME}/run +export SPARK_PID_DIR="${SPARK_HOME}/run" +mkdir -p "${SPARK_PID_DIR}" # start export SPARK_MASTER_PORT=7071 http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/testing/stopSparkCluster.sh ---------------------------------------------------------------------- diff --git a/testing/stopSparkCluster.sh b/testing/stopSparkCluster.sh index 1bf8eac..e049ec4 100755 --- a/testing/stopSparkCluster.sh +++ b/testing/stopSparkCluster.sh @@ -16,7 +16,7 @@ # limitations under the License. # -if [ $# -ne 2 ]; then +if [[ "$#" -ne 2 ]]; then echo "usage) $0 [spark version] [hadoop version]" echo " eg) $0 1.3.1 2.6" exit 1 @@ -27,12 +27,15 @@ HADOOP_VERSION="${2}" set -xe -FWDIR=$(dirname "${BASH_SOURCE-$0}") +FWDIR="$(dirname "${BASH_SOURCE-$0}")" ZEPPELIN_HOME="$(cd "${FWDIR}/.."; pwd)" -export SPARK_HOME=${ZEPPELIN_HOME}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} + +SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}" +export SPARK_HOME="${ZEPPELIN_HOME}/${SPARK_ARCHIVE}" +echo "SPARK_HOME is ${SPARK_HOME}" # set create PID dir -export SPARK_PID_DIR=${SPARK_HOME}/run +export SPARK_PID_DIR="${SPARK_HOME}/run" ${SPARK_HOME}/sbin/spark-daemon.sh stop org.apache.spark.deploy.worker.Worker 1 ${SPARK_HOME}/sbin/stop-master.sh http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/zeppelin-server/pom.xml ---------------------------------------------------------------------- diff --git a/zeppelin-server/pom.xml b/zeppelin-server/pom.xml index ee03c33..f2c9ced 100644 --- a/zeppelin-server/pom.xml +++ b/zeppelin-server/pom.xml @@ -369,8 +369,8 @@ </plugin> <plugin> + <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-antrun-plugin</artifactId> - <version>1.6</version> <executions> <execution> <id>start-zeppelin</id> 
http://git-wip-us.apache.org/repos/asf/incubator-zeppelin/blob/67e0fd55/zeppelin-web/pom.xml ---------------------------------------------------------------------- diff --git a/zeppelin-web/pom.xml b/zeppelin-web/pom.xml index 21f17df..8878e9a 100644 --- a/zeppelin-web/pom.xml +++ b/zeppelin-web/pom.xml @@ -47,10 +47,10 @@ <webXml>dist\WEB-INF\web.xml</webXml> </configuration> </plugin> + <plugin> <groupId>org.apache.rat</groupId> <artifactId>apache-rat-plugin</artifactId> - <version>0.11</version> <configuration> <excludes> <exclude>**/.idea/</exclude>
