Repository: incubator-systemml
Updated Branches:
  refs/heads/master 21b96855b -> cf92e8417
[SYSTEMML-1246] Use correct jar name in sparkDML.sh of -bin artifact

Added resource filtering to set jar file name used in sparkDML.sh for -bin artifact only.

Closes #399.

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/cf92e841
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/cf92e841
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/cf92e841

Branch: refs/heads/master
Commit: cf92e8417382812943d88ec3a55b3e4a9b9cc533
Parents: 21b9685
Author: Glenn Weidner <gweid...@us.ibm.com>
Authored: Fri Feb 17 14:52:18 2017 -0800
Committer: Glenn Weidner <gweid...@us.ibm.com>
Committed: Fri Feb 17 14:52:18 2017 -0800

----------------------------------------------------------------------
 pom.xml                                |  21 +++++
 scripts/sparkDML.sh                    |   2 +-
 src/assembly/bin.xml                   |   9 +-
 src/main/resources/scripts/sparkDML.sh | 123 ++++++++++++++++++++++++++++
 4 files changed, 153 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 2f4abdc..30dbe28 100644
--- a/pom.xml
+++ b/pom.xml
@@ -282,6 +282,27 @@
<outputDirectory>${basedir}/target/lib/hadoop/bin</outputDirectory> </configuration> </execution> + + <execution> + <id>copy-resources-filtered</id> + <phase>compile</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <overwrite>true</overwrite> + <outputDirectory>${basedir}/target/scripts</outputDirectory> + <resources> + <resource> + <directory>${basedir}/src/main/resources/scripts</directory> + <includes> + <include>sparkDML.sh</include> + </includes> + <filtering>true</filtering> + </resource> + </resources> + </configuration> + </execution> </executions> </plugin> 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/scripts/sparkDML.sh ---------------------------------------------------------------------- diff --git a/scripts/sparkDML.sh b/scripts/sparkDML.sh index a404fee..5548859 100755 --- a/scripts/sparkDML.sh +++ b/scripts/sparkDML.sh @@ -26,7 +26,7 @@ # Environment # Following variables must be rewritten by your installation paths. -DEFAULT_SPARK_HOME=/usr/local/spark-1.4.0/spark-1.4.0-SNAPSHOT-bin-hadoop2.4 +DEFAULT_SPARK_HOME=/usr/local/spark-2.1.0/spark-2.1.0-bin-hadoop2.6 DEFAULT_SYSTEMML_HOME=. if [ -z ${SPARK_HOME} ]; then http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/src/assembly/bin.xml ---------------------------------------------------------------------- diff --git a/src/assembly/bin.xml b/src/assembly/bin.xml index c1d5f8a..fb1fdf6 100644 --- a/src/assembly/bin.xml +++ b/src/assembly/bin.xml @@ -41,10 +41,17 @@ <exclude>perftest</exclude> <exclude>staging/**/*</exclude> <exclude>staging</exclude> - <!-- <exclude>*.sh</exclude> --> <!-- applies to sparkDML.sh --> + <exclude>sparkDML.sh</exclude> </excludes> <outputDirectory>scripts</outputDirectory> </fileSet> + <fileSet> + <directory>${basedir}/target/scripts</directory> + <includes> + <include>sparkDML.sh</include> + </includes> + <outputDirectory>scripts</outputDirectory> + </fileSet> <fileSet> <directory>${basedir}/src/main/standalone</directory> http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/cf92e841/src/main/resources/scripts/sparkDML.sh ---------------------------------------------------------------------- diff --git a/src/main/resources/scripts/sparkDML.sh b/src/main/resources/scripts/sparkDML.sh new file mode 100644 index 0000000..cd57ae0 --- /dev/null +++ b/src/main/resources/scripts/sparkDML.sh @@ -0,0 +1,123 @@ +#!/bin/bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license 
agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +#set -x + + +# Environment + +# Following variables must be rewritten by your installation paths. +DEFAULT_SPARK_HOME=/usr/local/spark-2.1.0/spark-2.1.0-bin-hadoop2.6 +DEFAULT_SYSTEMML_HOME=. + +if [ -z ${SPARK_HOME} ]; then + SPARK_HOME=${DEFAULT_SPARK_HOME} +fi + +if [ -z ${SYSTEMML_HOME} ]; then + SYSTEMML_HOME=${DEFAULT_SYSTEMML_HOME} +fi + +# Default Values + +master="--master yarn-client" +driver_memory="--driver-memory 20G" +num_executors="--num-executors 5" +executor_memory="--executor-memory 60G" +executor_cores="--executor-cores 24" +conf="--conf spark.driver.maxResultSize=0 --conf spark.akka.frameSize=128" + + +# error help print + +printUsageExit() +{ +cat <<EOF + +Usage: $0 [-h] [SPARK-SUBMIT OPTIONS] -f <dml-filename> [SYSTEMML OPTIONS] + + Examples: + $0 -f genGNMF.dml --nvargs V=/tmp/V.mtx W=/tmp/W.mtx H=/tmp/H.mtx rows=100000 cols=800 k=50 + $0 --driver-memory 5G -f GNMF.dml --explain hops -nvargs ... + $0 --master yarn-cluster -f hdfs:/user/GNMF.dml + + -h | -? 
Print this usage message and exit + + SPARK-SUBMIT OPTIONS: + --conf <property>=<value> Configuration settings: + spark.driver.maxResultSize Default: 0 + spark.akka.frameSize Default: 128 + --driver-memory <num> Memory for driver (e.g. 512M)] Default: 20G + --master <string> local | yarn-client | yarn-cluster] Default: yarn-client + --num-executors <num> Number of executors to launch (e.g. 2) Default: 5 + --executor-memory <num> Memory per executor (e.g. 1G) Default: 60G + --executor-cores <num> Memory per executor (e.g. ) Default: 24 + + -f DML script file name, e.g. hdfs:/user/biadmin/test.dml + + SYSTEMML OPTIONS: + --stats Monitor and report caching/recompilation statistics + --explain Explain plan (runtime) + --explain2 <string> Explain plan (hops, runtime, recompile_hops, recompile_runtime) + --nvargs <varName>=<value> List of attributeName-attributeValue pairs + --args <string> List of positional argument values +EOF + exit 1 +} + +# command line parameter processing + +while true ; do + case "$1" in + -h) printUsageExit ; exit 1 ;; + --master) master="--master "$2 ; shift 2 ;; + --driver-memory) driver_memory="--driver-memory "$2 ; shift 2 ;; + --num-executors) num_executors="--num-executors "$2 ; shift 2 ;; + --executor-memory) executor_memory="--executor-memory "$2 ; shift 2 ;; + --executor-cores) executor_cores="--executor-cores "$2 ; shift 2 ;; + --conf) conf=${conf}' --conf '$2 ; shift 2 ;; + -f) if [ -z "$2" ]; then echo "Error: Wrong usage. Try -h" ; exit 1 ; else f=$2 ; shift 2 ; fi ;; + --stats) stats="-stats" ; shift 1 ;; + --explain) explain="-explain" ; shift 1 ;; + --explain2) explain="-explain "$2 ; shift 2 ;; + --nvargs) shift 1 ; nvargs="-nvargs "$@ ; break ;; + --args) shift 1 ; args="-args "$@ ; break ;; + *) if [ -z "$f" ]; then echo "Error: Wrong usage. 
Try -h" ; exit 1 ; else break ; fi ;; + esac +done + +# SystemML Spark invocation + +$SPARK_HOME/bin/spark-submit \ + ${master} \ + ${driver_memory} \ + ${num_executors} \ + ${executor_memory} \ + ${executor_cores} \ + ${conf} \ + ${SYSTEMML_HOME}/${project.artifactId}-${project.version}.jar \ + -f ${f} \ + -config=${SYSTEMML_HOME}/SystemML-config.xml \ + -exec hybrid_spark \ + $explain \ + $stats \ + $nvargs $args