git commit: [SPARK-1395] Allow "local:" URIs to work on Yarn.

2014-04-17 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master bb76eae1b -> 69047506b


[SPARK-1395] Allow "local:" URIs to work on Yarn.

This only works for the three paths defined in the environment
(SPARK_JAR, SPARK_YARN_APP_JAR and SPARK_LOG4J_CONF).

Tested by running SparkPi with local: and file: URIs against Yarn cluster (no 
"upload" shows up in logs in the local case).

Author: Marcelo Vanzin 

Closes #303 from vanzin/yarn-local and squashes the following commits:

82219c1 [Marcelo Vanzin] [SPARK-1395] Allow "local:" URIs to work on Yarn.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/69047506
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/69047506
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/69047506

Branch: refs/heads/master
Commit: 69047506bf97e6e37e4079c87cb0327d3760ac41
Parents: bb76eae
Author: Marcelo Vanzin 
Authored: Thu Apr 17 10:29:38 2014 -0500
Committer: Thomas Graves 
Committed: Thu Apr 17 10:29:38 2014 -0500

--
 .../org/apache/spark/deploy/SparkSubmit.scala   |   4 +-
 .../spark/deploy/yarn/ExecutorRunnable.scala|   2 +-
 .../apache/spark/deploy/yarn/ClientBase.scala   | 190 +--
 .../deploy/yarn/ExecutorRunnableUtil.scala  |  17 +-
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |   6 +-
 .../spark/deploy/yarn/ExecutorRunnable.scala|   2 +-
 6 files changed, 142 insertions(+), 79 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/69047506/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index e05fbfe..e5d593c 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.deploy
 
 import java.io.{PrintStream, File}
-import java.net.URL
+import java.net.{URI, URL}
 
 import org.apache.spark.executor.ExecutorURLClassLoader
 
@@ -216,7 +216,7 @@ object SparkSubmit {
   }
 
   private def addJarToClasspath(localJar: String, loader: 
ExecutorURLClassLoader) {
-val localJarFile = new File(localJar)
+val localJarFile = new File(new URI(localJar).getPath())
 if (!localJarFile.exists()) {
   printWarning(s"Jar $localJar does not exist, skipping.")
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/69047506/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
index 3469b7d..7dae248 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
@@ -82,7 +82,7 @@ class ExecutorRunnable(
 ctx.setContainerTokens(ByteBuffer.wrap(dob.getData()))
 
 val commands = prepareCommand(masterAddress, slaveId, hostname, 
executorMemory, executorCores,
-  localResources.contains(ClientBase.LOG4J_PROP))
+  localResources)
 logInfo("Setting up executor with commands: " + commands)
 ctx.setCommands(commands)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/69047506/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index 628dd98..566de71 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.deploy.yarn
 
 import java.io.File
-import java.net.{InetAddress, UnknownHostException, URI}
+import java.net.{InetAddress, UnknownHostException, URI, URISyntaxException}
 import java.nio.ByteBuffer
 
 import scala.collection.JavaConversions._
@@ -209,53 +209,35 @@ trait ClientBase extends Logging {
 
 Map(
   ClientBase.SPARK_JAR -> System.getenv("SPARK_JAR"), ClientBase.APP_JAR 
-> args.userJar,
-  ClientBase.LOG4J_PROP -> System.getenv("SPARK_LOG4J_CONF")
+  ClientBase.LOG4J_PROP -> System.getenv(ClientBase.LOG4J_CONF_ENV_KEY)
 ).foreach { case(destName, _localPath) =>
   val localPath: String = if (_localPath != null) _localPath.trim() else ""
   if (! localPath.isEmpty()) {
 val localURI = new URI(localPath)
-val setPermissions = if (destNam

git commit: [SPARK-1395] Allow "local:" URIs to work on Yarn.

2014-04-17 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 b3ad707c4 -> a83a794f1


[SPARK-1395] Allow "local:" URIs to work on Yarn.

This only works for the three paths defined in the environment
(SPARK_JAR, SPARK_YARN_APP_JAR and SPARK_LOG4J_CONF).

Tested by running SparkPi with local: and file: URIs against Yarn cluster (no 
"upload" shows up in logs in the local case).

Author: Marcelo Vanzin 

Closes #303 from vanzin/yarn-local and squashes the following commits:

82219c1 [Marcelo Vanzin] [SPARK-1395] Allow "local:" URIs to work on Yarn.

(cherry picked from commit 69047506bf97e6e37e4079c87cb0327d3760ac41)
Signed-off-by: Thomas Graves 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a83a794f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a83a794f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a83a794f

Branch: refs/heads/branch-1.0
Commit: a83a794f1accc616cfccde78af44c5cbf066c647
Parents: b3ad707
Author: Marcelo Vanzin 
Authored: Thu Apr 17 10:29:38 2014 -0500
Committer: Thomas Graves 
Committed: Thu Apr 17 10:33:15 2014 -0500

--
 .../org/apache/spark/deploy/SparkSubmit.scala   |   4 +-
 .../spark/deploy/yarn/ExecutorRunnable.scala|   2 +-
 .../apache/spark/deploy/yarn/ClientBase.scala   | 190 +--
 .../deploy/yarn/ExecutorRunnableUtil.scala  |  17 +-
 .../spark/deploy/yarn/YarnSparkHadoopUtil.scala |   6 +-
 .../spark/deploy/yarn/ExecutorRunnable.scala|   2 +-
 6 files changed, 142 insertions(+), 79 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a83a794f/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index e05fbfe..e5d593c 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.deploy
 
 import java.io.{PrintStream, File}
-import java.net.URL
+import java.net.{URI, URL}
 
 import org.apache.spark.executor.ExecutorURLClassLoader
 
@@ -216,7 +216,7 @@ object SparkSubmit {
   }
 
   private def addJarToClasspath(localJar: String, loader: 
ExecutorURLClassLoader) {
-val localJarFile = new File(localJar)
+val localJarFile = new File(new URI(localJar).getPath())
 if (!localJarFile.exists()) {
   printWarning(s"Jar $localJar does not exist, skipping.")
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/a83a794f/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
index 3469b7d..7dae248 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala
@@ -82,7 +82,7 @@ class ExecutorRunnable(
 ctx.setContainerTokens(ByteBuffer.wrap(dob.getData()))
 
 val commands = prepareCommand(masterAddress, slaveId, hostname, 
executorMemory, executorCores,
-  localResources.contains(ClientBase.LOG4J_PROP))
+  localResources)
 logInfo("Setting up executor with commands: " + commands)
 ctx.setCommands(commands)
 

http://git-wip-us.apache.org/repos/asf/spark/blob/a83a794f/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
--
diff --git 
a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala 
b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
index 628dd98..566de71 100644
--- a/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
+++ b/yarn/common/src/main/scala/org/apache/spark/deploy/yarn/ClientBase.scala
@@ -18,7 +18,7 @@
 package org.apache.spark.deploy.yarn
 
 import java.io.File
-import java.net.{InetAddress, UnknownHostException, URI}
+import java.net.{InetAddress, UnknownHostException, URI, URISyntaxException}
 import java.nio.ByteBuffer
 
 import scala.collection.JavaConversions._
@@ -209,53 +209,35 @@ trait ClientBase extends Logging {
 
 Map(
   ClientBase.SPARK_JAR -> System.getenv("SPARK_JAR"), ClientBase.APP_JAR 
-> args.userJar,
-  ClientBase.LOG4J_PROP -> System.getenv("SPARK_LOG4J_CONF")
+  ClientBase.LOG4J_PROP -> System.getenv(ClientBase.LOG4J_CONF_ENV_KEY)
 ).foreach { case(destName, _localPath) =>
   val localPath: String = if (_localPath != null) _localPath.trim() else ""
   if (!

git commit: SPARK-1408 Modify Spark on Yarn to point to the history server when app ...

2014-04-17 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/master 69047506b -> 0058b5d2c


SPARK-1408 Modify Spark on Yarn to point to the history server when app ...

...finishes

Note this is dependent on https://github.com/apache/spark/pull/204 to have a 
working history server, but there are no code dependencies.

This also fixes SPARK-1288 yarn stable finishApplicationMaster incomplete. 
Since I was in there I made the diagnostic message be passed properly.

Author: Thomas Graves 

Closes #362 from tgravescs/SPARK-1408 and squashes the following commits:

ec89705 [Thomas Graves] Fix typo.
446122d [Thomas Graves] Make config yarn specific
f5d5373 [Thomas Graves] SPARK-1408 Modify Spark on Yarn to point to the history 
server when app finishes


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0058b5d2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0058b5d2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0058b5d2

Branch: refs/heads/master
Commit: 0058b5d2c74147d24b127a5432f89ebc7050dc18
Parents: 6904750
Author: Thomas Graves 
Authored: Thu Apr 17 16:36:37 2014 -0500
Committer: Thomas Graves 
Committed: Thu Apr 17 16:36:37 2014 -0500

--
 docs/running-on-yarn.md  | 1 +
 .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala   | 3 +--
 .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala   | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0058b5d2/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 9825143..9765062 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -42,6 +42,7 @@ System Properties:
 * `spark.yarn.preserve.staging.files`, set to true to preserve the staged 
files (spark jar, app jar, distributed cache files) at the end of the job rather 
than delete them.
 * `spark.yarn.scheduler.heartbeat.interval-ms`, the interval in ms in which 
the Spark application master heartbeats into the YARN ResourceManager. Default 
is 5 seconds. 
 * `spark.yarn.max.executor.failures`, the maximum number of executor failures 
before failing the application. Default is the number of executors requested 
times 2 with minimum of 3.
+* `spark.yarn.historyServer.address`, the address of the Spark history server 
(i.e. host.com:18080). The address should not contain a scheme (http://). 
Defaults to not being set since the history server is an optional service. This 
address is given to the Yarn ResourceManager when the Spark application 
finishes to link the application from the ResourceManager UI to the Spark 
history server UI. 
 
 # Launching Spark on YARN
 

http://git-wip-us.apache.org/repos/asf/spark/blob/0058b5d2/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 67ec95c..f078d06 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -366,8 +366,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, 
conf: Configuration,
 finishReq.setAppAttemptId(appAttemptId)
 finishReq.setFinishApplicationStatus(status)
 finishReq.setDiagnostics(diagnostics)
-// Set tracking url to empty since we don't have a history server.
-finishReq.setTrackingUrl("")
+
finishReq.setTrackingUrl(sparkConf.get("spark.yarn.historyServer.address", ""))
 resourceManager.finishApplicationMaster(finishReq)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/0058b5d2/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 581cfe4..b225be6 100644
--- 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -347,8 +347,8 @@ class ApplicationMaster(args: ApplicationMasterArguments, 
conf: Configuration,
 
   logInfo("finishApplicationMaster with " + status)
   if (registered) {
-// Set tracking URL to empty since we don't have a history server.
-amClient.unregisterApplicationMaster(status, "" /* appMessa

git commit: SPARK-1408 Modify Spark on Yarn to point to the history server when app ...

2014-04-17 Thread tgraves
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 a83a794f1 -> 6195fb8db


SPARK-1408 Modify Spark on Yarn to point to the history server when app ...

...finishes

Note this is dependent on https://github.com/apache/spark/pull/204 to have a 
working history server, but there are no code dependencies.

This also fixes SPARK-1288 yarn stable finishApplicationMaster incomplete. 
Since I was in there I made the diagnostic message be passed properly.

Author: Thomas Graves 

Closes #362 from tgravescs/SPARK-1408 and squashes the following commits:

ec89705 [Thomas Graves] Fix typo.
446122d [Thomas Graves] Make config yarn specific
f5d5373 [Thomas Graves] SPARK-1408 Modify Spark on Yarn to point to the history 
server when app finishes

(cherry picked from commit 0058b5d2c74147d24b127a5432f89ebc7050dc18)
Signed-off-by: Thomas Graves 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6195fb8d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6195fb8d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6195fb8d

Branch: refs/heads/branch-1.0
Commit: 6195fb8db9f194856df7f5c02f5023596d0c3253
Parents: a83a794
Author: Thomas Graves 
Authored: Thu Apr 17 16:36:37 2014 -0500
Committer: Thomas Graves 
Committed: Thu Apr 17 16:36:50 2014 -0500

--
 docs/running-on-yarn.md  | 1 +
 .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala   | 3 +--
 .../scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala   | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6195fb8d/docs/running-on-yarn.md
--
diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 9825143..9765062 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -42,6 +42,7 @@ System Properties:
 * `spark.yarn.preserve.staging.files`, set to true to preserve the staged 
files (spark jar, app jar, distributed cache files) at the end of the job rather 
than delete them.
 * `spark.yarn.scheduler.heartbeat.interval-ms`, the interval in ms in which 
the Spark application master heartbeats into the YARN ResourceManager. Default 
is 5 seconds. 
 * `spark.yarn.max.executor.failures`, the maximum number of executor failures 
before failing the application. Default is the number of executors requested 
times 2 with minimum of 3.
+* `spark.yarn.historyServer.address`, the address of the Spark history server 
(i.e. host.com:18080). The address should not contain a scheme (http://). 
Defaults to not being set since the history server is an optional service. This 
address is given to the Yarn ResourceManager when the Spark application 
finishes to link the application from the ResourceManager UI to the Spark 
history server UI. 
 
 # Launching Spark on YARN
 

http://git-wip-us.apache.org/repos/asf/spark/blob/6195fb8d/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 67ec95c..f078d06 100644
--- 
a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -366,8 +366,7 @@ class ApplicationMaster(args: ApplicationMasterArguments, 
conf: Configuration,
 finishReq.setAppAttemptId(appAttemptId)
 finishReq.setFinishApplicationStatus(status)
 finishReq.setDiagnostics(diagnostics)
-// Set tracking url to empty since we don't have a history server.
-finishReq.setTrackingUrl("")
+
finishReq.setTrackingUrl(sparkConf.get("spark.yarn.historyServer.address", ""))
 resourceManager.finishApplicationMaster(finishReq)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/6195fb8d/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
 
b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 581cfe4..b225be6 100644
--- 
a/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ 
b/yarn/stable/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -347,8 +347,8 @@ class ApplicationMaster(args: ApplicationMasterArguments, 
conf: Configuration,
 
   logInfo("finishApplicationMaster with " + status)
   if (registered) {
-// Set tracking URL to empt

git commit: FIX: Don't build Hive in assembly unless running Hive tests.

2014-04-17 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/master 0058b5d2c -> 6c746ba3a


FIX: Don't build Hive in assembly unless running Hive tests.

This will make the tests more stable when not running SQL tests.

Author: Patrick Wendell 

Closes #439 from pwendell/hive-tests and squashes the following commits:

88a6032 [Patrick Wendell] FIX: Don't build Hive in assembly unless running Hive 
tests.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6c746ba3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6c746ba3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6c746ba3

Branch: refs/heads/master
Commit: 6c746ba3a921364405b58c0c5621c6c517572500
Parents: 0058b5d
Author: Patrick Wendell 
Authored: Thu Apr 17 17:24:00 2014 -0700
Committer: Patrick Wendell 
Committed: Thu Apr 17 17:24:00 2014 -0700

--
 dev/run-tests| 13 ++---
 python/run-tests |  4 +++-
 2 files changed, 9 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6c746ba3/dev/run-tests
--
diff --git a/dev/run-tests b/dev/run-tests
index 7be5858..6043f85 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -38,7 +38,7 @@ if [ -n "$AMPLAB_JENKINS" ]; then
   diffs=`git diff --dirstat master | awk '{ print $2; }' | grep "^sql/"`
   if [ -n "$diffs" ]; then
 echo "Detected changes in SQL. Will run Hive test suite."
-run_sql_tests=true
+export _RUN_SQL_TESTS=true # exported for PySpark tests
   fi
 fi
 
@@ -62,13 +62,12 @@ echo 
"="
 # echo "q" is needed because sbt on encountering a build file with failure 
 # (either resolution or compilation) prompts the user for input either q, r, 
 # etc to quit or retry. This echo is there to make it not block.
-echo -e "q\n" | SPARK_HIVE=true sbt/sbt clean assembly | \
-  grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
-
-if [ -n "$run_sql_tests" ]; then
-  echo -e "q\n" | SPARK_HIVE=true sbt/sbt test | grep -v -e "info.*Resolving" 
+if [ -n "$_RUN_SQL_TESTS" ]; then
+  echo -e "q\n" | SPARK_HIVE=true sbt/sbt clean assembly test | \
+grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 else
-  echo -e "q\n" | sbt/sbt test | grep -v -e "info.*Resolving" 
+  echo -e "q\n" | sbt/sbt clean assembly test | \
+grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 fi
 
 echo 
"="

http://git-wip-us.apache.org/repos/asf/spark/blob/6c746ba3/python/run-tests
--
diff --git a/python/run-tests b/python/run-tests
index 7bbf10d..36a9612 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -49,7 +49,9 @@ function run_test() {
 run_test "pyspark/rdd.py"
 run_test "pyspark/context.py"
 run_test "pyspark/conf.py"
-run_test "pyspark/sql.py"
+if [ -n "$_RUN_SQL_TESTS" ]; then
+  run_test "pyspark/sql.py"
+fi
 run_test "-m doctest pyspark/broadcast.py"
 run_test "-m doctest pyspark/accumulators.py"
 run_test "-m doctest pyspark/serializers.py"



[1/3] git commit: HOTFIX: Use file name and not paths for excludes

2014-04-17 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 6195fb8db -> 322527259


HOTFIX: Use file name and not paths for excludes


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5349fabc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5349fabc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5349fabc

Branch: refs/heads/branch-1.0
Commit: 5349fabc53fea76b22466fbaa2145f422755f87b
Parents: 6195fb8
Author: Patrick Wendell 
Authored: Mon Apr 14 15:51:54 2014 -0700
Committer: Patrick Wendell 
Committed: Thu Apr 17 17:25:48 2014 -0700

--
 .rat-excludes | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5349fabc/.rat-excludes
--
diff --git a/.rat-excludes b/.rat-excludes
index 8954330..9e9abb3 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -40,5 +40,5 @@ work
 golden
 test.out/*
 .*iml
-python/metastore/service.properties
-python/metastore/db.lck
+service.properties
+db.lck



[3/3] git commit: FIX: Don't build Hive in assembly unless running Hive tests.

2014-04-17 Thread pwendell
FIX: Don't build Hive in assembly unless running Hive tests.

This will make the tests more stable when not running SQL tests.

Author: Patrick Wendell 

Closes #439 from pwendell/hive-tests and squashes the following commits:

88a6032 [Patrick Wendell] FIX: Don't build Hive in assembly unless running Hive 
tests.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/32252725
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/32252725
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/32252725

Branch: refs/heads/branch-1.0
Commit: 3225272595e325b8918671f06efae98827c7d413
Parents: 67d01d8
Author: Patrick Wendell 
Authored: Thu Apr 17 17:24:00 2014 -0700
Committer: Patrick Wendell 
Committed: Thu Apr 17 17:26:17 2014 -0700

--
 dev/run-tests| 13 ++---
 python/run-tests |  4 +++-
 2 files changed, 9 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/32252725/dev/run-tests
--
diff --git a/dev/run-tests b/dev/run-tests
index 7be5858..6043f85 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -38,7 +38,7 @@ if [ -n "$AMPLAB_JENKINS" ]; then
   diffs=`git diff --dirstat master | awk '{ print $2; }' | grep "^sql/"`
   if [ -n "$diffs" ]; then
 echo "Detected changes in SQL. Will run Hive test suite."
-run_sql_tests=true
+export _RUN_SQL_TESTS=true # exported for PySpark tests
   fi
 fi
 
@@ -62,13 +62,12 @@ echo 
"="
 # echo "q" is needed because sbt on encountering a build file with failure 
 # (either resolution or compilation) prompts the user for input either q, r, 
 # etc to quit or retry. This echo is there to make it not block.
-echo -e "q\n" | SPARK_HIVE=true sbt/sbt clean assembly | \
-  grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
-
-if [ -n "$run_sql_tests" ]; then
-  echo -e "q\n" | SPARK_HIVE=true sbt/sbt test | grep -v -e "info.*Resolving" 
+if [ -n "$_RUN_SQL_TESTS" ]; then
+  echo -e "q\n" | SPARK_HIVE=true sbt/sbt clean assembly test | \
+grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 else
-  echo -e "q\n" | sbt/sbt test | grep -v -e "info.*Resolving" 
+  echo -e "q\n" | sbt/sbt clean assembly test | \
+grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 fi
 
 echo 
"="

http://git-wip-us.apache.org/repos/asf/spark/blob/32252725/python/run-tests
--
diff --git a/python/run-tests b/python/run-tests
index 7bbf10d..36a9612 100755
--- a/python/run-tests
+++ b/python/run-tests
@@ -49,7 +49,9 @@ function run_test() {
 run_test "pyspark/rdd.py"
 run_test "pyspark/context.py"
 run_test "pyspark/conf.py"
-run_test "pyspark/sql.py"
+if [ -n "$_RUN_SQL_TESTS" ]; then
+  run_test "pyspark/sql.py"
+fi
 run_test "-m doctest pyspark/broadcast.py"
 run_test "-m doctest pyspark/accumulators.py"
 run_test "-m doctest pyspark/serializers.py"



[2/3] git commit: Add clean to build

2014-04-17 Thread pwendell
Add clean to build


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/67d01d85
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/67d01d85
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/67d01d85

Branch: refs/heads/branch-1.0
Commit: 67d01d85b5394f9fdebaf9c4be83be2ccdd3b929
Parents: 5349fab
Author: Patrick Wendell 
Authored: Wed Apr 16 16:32:34 2014 -0700
Committer: Patrick Wendell 
Committed: Thu Apr 17 17:26:06 2014 -0700

--
 dev/run-tests | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/67d01d85/dev/run-tests
--
diff --git a/dev/run-tests b/dev/run-tests
index 6805993..7be5858 100755
--- a/dev/run-tests
+++ b/dev/run-tests
@@ -62,7 +62,7 @@ echo 
"="
 # echo "q" is needed because sbt on encountering a build file with failure 
 # (either resolution or compilation) prompts the user for input either q, r, 
 # etc to quit or retry. This echo is there to make it not block.
-echo -e "q\n" | SPARK_HIVE=true sbt/sbt assembly | \
+echo -e "q\n" | SPARK_HIVE=true sbt/sbt clean assembly | \
   grep -v -e "info.*Resolving" -e "warn.*Merging" -e "info.*Including"
 
 if [ -n "$run_sql_tests" ]; then



git commit: HOTFIX: Ignore streaming UI test

2014-04-17 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.0 322527259 -> 1c0dc3733


HOTFIX: Ignore streaming UI test

This is currently causing many builds to hang.

https://issues.apache.org/jira/browse/SPARK-1530

Author: Patrick Wendell 

Closes #440 from pwendell/uitest-fix and squashes the following commits:

9a143dc [Patrick Wendell] Ignore streaming UI test
(cherry picked from commit 7863ecca35be9af1eca0dfe5fd8806c5dd710fd6)

Signed-off-by: Patrick Wendell 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1c0dc373
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1c0dc373
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1c0dc373

Branch: refs/heads/branch-1.0
Commit: 1c0dc373365e64348566ca9fdb8349bf43b82e4e
Parents: 3225272
Author: Patrick Wendell 
Authored: Thu Apr 17 17:33:24 2014 -0700
Committer: Patrick Wendell 
Committed: Thu Apr 17 17:33:36 2014 -0700

--
 streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/1c0dc373/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala
--
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala 
b/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala
index 031e93a..2a0db75 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala
@@ -26,7 +26,8 @@ import org.scalatest.time.SpanSugar._
 
 class UISuite extends FunSuite {
 
-  test("streaming tab in spark UI") {
+  // Ignored: See SPARK-1530
+  ignore("streaming tab in spark UI") {
 val ssc = new StreamingContext("local", "test", Seconds(1))
 eventually(timeout(10 seconds), interval(50 milliseconds)) {
   val html = Source.fromURL(ssc.sparkContext.ui.appUIAddress).mkString



git commit: HOTFIX: Ignore streaming UI test

2014-04-17 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/master 6c746ba3a -> 7863ecca3


HOTFIX: Ignore streaming UI test

This is currently causing many builds to hang.

https://issues.apache.org/jira/browse/SPARK-1530

Author: Patrick Wendell 

Closes #440 from pwendell/uitest-fix and squashes the following commits:

9a143dc [Patrick Wendell] Ignore streaming UI test


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7863ecca
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7863ecca
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7863ecca

Branch: refs/heads/master
Commit: 7863ecca35be9af1eca0dfe5fd8806c5dd710fd6
Parents: 6c746ba
Author: Patrick Wendell 
Authored: Thu Apr 17 17:33:24 2014 -0700
Committer: Patrick Wendell 
Committed: Thu Apr 17 17:33:24 2014 -0700

--
 streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7863ecca/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala
--
diff --git a/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala 
b/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala
index 031e93a..2a0db75 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/UISuite.scala
@@ -26,7 +26,8 @@ import org.scalatest.time.SpanSugar._
 
 class UISuite extends FunSuite {
 
-  test("streaming tab in spark UI") {
+  // Ignored: See SPARK-1530
+  ignore("streaming tab in spark UI") {
 val ssc = new StreamingContext("local", "test", Seconds(1))
 eventually(timeout(10 seconds), interval(50 milliseconds)) {
   val html = Source.fromURL(ssc.sparkContext.ui.appUIAddress).mkString