spark git commit: [STREAMING][MINOR] Fix typo in function name of StateImpl

2015-12-15 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/master c59df8c51 -> bc1ff9f4a


[STREAMING][MINOR] Fix typo in function name of StateImpl

cc tdas zsxwing, please review. Thanks a lot.

Author: jerryshao 

Closes #10305 from jerryshao/fix-typo-state-impl.
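
For context, the renamed method is what marks a state record as timing out
before the user's mapping function is invoked for it. Below is a minimal,
hypothetical sketch of how that surfaces through the public mapWithState API
(the `wordStream` DStream and the counting logic are assumptions, not part of
this patch):

  import org.apache.spark.streaming._

  // Tracks a running count per word; entries idle for 10 minutes time out.
  val spec = StateSpec.function { (word: String, one: Option[Int], state: State[Int]) =>
    if (state.isTimingOut()) {
      // Internally, StateImpl.wrapTimingOutState(...) was called before this invocation.
      (word, state.get())
    } else {
      val newCount = state.getOption.getOrElse(0) + one.getOrElse(0)
      state.update(newCount)
      (word, newCount)
    }
  }.timeout(Minutes(10))

  val counts = wordStream.mapWithState(spec)   // wordStream: DStream[(String, Int)]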


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bc1ff9f4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bc1ff9f4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bc1ff9f4

Branch: refs/heads/master
Commit: bc1ff9f4a41401599d3a87fb3c23a2078228a29b
Parents: c59df8c
Author: jerryshao 
Authored: Tue Dec 15 09:41:40 2015 -0800
Committer: Shixiong Zhu 
Committed: Tue Dec 15 09:41:40 2015 -0800

--
 streaming/src/main/scala/org/apache/spark/streaming/State.scala| 2 +-
 .../scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala | 2 +-
 .../test/scala/org/apache/spark/streaming/MapWithStateSuite.scala  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/bc1ff9f4/streaming/src/main/scala/org/apache/spark/streaming/State.scala
--
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/State.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/State.scala
index b47bdda..42424d6 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/State.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/State.scala
@@ -206,7 +206,7 @@ private[streaming] class StateImpl[S] extends State[S] {
* Update the internal data and flags in `this` to the given state that is 
going to be timed out.
* This method allows `this` object to be reused across many state records.
*/
-  def wrapTiminoutState(newState: S): Unit = {
+  def wrapTimingOutState(newState: S): Unit = {
 this.state = newState
 defined = true
 timingOut = true

http://git-wip-us.apache.org/repos/asf/spark/blob/bc1ff9f4/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala
--
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala
index ed95171..fdf6167 100644
--- 
a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala
+++ 
b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala
@@ -67,7 +67,7 @@ private[streaming] object MapWithStateRDDRecord {
 // data returned
 if (removeTimedoutData && timeoutThresholdTime.isDefined) {
   newStateMap.getByTime(timeoutThresholdTime.get).foreach { case (key, 
state, _) =>
-wrappedState.wrapTiminoutState(state)
+wrappedState.wrapTimingOutState(state)
 val returned = mappingFunction(batchTime, key, None, wrappedState)
 mappedData ++= returned
 newStateMap.remove(key)

http://git-wip-us.apache.org/repos/asf/spark/blob/bc1ff9f4/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
--
diff --git 
a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala 
b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
index 4b08085..6b21433 100644
--- 
a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
+++ 
b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
@@ -125,7 +125,7 @@ class MapWithStateSuite extends SparkFunSuite
 state.remove()
 testState(None, shouldBeRemoved = true)
 
-state.wrapTiminoutState(3)
+state.wrapTimingOutState(3)
 testState(Some(3), shouldBeTimingOut = true)
   }
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [STREAMING][MINOR] Fix typo in function name of StateImpl

2015-12-15 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 352a0c80f -> 23c884605


[STREAMING][MINOR] Fix typo in function name of StateImpl

cc tdas zsxwing, please review. Thanks a lot.

Author: jerryshao 

Closes #10305 from jerryshao/fix-typo-state-impl.

(cherry picked from commit bc1ff9f4a41401599d3a87fb3c23a2078228a29b)
Signed-off-by: Shixiong Zhu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/23c88460
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/23c88460
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/23c88460

Branch: refs/heads/branch-1.6
Commit: 23c8846050b307fdfe2307f7e7ca9d0f69f969a9
Parents: 352a0c8
Author: jerryshao 
Authored: Tue Dec 15 09:41:40 2015 -0800
Committer: Shixiong Zhu 
Committed: Tue Dec 15 09:41:50 2015 -0800

--
 streaming/src/main/scala/org/apache/spark/streaming/State.scala| 2 +-
 .../scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala | 2 +-
 .../test/scala/org/apache/spark/streaming/MapWithStateSuite.scala  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/23c88460/streaming/src/main/scala/org/apache/spark/streaming/State.scala
--
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/State.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/State.scala
index b47bdda..42424d6 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/State.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/State.scala
@@ -206,7 +206,7 @@ private[streaming] class StateImpl[S] extends State[S] {
* Update the internal data and flags in `this` to the given state that is 
going to be timed out.
* This method allows `this` object to be reused across many state records.
*/
-  def wrapTiminoutState(newState: S): Unit = {
+  def wrapTimingOutState(newState: S): Unit = {
 this.state = newState
 defined = true
 timingOut = true

http://git-wip-us.apache.org/repos/asf/spark/blob/23c88460/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala
--
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala
index ed95171..fdf6167 100644
--- 
a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala
+++ 
b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala
@@ -67,7 +67,7 @@ private[streaming] object MapWithStateRDDRecord {
 // data returned
 if (removeTimedoutData && timeoutThresholdTime.isDefined) {
   newStateMap.getByTime(timeoutThresholdTime.get).foreach { case (key, 
state, _) =>
-wrappedState.wrapTiminoutState(state)
+wrappedState.wrapTimingOutState(state)
 val returned = mappingFunction(batchTime, key, None, wrappedState)
 mappedData ++= returned
 newStateMap.remove(key)

http://git-wip-us.apache.org/repos/asf/spark/blob/23c88460/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
--
diff --git 
a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala 
b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
index 4b08085..6b21433 100644
--- 
a/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
+++ 
b/streaming/src/test/scala/org/apache/spark/streaming/MapWithStateSuite.scala
@@ -125,7 +125,7 @@ class MapWithStateSuite extends SparkFunSuite
 state.remove()
 testState(None, shouldBeRemoved = true)
 
-state.wrapTiminoutState(3)
+state.wrapTimingOutState(3)
 testState(Some(3), shouldBeTimingOut = true)
   }
 


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12332][TRIVIAL][TEST] Fix minor typo in ResetSystemProperties

2015-12-15 Thread srowen
Repository: spark
Updated Branches:
  refs/heads/master 606f99b94 -> c59df8c51


[SPARK-12332][TRIVIAL][TEST] Fix minor typo in ResetSystemProperties

Fix a minor typo (unbalanced bracket) in ResetSystemProperties.

Author: Holden Karau 

Closes #10303 from 
holdenk/SPARK-12332-trivial-typo-in-ResetSystemProperties-comment.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c59df8c5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c59df8c5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c59df8c5

Branch: refs/heads/master
Commit: c59df8c51609a0d6561ae1868e7970b516fb1811
Parents: 606f99b
Author: Holden Karau 
Authored: Tue Dec 15 11:38:57 2015 +
Committer: Sean Owen 
Committed: Tue Dec 15 11:38:57 2015 +

--
 .../test/scala/org/apache/spark/util/ResetSystemProperties.scala   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c59df8c5/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala
--
diff --git 
a/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala 
b/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala
index c58db5e..60fb7ab 100644
--- a/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala
+++ b/core/src/test/scala/org/apache/spark/util/ResetSystemProperties.scala
@@ -45,7 +45,7 @@ private[spark] trait ResetSystemProperties extends 
BeforeAndAfterEach { this: Su
   var oldProperties: Properties = null
 
   override def beforeEach(): Unit = {
-// we need SerializationUtils.clone instead of `new 
Properties(System.getProperties()` because
+// we need SerializationUtils.clone instead of `new 
Properties(System.getProperties())` because
 // the later way of creating a copy does not copy the properties but it 
initializes a new
 // Properties object with the given properties as defaults. They are not 
recognized at all
 // by standard Scala wrapper over Java Properties then.
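
The comment being fixed documents a real pitfall. As a standalone sketch (not
part of the patch), the difference it describes looks like this:

  import java.util.Properties
  import org.apache.commons.lang3.SerializationUtils

  // `new Properties(defaults)` only registers `defaults` as a fallback table;
  // the new object holds no entries of its own, so later changes to the live
  // system properties would still show through it.
  val viaConstructor = new Properties(System.getProperties())
  println(viaConstructor.size())   // 0 -- nothing was actually copied

  // SerializationUtils.clone makes a true copy, which is why the trait uses it.
  val snapshot = SerializationUtils.clone(System.getProperties())
  println(snapshot.size())         // same number of entries as the live properties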


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[1/2] spark git commit: Preparing Spark release v1.6.0-rc3

2015-12-15 Thread pwendell
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 80d261718 -> 08aa3b47e


Preparing Spark release v1.6.0-rc3


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/00a39d9c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/00a39d9c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/00a39d9c

Branch: refs/heads/branch-1.6
Commit: 00a39d9c05c55b5ffcd4f49aadc91cedf227669a
Parents: 80d2617
Author: Patrick Wendell 
Authored: Tue Dec 15 15:09:57 2015 -0800
Committer: Patrick Wendell 
Committed: Tue Dec 15 15:09:57 2015 -0800

--
 assembly/pom.xml| 2 +-
 bagel/pom.xml   | 2 +-
 core/pom.xml| 2 +-
 docker-integration-tests/pom.xml| 2 +-
 examples/pom.xml| 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml  | 2 +-
 external/kafka-assembly/pom.xml | 2 +-
 external/kafka/pom.xml  | 2 +-
 external/mqtt-assembly/pom.xml  | 2 +-
 external/mqtt/pom.xml   | 2 +-
 external/twitter/pom.xml| 2 +-
 external/zeromq/pom.xml | 2 +-
 extras/java8-tests/pom.xml  | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml  | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml  | 2 +-
 launcher/pom.xml| 2 +-
 mllib/pom.xml   | 2 +-
 network/common/pom.xml  | 2 +-
 network/shuffle/pom.xml | 2 +-
 network/yarn/pom.xml| 2 +-
 pom.xml | 2 +-
 repl/pom.xml| 2 +-
 sql/catalyst/pom.xml| 2 +-
 sql/core/pom.xml| 2 +-
 sql/hive-thriftserver/pom.xml   | 2 +-
 sql/hive/pom.xml| 2 +-
 streaming/pom.xml   | 2 +-
 tags/pom.xml| 2 +-
 tools/pom.xml   | 2 +-
 unsafe/pom.xml  | 2 +-
 yarn/pom.xml| 2 +-
 35 files changed, 35 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 4b60ee0..fbabaa5 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0-SNAPSHOT
+1.6.0
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 672e946..1b3e417 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0-SNAPSHOT
+1.6.0
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 61744bb..15b8d75 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0-SNAPSHOT
+1.6.0
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/docker-integration-tests/pom.xml
--
diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml
index 39d3f34..d579879 100644
--- a/docker-integration-tests/pom.xml
+++ b/docker-integration-tests/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0-SNAPSHOT
+1.6.0
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index f5ab2a7..37b15bb 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0-SNAPSHOT
+1.6.0
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/external/flume-assembly/pom.xml
--
diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index dceedcf..295455a 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0-SNAPSHOT
+1.6.0
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/00a39d9c/external/flume-sink/pom.xml

spark git commit: [MINOR][ML] Rename weights to coefficients for examples/DeveloperApiExample

2015-12-15 Thread jkbradley
Repository: spark
Updated Branches:
  refs/heads/master bc1ff9f4a -> b24c12d73


[MINOR][ML] Rename weights to coefficients for examples/DeveloperApiExample

Rename weights to coefficients for examples/DeveloperApiExample.

cc mengxr jkbradley

Author: Yanbo Liang 

Closes #10280 from yanboliang/spark-coefficients.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b24c12d7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b24c12d7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b24c12d7

Branch: refs/heads/master
Commit: b24c12d7338b47b637698e7458ba90f34cba28c0
Parents: bc1ff9f
Author: Yanbo Liang 
Authored: Tue Dec 15 16:29:39 2015 -0800
Committer: Joseph K. Bradley 
Committed: Tue Dec 15 16:29:39 2015 -0800

--
 .../examples/ml/JavaDeveloperApiExample.java| 22 ++--
 .../spark/examples/ml/DeveloperApiExample.scala | 16 +++---
 2 files changed, 19 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b24c12d7/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
--
diff --git 
a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
 
b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
index 0b4c0d9..b9dd3ad 100644
--- 
a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
+++ 
b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
@@ -89,7 +89,7 @@ public class JavaDeveloperApiExample {
 }
 if (sumPredictions != 0.0) {
   throw new Exception("MyJavaLogisticRegression predicted something other 
than 0," +
-  " even though all weights are 0!");
+  " even though all coefficients are 0!");
 }
 
 jsc.stop();
@@ -149,12 +149,12 @@ class MyJavaLogisticRegression
 // Extract columns from data using helper method.
 JavaRDD oldDataset = 
extractLabeledPoints(dataset).toJavaRDD();
 
-// Do learning to estimate the weight vector.
+// Do learning to estimate the coefficients vector.
 int numFeatures = oldDataset.take(1).get(0).features().size();
-Vector weights = Vectors.zeros(numFeatures); // Learning would happen here.
+Vector coefficients = Vectors.zeros(numFeatures); // Learning would happen 
here.
 
 // Create a model, and return it.
-return new MyJavaLogisticRegressionModel(uid(), weights).setParent(this);
+return new MyJavaLogisticRegressionModel(uid(), 
coefficients).setParent(this);
   }
 
   @Override
@@ -173,12 +173,12 @@ class MyJavaLogisticRegression
 class MyJavaLogisticRegressionModel
   extends ClassificationModel {
 
-  private Vector weights_;
-  public Vector weights() { return weights_; }
+  private Vector coefficients_;
+  public Vector coefficients() { return coefficients_; }
 
-  public MyJavaLogisticRegressionModel(String uid, Vector weights) {
+  public MyJavaLogisticRegressionModel(String uid, Vector coefficients) {
 this.uid_ = uid;
-this.weights_ = weights;
+this.coefficients_ = coefficients;
   }
 
   private String uid_ = Identifiable$.MODULE$.randomUID("myJavaLogReg");
@@ -208,7 +208,7 @@ class MyJavaLogisticRegressionModel
* modifier.
*/
   public Vector predictRaw(Vector features) {
-double margin = BLAS.dot(features, weights_);
+double margin = BLAS.dot(features, coefficients_);
 // There are 2 classes (binary classification), so we return a length-2 
vector,
 // where index i corresponds to class i (i = 0, 1).
 return Vectors.dense(-margin, margin);
@@ -222,7 +222,7 @@ class MyJavaLogisticRegressionModel
   /**
* Number of features the model was trained on.
*/
-  public int numFeatures() { return weights_.size(); }
+  public int numFeatures() { return coefficients_.size(); }
 
   /**
* Create a copy of the model.
@@ -235,7 +235,7 @@ class MyJavaLogisticRegressionModel
*/
   @Override
   public MyJavaLogisticRegressionModel copy(ParamMap extra) {
-return copyValues(new MyJavaLogisticRegressionModel(uid(), weights_), 
extra)
+return copyValues(new MyJavaLogisticRegressionModel(uid(), coefficients_), 
extra)
   .setParent(parent());
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/b24c12d7/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
--
diff --git 
a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
 
b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
index 3758edc..c1f63c6 100644
--- 

spark git commit: [SPARK-12271][SQL] Improve error message when Dataset.as[ ] has incompatible schemas.

2015-12-15 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master b24c12d73 -> 86ea64dd1


[SPARK-12271][SQL] Improve error message when Dataset.as[ ] has incompatible 
schemas.

Author: Nong Li 

Closes #10260 from nongli/spark-11271.
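
A minimal sketch of the situation the improved message targets (the case class,
column name, and `sqlContext` here are assumptions for illustration): asking for
an encoder whose fields the schema cannot satisfy now fails analysis with an
explicit error instead of a confusing one later.

  import sqlContext.implicits._

  case class Person(name: String, age: Long)

  val df = sqlContext.range(3).toDF("id")   // schema contains only `id`
  // df.as[Person]  // analysis now reports that `name` and `age` cannot be resolved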


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/86ea64dd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/86ea64dd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/86ea64dd

Branch: refs/heads/master
Commit: 86ea64dd146757c8f997d05fb5bb44f6aa58515c
Parents: b24c12d
Author: Nong Li 
Authored: Tue Dec 15 16:55:58 2015 -0800
Committer: Michael Armbrust 
Committed: Tue Dec 15 16:55:58 2015 -0800

--
 .../org/apache/spark/sql/catalyst/ScalaReflection.scala |  2 +-
 .../spark/sql/catalyst/encoders/ExpressionEncoder.scala |  1 +
 .../apache/spark/sql/catalyst/expressions/objects.scala | 12 +++-
 .../test/scala/org/apache/spark/sql/DatasetSuite.scala  | 10 +-
 4 files changed, 18 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/86ea64dd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
index 9013fd0..ecff860 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala
@@ -184,7 +184,7 @@ object ScalaReflection extends ScalaReflection {
 val TypeRef(_, _, Seq(optType)) = t
 val className = getClassNameFromType(optType)
 val newTypePath = s"""- option value class: "$className +: 
walkedTypePath
-WrapOption(constructorFor(optType, path, newTypePath))
+WrapOption(constructorFor(optType, path, newTypePath), 
dataTypeFor(optType))
 
   case t if t <:< localTypeOf[java.lang.Integer] =>
 val boxedType = classOf[java.lang.Integer]

http://git-wip-us.apache.org/repos/asf/spark/blob/86ea64dd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
index 3e8420e..363178b 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala
@@ -251,6 +251,7 @@ case class ExpressionEncoder[T](
 
 val plan = Project(Alias(unbound, "")() :: Nil, LocalRelation(schema))
 val analyzedPlan = SimpleAnalyzer.execute(plan)
+SimpleAnalyzer.checkAnalysis(analyzedPlan)
 val optimizedPlan = SimplifyCasts(analyzedPlan)
 
 // In order to construct instances of inner classes (for example those 
declared in a REPL cell),

http://git-wip-us.apache.org/repos/asf/spark/blob/86ea64dd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
index 96bc4fe..10ec75e 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
@@ -23,11 +23,9 @@ import scala.reflect.ClassTag
 import org.apache.spark.SparkConf
 import org.apache.spark.serializer._
 import org.apache.spark.sql.Row
-import org.apache.spark.sql.catalyst.analysis.SimpleAnalyzer
-import org.apache.spark.sql.catalyst.plans.logical.{Project, LocalRelation}
-import org.apache.spark.sql.catalyst.util.GenericArrayData
 import org.apache.spark.sql.catalyst.InternalRow
-import 
org.apache.spark.sql.catalyst.expressions.codegen.{GeneratedExpressionCode, 
CodeGenContext}
+import org.apache.spark.sql.catalyst.expressions.codegen.{CodeGenContext, 
GeneratedExpressionCode}
+import org.apache.spark.sql.catalyst.util.GenericArrayData
 import org.apache.spark.sql.types._
 
 /**
@@ -295,13 +293,17 @@ case class UnwrapOption(
  * Converts the result of evaluating `child` into an option, checking both the 
isNull bit and
  * (in the case of reference types) equality with null.
  * @param child The expression to evaluate and wrap.
+ * @param optType The 

[3/3] spark git commit: Update branch-1.6 for 1.6.0 release

2015-12-15 Thread marmbrus
Update branch-1.6 for 1.6.0 release

Author: Michael Armbrust 

Closes #10317 from marmbrus/versions.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/80d26171
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/80d26171
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/80d26171

Branch: refs/heads/branch-1.6
Commit: 80d261718c1157e5cd4b0ac27e36ef919ea65afa
Parents: 23c8846
Author: Michael Armbrust 
Authored: Tue Dec 15 15:03:33 2015 -0800
Committer: Michael Armbrust 
Committed: Tue Dec 15 15:03:33 2015 -0800

--
 CHANGES.txt | 34172 +
 .../main/scala/org/apache/spark/package.scala   | 2 +-
 dev/create-release/generate-changelist.py   | 4 +-
 docs/_config.yml| 2 +-
 4 files changed, 34176 insertions(+), 4 deletions(-)
--



-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] Git Push Summary

2015-12-15 Thread pwendell
Repository: spark
Updated Tags:  refs/tags/v1.6.0-rc3 [created] 00a39d9c0

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[2/2] spark git commit: Preparing development version 1.6.0-SNAPSHOT

2015-12-15 Thread pwendell
Preparing development version 1.6.0-SNAPSHOT


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/08aa3b47
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/08aa3b47
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/08aa3b47

Branch: refs/heads/branch-1.6
Commit: 08aa3b47e6a295a8297e741effa14cd0d834aea8
Parents: 00a39d9
Author: Patrick Wendell 
Authored: Tue Dec 15 15:10:04 2015 -0800
Committer: Patrick Wendell 
Committed: Tue Dec 15 15:10:04 2015 -0800

--
 assembly/pom.xml| 2 +-
 bagel/pom.xml   | 2 +-
 core/pom.xml| 2 +-
 docker-integration-tests/pom.xml| 2 +-
 examples/pom.xml| 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml  | 2 +-
 external/kafka-assembly/pom.xml | 2 +-
 external/kafka/pom.xml  | 2 +-
 external/mqtt-assembly/pom.xml  | 2 +-
 external/mqtt/pom.xml   | 2 +-
 external/twitter/pom.xml| 2 +-
 external/zeromq/pom.xml | 2 +-
 extras/java8-tests/pom.xml  | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml  | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml  | 2 +-
 launcher/pom.xml| 2 +-
 mllib/pom.xml   | 2 +-
 network/common/pom.xml  | 2 +-
 network/shuffle/pom.xml | 2 +-
 network/yarn/pom.xml| 2 +-
 pom.xml | 2 +-
 repl/pom.xml| 2 +-
 sql/catalyst/pom.xml| 2 +-
 sql/core/pom.xml| 2 +-
 sql/hive-thriftserver/pom.xml   | 2 +-
 sql/hive/pom.xml| 2 +-
 streaming/pom.xml   | 2 +-
 tags/pom.xml| 2 +-
 tools/pom.xml   | 2 +-
 unsafe/pom.xml  | 2 +-
 yarn/pom.xml| 2 +-
 35 files changed, 35 insertions(+), 35 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/assembly/pom.xml
--
diff --git a/assembly/pom.xml b/assembly/pom.xml
index fbabaa5..4b60ee0 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0
+1.6.0-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/bagel/pom.xml
--
diff --git a/bagel/pom.xml b/bagel/pom.xml
index 1b3e417..672e946 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0
+1.6.0-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/core/pom.xml
--
diff --git a/core/pom.xml b/core/pom.xml
index 15b8d75..61744bb 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0
+1.6.0-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/docker-integration-tests/pom.xml
--
diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml
index d579879..39d3f34 100644
--- a/docker-integration-tests/pom.xml
+++ b/docker-integration-tests/pom.xml
@@ -22,7 +22,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0
+1.6.0-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/examples/pom.xml
--
diff --git a/examples/pom.xml b/examples/pom.xml
index 37b15bb..f5ab2a7 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0
+1.6.0-SNAPSHOT
 ../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/external/flume-assembly/pom.xml
--
diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index 295455a..dceedcf 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   
 org.apache.spark
 spark-parent_2.10
-1.6.0
+1.6.0-SNAPSHOT
 ../../pom.xml
   
 

http://git-wip-us.apache.org/repos/asf/spark/blob/08aa3b47/external/flume-sink/pom.xml
--
diff --git a/external/flume-sink/pom.xml 

[2/3] spark git commit: Update branch-1.6 for 1.6.0 release

2015-12-15 Thread marmbrus
http://git-wip-us.apache.org/repos/asf/spark/blob/80d26171/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
new file mode 100644
index 000..ff59371
--- /dev/null
+++ b/CHANGES.txt
@@ -0,0 +1,34172 @@
+Spark Change Log
+
+
+Release 1.6.0
+
+  [STREAMING][MINOR] Fix typo in function name of StateImpl
+  jerryshao 
+  2015-12-15 09:41:40 -0800
+  Commit: 23c8846, github.com/apache/spark/pull/10305
+
+  [SPARK-12327] Disable commented code lintr temporarily
+  Shivaram Venkataraman 
+  2015-12-14 16:13:55 -0800
+  Commit: 352a0c8, github.com/apache/spark/pull/10300
+
+  [MINOR][DOC] Fix broken word2vec link
+  BenFradet 
+  2015-12-14 13:50:30 +
+  Commit: c0f0f6c, github.com/apache/spark/pull/10282
+
+  [SPARK-12275][SQL] No plan for BroadcastHint in some condition
+  yucai 
+  2015-12-13 23:08:21 -0800
+  Commit: 94ce502, github.com/apache/spark/pull/10265
+
+  [SPARK-12281][CORE] Fix a race condition when reporting ExecutorState in the 
shutdown hook
+  Shixiong Zhu 
+  2015-12-13 22:06:39 -0800
+  Commit: fbf16da, github.com/apache/spark/pull/10269
+
+  [SPARK-12267][CORE] Store the remote RpcEnv address to send the correct 
disconnetion message
+  Shixiong Zhu 
+  2015-12-12 21:58:55 -0800
+  Commit: d7e3bfd, github.com/apache/spark/pull/10261
+
+  [SPARK-12199][DOC] Follow-up: Refine example code in ml-features.md
+  Xusen Yin 
+  2015-12-12 17:47:01 -0800
+  Commit: e05364b, github.com/apache/spark/pull/10193
+
+  [SPARK-11193] Use Java ConcurrentHashMap instead of SynchronizedMap trait in 
order to avoid ClassCastException due to KryoSerializer in KinesisReceiver
+  Jean-Baptiste Onofré 
+  2015-12-12 08:51:52 +
+  Commit: 2679fce, github.com/apache/spark/pull/10203
+
+  [SPARK-12158][SPARKR][SQL] Fix 'sample' functions that break R unit test 
cases
+  gatorsmile 
+  2015-12-11 20:55:16 -0800
+  Commit: 47461fe, github.com/apache/spark/pull/10160
+
+  [SPARK-12298][SQL] Fix infinite loop in DataFrame.sortWithinPartitions
+  Ankur Dave 
+  2015-12-11 19:07:48 -0800
+  Commit: 03d8015, github.com/apache/spark/pull/10271
+
+  [SPARK-11978][ML] Move dataset_example.py to examples/ml and rename to 
dataframe_example.py
+  Yanbo Liang 
+  2015-12-11 18:02:24 -0800
+  Commit: c2f2046, github.com/apache/spark/pull/9957
+
+  [SPARK-12217][ML] Document invalid handling for StringIndexer
+  BenFradet 
+  2015-12-11 15:43:00 -0800
+  Commit: 75531c7, github.com/apache/spark/pull/10257
+
+  [SPARK-11497][MLLIB][PYTHON] PySpark RowMatrix Constructor Has Type Erasure 
Issue
+  Mike Dusenberry 
+  2015-12-11 14:21:33 -0800
+  Commit: bfcc8cf, github.com/apache/spark/pull/9458
+
+  [SPARK-11964][DOCS][ML] Add in Pipeline Import/Export Documentation
+  anabranch , Bill Chambers 

+  2015-12-11 12:55:56 -0800
+  Commit: 2ddd104, github.com/apache/spark/pull/10179
+
+  [SPARK-12146][SPARKR] SparkR jsonFile should support multiple input files
+  Yanbo Liang 
+  2015-12-11 11:47:35 -0800
+  Commit: f05bae4, github.com/apache/spark/pull/10145
+
+  Preparing development version 1.6.0-SNAPSHOT
+  Patrick Wendell 
+  2015-12-11 11:25:09 -0800
+  Commit: 2e45231
+
+  Preparing Spark release v1.6.0-rc2
+  Patrick Wendell 
+  2015-12-11 11:25:03 -0800
+  Commit: 23f8dfd
+
+  [SPARK-12258] [SQL] passing null into ScalaUDF (follow-up)
+  Davies Liu 
+  2015-12-11 11:15:53 -0800
+  Commit: eec3660, github.com/apache/spark/pull/10266
+
+  Preparing development version 1.6.0-SNAPSHOT
+  Patrick Wendell 
+  2015-12-10 18:45:42 -0800
+  Commit: 250249e
+
+  Preparing Spark release v1.6.0-rc2
+  Patrick Wendell 
+  2015-12-10 18:45:36 -0800
+  Commit: 3e39925
+
+  [SPARK-12258][SQL] passing null into ScalaUDF
+  Davies Liu 
+  2015-12-10 17:22:18 -0800
+  Commit: d09af2c, github.com/apache/spark/pull/10249
+
+  [STREAMING][DOC][MINOR] Update the description of direct Kafka stream doc
+  jerryshao 
+  2015-12-10 15:31:46 -0800
+  Commit: 5d3722f, github.com/apache/spark/pull/10246
+
+  [SPARK-12155][SPARK-12253] Fix executor OOM in unified memory management
+  Andrew Or 
+  2015-12-10 15:30:08 -0800
+  Commit: c247b6a, github.com/apache/spark/pull/10240
+
+  [SPARK-12251] Document and improve off-heap memory configurations
+  Josh Rosen 
+  2015-12-10 15:29:04 -0800
+  Commit: 9870e5c, github.com/apache/spark/pull/10237
+
+  [SPARK-12212][ML][DOC] Clarifies the difference 

[1/3] spark git commit: Update branch-1.6 for 1.6.0 release

2015-12-15 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 23c884605 -> 80d261718


http://git-wip-us.apache.org/repos/asf/spark/blob/80d26171/core/src/main/scala/org/apache/spark/package.scala
--
diff --git a/core/src/main/scala/org/apache/spark/package.scala 
b/core/src/main/scala/org/apache/spark/package.scala
index 7515aad..e37bf09 100644
--- a/core/src/main/scala/org/apache/spark/package.scala
+++ b/core/src/main/scala/org/apache/spark/package.scala
@@ -43,5 +43,5 @@ package org.apache
 
 package object spark {
   // For package docs only
-  val SPARK_VERSION = "1.6.0-SNAPSHOT"
+  val SPARK_VERSION = "1.6.0"
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/80d26171/dev/create-release/generate-changelist.py
--
diff --git a/dev/create-release/generate-changelist.py 
b/dev/create-release/generate-changelist.py
index 2e1a35a..9798158 100755
--- a/dev/create-release/generate-changelist.py
+++ b/dev/create-release/generate-changelist.py
@@ -31,8 +31,8 @@ import time
 import traceback
 
 SPARK_HOME = os.environ["SPARK_HOME"]
-NEW_RELEASE_VERSION = "1.0.0"
-PREV_RELEASE_GIT_TAG = "v0.9.1"
+NEW_RELEASE_VERSION = "1.6.0"
+PREV_RELEASE_GIT_TAG = "v1.5.2"
 
 CHANGELIST = "CHANGES.txt"
 OLD_CHANGELIST = "%s.old" % (CHANGELIST)

http://git-wip-us.apache.org/repos/asf/spark/blob/80d26171/docs/_config.yml
--
diff --git a/docs/_config.yml b/docs/_config.yml
index 2c70b76..e5fde18 100644
--- a/docs/_config.yml
+++ b/docs/_config.yml
@@ -14,7 +14,7 @@ include:
 
 # These allow the documentation to be updated with newer releases
 # of Spark, Scala, and Mesos.
-SPARK_VERSION: 1.6.0-SNAPSHOT
+SPARK_VERSION: 1.6.0
 SPARK_VERSION_SHORT: 1.6.0
 SCALA_BINARY_VERSION: "2.10"
 SCALA_VERSION: "2.10.5"


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12236][SQL] JDBC filter tests all pass if filters are not really pushed down

2015-12-15 Thread marmbrus
Repository: spark
Updated Branches:
  refs/heads/master 86ea64dd1 -> 28112657e


[SPARK-12236][SQL] JDBC filter tests all pass if filters are not really pushed 
down

https://issues.apache.org/jira/browse/SPARK-12236
Currently JDBC filters are not tested properly. All the tests pass even if the 
filters are not pushed down due to Spark-side filtering.

In this PR:
Firstly, I corrected the tests to properly check the pushed-down filters by 
removing Spark-side filtering.
Also, `!=` was being tested even though it is not actually pushed down, so I 
removed those cases.
Lastly, I moved the `stripSparkFilter()` function to `SQLTestUtils`, since it 
will be shared by all tests of pushed-down filters. It will also be shared with 
the ORC data source, whose filters are likewise not tested properly.

Author: hyukjinkwon 

Closes #10221 from HyukjinKwon/SPARK-12236.
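
The testing idea, sketched minimally (inside a suite that mixes in SQLTestUtils):
strip Spark's own Filter node from the physical plan so only source-side
filtering remains; the assertion then holds only if the predicate was really
pushed down to JDBC.

  val df = sql("SELECT * FROM foobar WHERE THEID = 1")
  assert(stripSparkFilter(df).collect().size === 1)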


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28112657
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28112657
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28112657

Branch: refs/heads/master
Commit: 28112657ea5919451291c21b4b8e1eb3db0ec8d4
Parents: 86ea64d
Author: hyukjinkwon 
Authored: Tue Dec 15 17:02:14 2015 -0800
Committer: Michael Armbrust 
Committed: Tue Dec 15 17:02:14 2015 -0800

--
 .../datasources/parquet/ParquetFilterSuite.scala | 15 ---
 .../scala/org/apache/spark/sql/jdbc/JDBCSuite.scala  | 10 --
 .../org/apache/spark/sql/test/SQLTestUtils.scala | 15 +++
 3 files changed, 19 insertions(+), 21 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/28112657/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index daf41bc..6178e37 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -110,21 +110,6 @@ class ParquetFilterSuite extends QueryTest with 
ParquetTest with SharedSQLContex
 checkBinaryFilterPredicate(predicate, filterClass, Seq(Row(expected)))(df)
   }
 
-  /**
-   * Strip Spark-side filtering in order to check if a datasource filters rows 
correctly.
-   */
-  protected def stripSparkFilter(df: DataFrame): DataFrame = {
-val schema = df.schema
-val childRDD = df
-  .queryExecution
-  .executedPlan.asInstanceOf[org.apache.spark.sql.execution.Filter]
-  .child
-  .execute()
-  .map(row => Row.fromSeq(row.toSeq(schema)))
-
-sqlContext.createDataFrame(childRDD, schema)
-  }
-
   test("filter pushdown - boolean") {
 withParquetDataFrame((true :: false :: Nil).map(b => 
Tuple1.apply(Option(b { implicit df =>
   checkFilterPredicate('_1.isNull, classOf[Eq[_]], Seq.empty[Row])

http://git-wip-us.apache.org/repos/asf/spark/blob/28112657/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index 8c24aa3..a360947 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -176,12 +176,10 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter 
with SharedSQLContext
   }
 
   test("SELECT * WHERE (simple predicates)") {
-assert(sql("SELECT * FROM foobar WHERE THEID < 1").collect().size === 0)
-assert(sql("SELECT * FROM foobar WHERE THEID != 2").collect().size === 2)
-assert(sql("SELECT * FROM foobar WHERE THEID = 1").collect().size === 1)
-assert(sql("SELECT * FROM foobar WHERE NAME = 'fred'").collect().size === 
1)
-assert(sql("SELECT * FROM foobar WHERE NAME > 'fred'").collect().size === 
2)
-assert(sql("SELECT * FROM foobar WHERE NAME != 'fred'").collect().size === 
2)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 
1")).collect().size === 0)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 
1")).collect().size === 1)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 
'fred'")).collect().size === 1)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 
'fred'")).collect().size === 2)
   }
 
   test("SELECT * WHERE 

spark git commit: [SPARK-12304][STREAMING] Make Spark Streaming web UI display more fri…

2015-12-15 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/master ca0690b5e -> d52bf47e1


[SPARK-12304][STREAMING] Make Spark Streaming web UI display more friendly 
Receiver graphs

Currently, the Spark Streaming web UI uses the same maxY when it displays the 
'Input Rate Times & Histograms' and 'Per-Receiver Times & Histograms' graphs.

This may lead to somewhat unfriendly graphs: once we have tens of Receivers or 
more, every 'Per-Receiver Times' line almost hits the ground.

This PR calculates a new maxY, shared among all the 'Per-Receiver Times & 
Histograms' graphs, instead of reusing the original one.

Before:
![before-5](https://cloud.githubusercontent.com/assets/15843379/11761362/d790c356-a0fa-11e5-860e-4b834603de1d.png)

After:
![after-5](https://cloud.githubusercontent.com/assets/15843379/11761361/cfabf692-a0fa-11e5-97d0-4ad124aaca2a.png)

Author: proflin 

Closes #10318 from proflin/SPARK-12304.
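
A minimal restatement of the new ceiling computation (the `ratesByReceiver` name
is an assumption; the patch reads the rates from the streaming listener): the
per-receiver graphs now share the largest event rate observed across all
receivers, rounded up, instead of the aggregate input-rate maximum.

  // ratesByReceiver: Map[Int, Seq[(Long, Double)]] -- (batch time, event rate) per stream
  val perReceiverMaxY = ratesByReceiver.values
    .flatMap(_.map { case (_, eventRate) => eventRate })
    .reduceOption[Double](math.max)
    .map(_.ceil.toLong)
    .getOrElse(0L)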


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d52bf47e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d52bf47e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d52bf47e

Branch: refs/heads/master
Commit: d52bf47e13e0186590437f71040100d2f6f11da9
Parents: ca0690b
Author: proflin 
Authored: Tue Dec 15 20:22:56 2015 -0800
Committer: Shixiong Zhu 
Committed: Tue Dec 15 20:22:56 2015 -0800

--
 .../scala/org/apache/spark/streaming/ui/StreamingPage.scala  | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/d52bf47e/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
--
diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala 
b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
index 88a4483..b3692c3 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingPage.scala
@@ -392,9 +392,15 @@ private[ui] class StreamingPage(parent: StreamingTab)
   maxX: Long,
   minY: Double,
   maxY: Double): Seq[Node] = {
+val maxYCalculated = listener.receivedEventRateWithBatchTime.values
+  .flatMap { case streamAndRates => streamAndRates.map { case (_, 
eventRate) => eventRate } }
+  .reduceOption[Double](math.max)
+  .map(_.ceil.toLong)
+  .getOrElse(0L)
+
 val content = 
listener.receivedEventRateWithBatchTime.toList.sortBy(_._1).map {
   case (streamId, eventRates) =>
-generateInputDStreamRow(jsCollector, streamId, eventRates, minX, maxX, 
minY, maxY)
+generateInputDStreamRow(jsCollector, streamId, eventRates, minX, maxX, 
minY, maxYCalculated)
 }.foldLeft[Seq[Node]](Nil)(_ ++ _)
 
 // scalastyle:off


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [HOTFIX] Compile error from commit 31b3910

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 31b391019 -> 840bd2e00


[HOTFIX] Compile error from commit 31b3910


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/840bd2e0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/840bd2e0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/840bd2e0

Branch: refs/heads/master
Commit: 840bd2e008da5b22bfa73c587ea2c57666fffc60
Parents: 31b3910
Author: Andrew Or 
Authored: Tue Dec 15 18:11:53 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:11:53 2015 -0800

--
 sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/840bd2e0/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index b69d441..33b03be 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -234,7 +234,7 @@ class DataFrame private[sql](
 // For Data that has more than "_numRows" records
 if (hasMoreData) {
   val rowsString = if (_numRows == 1) "row" else "rows"
-  sb.append(s"only showing top $_numRows $rowsString\n")
+  sb.append(s"only showing top ${_numRows} $rowsString\n")
 }
 
 sb.toString()
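
The underlying Scala detail, as a standalone sketch: the simple $id interpolation
form does not accept an identifier that starts with an underscore, so the braces
in ${_numRows} are required for this to compile.

  val _numRows = 20
  // println(s"only showing top $_numRows rows")   // does not compile
  println(s"only showing top ${_numRows} rows")    // prints: only showing top 20 rows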


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12130] Replace shuffleManagerClass with shortShuffleMgrNames in ExternalShuffleBlockResolver

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master f725b2ec1 -> 369127f03


[SPARK-12130] Replace shuffleManagerClass with shortShuffleMgrNames in 
ExternalShuffleBlockResolver

Replace shuffleManagerClassName with shortShuffleMgrName to reduce the cost of 
string comparison, and check the sort-based manager first. cc JoshRosen 
andrewor14

Author: Lianhui Wang 

Closes #10131 from lianhuiwang/spark-12130.
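
A minimal sketch of the idea (the helper is hypothetical; the short names mirror
Spark's built-in managers): dispatching on a short registered name is cheaper
than comparing a fully qualified class name, and the common sort-based managers
are checked first.

  def resolveBlock(shortName: String): Unit = shortName match {
    case "sort" | "tungsten-sort" => // read via the index file written by sort-based shuffle
    case "hash"                   => // read the per-map-task files written by hash-based shuffle
    case other => throw new UnsupportedOperationException(s"Unsupported shuffle manager: $other")
  }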


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/369127f0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/369127f0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/369127f0

Branch: refs/heads/master
Commit: 369127f03257e7081d2aa1fc445e773b26f0d5e3
Parents: f725b2e
Author: Lianhui Wang 
Authored: Tue Dec 15 18:16:22 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:17:48 2015 -0800

--
 .../main/scala/org/apache/spark/shuffle/ShuffleManager.scala  | 4 
 .../org/apache/spark/shuffle/hash/HashShuffleManager.scala| 2 ++
 .../org/apache/spark/shuffle/sort/SortShuffleManager.scala| 2 ++
 .../main/scala/org/apache/spark/storage/BlockManager.scala| 2 +-
 .../spark/network/shuffle/ExternalShuffleBlockResolver.java   | 7 +++
 .../org/apache/spark/network/sasl/SaslIntegrationSuite.java   | 3 +--
 .../network/shuffle/ExternalShuffleBlockResolverSuite.java| 6 +++---
 .../network/shuffle/ExternalShuffleIntegrationSuite.java  | 4 ++--
 8 files changed, 18 insertions(+), 12 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/369127f0/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala
--
diff --git a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala 
b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala
index 978366d..a3444bf 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/ShuffleManager.scala
@@ -28,6 +28,10 @@ import org.apache.spark.{TaskContext, ShuffleDependency}
  * boolean isDriver as parameters.
  */
 private[spark] trait ShuffleManager {
+
+  /** Return short name for the ShuffleManager */
+  val shortName: String
+
   /**
* Register a shuffle with the manager and obtain a handle for it to pass to 
tasks.
*/

http://git-wip-us.apache.org/repos/asf/spark/blob/369127f0/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala 
b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala
index d2e2fc4..4f30da0 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/hash/HashShuffleManager.scala
@@ -34,6 +34,8 @@ private[spark] class HashShuffleManager(conf: SparkConf) 
extends ShuffleManager
 
   private val fileShuffleBlockResolver = new FileShuffleBlockResolver(conf)
 
+  override val shortName: String = "hash"
+
   /* Register a shuffle with the manager and obtain a handle for it to pass to 
tasks. */
   override def registerShuffle[K, V, C](
   shuffleId: Int,

http://git-wip-us.apache.org/repos/asf/spark/blob/369127f0/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala 
b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
index 66b6bbc..9b1a279 100644
--- a/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
+++ b/core/src/main/scala/org/apache/spark/shuffle/sort/SortShuffleManager.scala
@@ -79,6 +79,8 @@ private[spark] class SortShuffleManager(conf: SparkConf) 
extends ShuffleManager
*/
   private[this] val numMapsForShuffle = new ConcurrentHashMap[Int, Int]()
 
+  override val shortName: String = "sort"
+
   override val shuffleBlockResolver = new IndexShuffleBlockResolver(conf)
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/369127f0/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
--
diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala 
b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
index ed05143..540e1ec 100644
--- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
+++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala
@@ -200,7 +200,7 @@ private[spark] class BlockManager(
 val 

spark git commit: [SPARK-4117][YARN] Spark on Yarn handle AM being told command from RM

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master a89e8b612 -> ca0690b5e


[SPARK-4117][YARN] Spark on Yarn handle AM being told command from RM

Spark on Yarn handle AM being told command from RM

When the RM throws ApplicationAttemptNotFoundException for an allocate 
invocation, make the ApplicationMaster finish immediately without any retries.

Author: Devaraj K 

Closes #10129 from devaraj-kavali/SPARK-4117.
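
The patch matches the exception by class name rather than by type. A standalone
sketch of that compatibility trick (the helper name is hypothetical), which keeps
the code compiling against Hadoop versions older than 2.4, where the class does
not exist:

  def isAttemptNotFound(e: Throwable): Boolean =
    e.getClass.getName ==
      "org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException"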


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca0690b5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca0690b5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca0690b5

Branch: refs/heads/master
Commit: ca0690b5ef10b14ce57a0c30d5308eb02f163f39
Parents: a89e8b6
Author: Devaraj K 
Authored: Tue Dec 15 18:30:59 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:30:59 2015 -0800

--
 .../org/apache/spark/deploy/yarn/ApplicationMaster.scala| 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ca0690b5/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
--
diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
index 1970f7d..fc742df 100644
--- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
+++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala
@@ -376,7 +376,14 @@ private[spark] class ApplicationMaster(
 case i: InterruptedException =>
 case e: Throwable => {
   failureCount += 1
-  if (!NonFatal(e) || failureCount >= reporterMaxFailures) {
+  // this exception was introduced in hadoop 2.4 and this code 
would not compile
+  // with earlier versions if we refer it directly.
+  if 
("org.apache.hadoop.yarn.exceptions.ApplicationAttemptNotFoundException" ==
+e.getClass().getName()) {
+logError("Exception from Reporter thread.", e)
+finish(FinalApplicationStatus.FAILED, 
ApplicationMaster.EXIT_REPORTER_FAILURE,
+  e.getMessage)
+  } else if (!NonFatal(e) || failureCount >= reporterMaxFailures) {
 finish(FinalApplicationStatus.FAILED,
   ApplicationMaster.EXIT_REPORTER_FAILURE, "Exception was 
thrown " +
 s"$failureCount time(s) from Reporter thread.")


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-12056][CORE] Part 2 Create a TaskAttemptContext only after calling setConf

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 08aa3b47e -> 9e4ac5645


[SPARK-12056][CORE] Part 2 Create a TaskAttemptContext only after calling 
setConf

This is a continuation of SPARK-12056, where the change is applied to 
SqlNewHadoopRDD.scala.

andrewor14
FYI

Author: tedyu 

Closes #10164 from tedyu/master.

(cherry picked from commit f725b2ec1ab0d89e35b5e2d3ddeddb79fec85f6d)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9e4ac564
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9e4ac564
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9e4ac564

Branch: refs/heads/branch-1.6
Commit: 9e4ac56452710ddd8efb695e69c8de49317e3f28
Parents: 08aa3b4
Author: tedyu 
Authored: Tue Dec 15 18:15:10 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:15:53 2015 -0800

--
 .../apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9e4ac564/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala
index 56cb63d..eea780c 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala
@@ -148,14 +148,14 @@ private[spark] class SqlNewHadoopRDD[V: ClassTag](
   }
   inputMetrics.setBytesReadCallback(bytesReadCallback)
 
-  val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, 
split.index, 0)
-  val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId)
   val format = inputFormatClass.newInstance
   format match {
 case configurable: Configurable =>
   configurable.setConf(conf)
 case _ =>
   }
+  val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, 
split.index, 0)
+  val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId)
   private[this] var reader: RecordReader[Void, V] = null
 
   /**


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



spark git commit: [SPARK-10477][SQL] using DSL in ColumnPruningSuite to improve readability

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master c5b6b398d -> a89e8b612


[SPARK-10477][SQL] using DSL in ColumnPruningSuite to improve readability

Author: Wenchen Fan 

Closes #8645 from cloud-fan/test.
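
For readers unfamiliar with the Catalyst test DSL, a minimal sketch of the style
this suite moves to: logical plans are built by chaining operators on a
LocalRelation instead of constructing Generate and Project nodes by hand.

  import org.apache.spark.sql.catalyst.dsl.expressions._
  import org.apache.spark.sql.catalyst.dsl.plans._
  import org.apache.spark.sql.catalyst.expressions.Explode
  import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
  import org.apache.spark.sql.types.StringType

  val input = LocalRelation('a.int, 'b.array(StringType))
  val plan  = input.select('b).generate(Explode('b), join = false).analyze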


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a89e8b61
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a89e8b61
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a89e8b61

Branch: refs/heads/master
Commit: a89e8b6122ee5a1517fbcf405b1686619db56696
Parents: c5b6b39
Author: Wenchen Fan 
Authored: Tue Dec 15 18:29:19 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:29:19 2015 -0800

--
 .../apache/spark/sql/catalyst/dsl/package.scala |  7 ++--
 .../catalyst/optimizer/ColumnPruningSuite.scala | 41 +++-
 2 files changed, 27 insertions(+), 21 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a89e8b61/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
index af594c2..e509711 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -275,13 +275,14 @@ package object dsl {
 
   def unionAll(otherPlan: LogicalPlan): LogicalPlan = Union(logicalPlan, 
otherPlan)
 
-  // TODO specify the output column names
   def generate(
 generator: Generator,
 join: Boolean = false,
 outer: Boolean = false,
-alias: Option[String] = None): LogicalPlan =
-Generate(generator, join = join, outer = outer, alias, Nil, 
logicalPlan)
+alias: Option[String] = None,
+outputNames: Seq[String] = Nil): LogicalPlan =
+Generate(generator, join = join, outer = outer, alias,
+  outputNames.map(UnresolvedAttribute(_)), logicalPlan)
 
   def insertInto(tableName: String, overwrite: Boolean = false): 
LogicalPlan =
 InsertIntoTable(

http://git-wip-us.apache.org/repos/asf/spark/blob/a89e8b61/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
index 4a1e7ce..9bf61ae 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer
 
 import org.apache.spark.sql.catalyst.expressions.Explode
 import org.apache.spark.sql.catalyst.plans.PlanTest
-import org.apache.spark.sql.catalyst.plans.logical.{Project, LocalRelation, 
Generate, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
@@ -35,12 +35,11 @@ class ColumnPruningSuite extends PlanTest {
   test("Column pruning for Generate when Generate.join = false") {
 val input = LocalRelation('a.int, 'b.array(StringType))
 
-val query = Generate(Explode('b), false, false, None, 's.string :: Nil, 
input).analyze
+val query = input.generate(Explode('b), join = false).analyze
+
 val optimized = Optimize.execute(query)
 
-val correctAnswer =
-  Generate(Explode('b), false, false, None, 's.string :: Nil,
-Project('b.attr :: Nil, input)).analyze
+val correctAnswer = input.select('b).generate(Explode('b), join = 
false).analyze
 
 comparePlans(optimized, correctAnswer)
   }
@@ -49,16 +48,19 @@ class ColumnPruningSuite extends PlanTest {
 val input = LocalRelation('a.int, 'b.int, 'c.array(StringType))
 
 val query =
-  Project(Seq('a, 's),
-Generate(Explode('c), true, false, None, 's.string :: Nil,
-  input)).analyze
+  input
+.generate(Explode('c), join = true, outputNames = "explode" :: Nil)
+.select('a, 'explode)
+.analyze
+
 val optimized = Optimize.execute(query)
 
 val correctAnswer =
-  Project(Seq('a, 's),
-Generate(Explode('c), true, false, None, 's.string :: Nil,
-  Project(Seq('a, 'c),
-input))).analyze
+  input
+.select('a, 'c)
+

spark git commit: [SPARK-12062][CORE] Change Master to async rebuild UI when application completes

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 8e9a60031 -> 93095eb29


[SPARK-12062][CORE] Change Master to async rebuild UI when application completes

This change builds the event history of completed apps asynchronously, so that the 
RPC thread is not blocked and new workers can still register and be removed even 
when the event log history is very large and takes a long time to rebuild.
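
The mechanism, as visible in the diff below: the Master submits the potentially slow UI
rebuild as a Future on a dedicated single-thread executor and, once it finishes, sends
itself an AttachCompletedRebuildUI message so the attach still happens on the RPC thread.
A minimal standalone sketch of that pattern; `rebuildUI` and `attachUI` are hypothetical
stand-ins for the real Master methods, not Spark APIs:

import java.util.concurrent.Executors

import scala.concurrent.{ExecutionContext, Future}
import scala.util.{Failure, Success}

object AsyncRebuildSketch {
  // A dedicated single-thread pool, so slow rebuilds never block the calling thread
  // (mirrors the master-rebuild-ui-thread added by this patch).
  private val rebuildThread = Executors.newSingleThreadExecutor()
  private implicit val rebuildContext: ExecutionContext =
    ExecutionContext.fromExecutor(rebuildThread)

  // Hypothetical stand-ins for rebuilding and attaching an application UI.
  def rebuildUI(appId: String): String = { Thread.sleep(100); s"ui-for-$appId" }
  def attachUI(ui: String): Unit = println(s"attached $ui")

  def asyncRebuildUI(appId: String): Unit =
    Future(rebuildUI(appId)).onComplete {
      case Success(ui) => attachUI(ui) // the real code sends AttachCompletedRebuildUI(appId)
      case Failure(e)  => println(s"rebuild failed: $e")
    }

  def main(args: Array[String]): Unit = {
    asyncRebuildUI("app-20151215-0001")
    Thread.sleep(500)            // give the background rebuild time to finish
    rebuildThread.shutdownNow()  // matches the shutdownNow() added to Master's stop path
  }
}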

Author: Bryan Cutler 

Closes #10284 from BryanCutler/async-MasterUI-SPARK-12062.

(cherry picked from commit c5b6b398d5e368626e589feede80355fb74c2bd8)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/93095eb2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/93095eb2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/93095eb2

Branch: refs/heads/branch-1.6
Commit: 93095eb29a1e59dbdbf6220bfa732b502330e6ae
Parents: 8e9a600
Author: Bryan Cutler 
Authored: Tue Dec 15 18:28:16 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:28:26 2015 -0800

--
 .../org/apache/spark/deploy/master/Master.scala | 79 +---
 .../spark/deploy/master/MasterMessages.scala|  2 +
 2 files changed, 52 insertions(+), 29 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/93095eb2/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala 
b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
index 1355e1a..fc42bf0 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -21,9 +21,11 @@ import java.io.FileNotFoundException
 import java.net.URLEncoder
 import java.text.SimpleDateFormat
 import java.util.Date
-import java.util.concurrent.{ScheduledFuture, TimeUnit}
+import java.util.concurrent.{ConcurrentHashMap, ScheduledFuture, TimeUnit}
 
 import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
+import scala.concurrent.duration.Duration
+import scala.concurrent.{Await, ExecutionContext, Future}
 import scala.language.postfixOps
 import scala.util.Random
 
@@ -56,6 +58,10 @@ private[deploy] class Master(
   private val forwardMessageThread =
 
ThreadUtils.newDaemonSingleThreadScheduledExecutor("master-forward-message-thread")
 
+  private val rebuildUIThread =
+ThreadUtils.newDaemonSingleThreadExecutor("master-rebuild-ui-thread")
+  private val rebuildUIContext = ExecutionContext.fromExecutor(rebuildUIThread)
+
   private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
 
   private def createDateFormat = new SimpleDateFormat("MMddHHmmss") // For 
application IDs
@@ -78,7 +84,8 @@ private[deploy] class Master(
   private val addressToApp = new HashMap[RpcAddress, ApplicationInfo]
   private val completedApps = new ArrayBuffer[ApplicationInfo]
   private var nextAppNumber = 0
-  private val appIdToUI = new HashMap[String, SparkUI]
+  // Using ConcurrentHashMap so that master-rebuild-ui-thread can add a UI 
after asyncRebuildUI
+  private val appIdToUI = new ConcurrentHashMap[String, SparkUI]
 
   private val drivers = new HashSet[DriverInfo]
   private val completedDrivers = new ArrayBuffer[DriverInfo]
@@ -191,6 +198,7 @@ private[deploy] class Master(
   checkForWorkerTimeOutTask.cancel(true)
 }
 forwardMessageThread.shutdownNow()
+rebuildUIThread.shutdownNow()
 webUi.stop()
 restServer.foreach(_.stop())
 masterMetricsSystem.stop()
@@ -367,6 +375,10 @@ private[deploy] class Master(
 case CheckForWorkerTimeOut => {
   timeOutDeadWorkers()
 }
+
+case AttachCompletedRebuildUI(appId) =>
+  // An asyncRebuildSparkUI has completed, so need to attach to master 
webUi
+  Option(appIdToUI.get(appId)).foreach { ui => webUi.attachSparkUI(ui) }
   }
 
   override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, 
Unit] = {
@@ -809,7 +821,7 @@ private[deploy] class Master(
   if (completedApps.size >= RETAINED_APPLICATIONS) {
 val toRemove = math.max(RETAINED_APPLICATIONS / 10, 1)
 completedApps.take(toRemove).foreach( a => {
-  appIdToUI.remove(a.id).foreach { ui => webUi.detachSparkUI(ui) }
+  Option(appIdToUI.remove(a.id)).foreach { ui => 
webUi.detachSparkUI(ui) }
   applicationMetricsSystem.removeSource(a.appSource)
 })
 completedApps.trimStart(toRemove)
@@ -818,7 +830,7 @@ private[deploy] class Master(
   waitingApps -= app
 
   // If application events are logged, use them to rebuild the UI
-  rebuildSparkUI(app)
+  asyncRebuildSparkUI(app)
 
   for (exec <- 

spark git commit: [SPARK-12062][CORE] Change Master to async rebuild UI when application completes

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 8a215d233 -> c5b6b398d


[SPARK-12062][CORE] Change Master to async rebuild UI when application completes

This change builds the event history of completed apps asynchronously, so that the 
RPC thread is not blocked and new workers can still register and be removed even 
when the event log history is very large and takes a long time to rebuild.

Author: Bryan Cutler 

Closes #10284 from BryanCutler/async-MasterUI-SPARK-12062.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c5b6b398
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c5b6b398
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c5b6b398

Branch: refs/heads/master
Commit: c5b6b398d5e368626e589feede80355fb74c2bd8
Parents: 8a215d2
Author: Bryan Cutler 
Authored: Tue Dec 15 18:28:16 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:28:16 2015 -0800

--
 .../org/apache/spark/deploy/master/Master.scala | 79 +---
 .../spark/deploy/master/MasterMessages.scala|  2 +
 2 files changed, 52 insertions(+), 29 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c5b6b398/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala 
b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
index 1355e1a..fc42bf0 100644
--- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala
@@ -21,9 +21,11 @@ import java.io.FileNotFoundException
 import java.net.URLEncoder
 import java.text.SimpleDateFormat
 import java.util.Date
-import java.util.concurrent.{ScheduledFuture, TimeUnit}
+import java.util.concurrent.{ConcurrentHashMap, ScheduledFuture, TimeUnit}
 
 import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet}
+import scala.concurrent.duration.Duration
+import scala.concurrent.{Await, ExecutionContext, Future}
 import scala.language.postfixOps
 import scala.util.Random
 
@@ -56,6 +58,10 @@ private[deploy] class Master(
   private val forwardMessageThread =
 
ThreadUtils.newDaemonSingleThreadScheduledExecutor("master-forward-message-thread")
 
+  private val rebuildUIThread =
+ThreadUtils.newDaemonSingleThreadExecutor("master-rebuild-ui-thread")
+  private val rebuildUIContext = ExecutionContext.fromExecutor(rebuildUIThread)
+
   private val hadoopConf = SparkHadoopUtil.get.newConfiguration(conf)
 
   private def createDateFormat = new SimpleDateFormat("MMddHHmmss") // For 
application IDs
@@ -78,7 +84,8 @@ private[deploy] class Master(
   private val addressToApp = new HashMap[RpcAddress, ApplicationInfo]
   private val completedApps = new ArrayBuffer[ApplicationInfo]
   private var nextAppNumber = 0
-  private val appIdToUI = new HashMap[String, SparkUI]
+  // Using ConcurrentHashMap so that master-rebuild-ui-thread can add a UI 
after asyncRebuildUI
+  private val appIdToUI = new ConcurrentHashMap[String, SparkUI]
 
   private val drivers = new HashSet[DriverInfo]
   private val completedDrivers = new ArrayBuffer[DriverInfo]
@@ -191,6 +198,7 @@ private[deploy] class Master(
   checkForWorkerTimeOutTask.cancel(true)
 }
 forwardMessageThread.shutdownNow()
+rebuildUIThread.shutdownNow()
 webUi.stop()
 restServer.foreach(_.stop())
 masterMetricsSystem.stop()
@@ -367,6 +375,10 @@ private[deploy] class Master(
 case CheckForWorkerTimeOut => {
   timeOutDeadWorkers()
 }
+
+case AttachCompletedRebuildUI(appId) =>
+  // An asyncRebuildSparkUI has completed, so need to attach to master 
webUi
+  Option(appIdToUI.get(appId)).foreach { ui => webUi.attachSparkUI(ui) }
   }
 
   override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, 
Unit] = {
@@ -809,7 +821,7 @@ private[deploy] class Master(
   if (completedApps.size >= RETAINED_APPLICATIONS) {
 val toRemove = math.max(RETAINED_APPLICATIONS / 10, 1)
 completedApps.take(toRemove).foreach( a => {
-  appIdToUI.remove(a.id).foreach { ui => webUi.detachSparkUI(ui) }
+  Option(appIdToUI.remove(a.id)).foreach { ui => 
webUi.detachSparkUI(ui) }
   applicationMetricsSystem.removeSource(a.appSource)
 })
 completedApps.trimStart(toRemove)
@@ -818,7 +830,7 @@ private[deploy] class Master(
   waitingApps -= app
 
   // If application events are logged, use them to rebuild the UI
-  rebuildSparkUI(app)
+  asyncRebuildSparkUI(app)
 
   for (exec <- app.executors.values) {
 killExecutor(exec)
@@ -923,49 +935,57 @@ private[deploy] class Master(
* Return the UI if 

spark git commit: [SPARK-12056][CORE] Part 2 Create a TaskAttemptContext only after calling setConf

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 840bd2e00 -> f725b2ec1


[SPARK-12056][CORE] Part 2 Create a TaskAttemptContext only after calling 
setConf

This is a continuation of SPARK-12056, where the same change is applied to 
SqlNewHadoopRDD.scala

andrewor14
FYI

Author: tedyu 

Closes #10164 from tedyu/master.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f725b2ec
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f725b2ec
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f725b2ec

Branch: refs/heads/master
Commit: f725b2ec1ab0d89e35b5e2d3ddeddb79fec85f6d
Parents: 840bd2e
Author: tedyu 
Authored: Tue Dec 15 18:15:10 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:15:10 2015 -0800

--
 .../apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/f725b2ec/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala
index 56cb63d..eea780c 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala
@@ -148,14 +148,14 @@ private[spark] class SqlNewHadoopRDD[V: ClassTag](
   }
   inputMetrics.setBytesReadCallback(bytesReadCallback)
 
-  val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, 
split.index, 0)
-  val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId)
   val format = inputFormatClass.newInstance
   format match {
 case configurable: Configurable =>
   configurable.setConf(conf)
 case _ =>
   }
+  val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, 
split.index, 0)
+  val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId)
   private[this] var reader: RecordReader[Void, V] = null
 
   /**





spark git commit: [SPARK-9516][UI] Improvement of Thread Dump Page

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master c2de99a7c -> a63d9edcf


[SPARK-9516][UI] Improvement of Thread Dump Page

https://issues.apache.org/jira/browse/SPARK-9516

- [x] new look of Thread Dump Page

- [x] click column title to sort

- [x] grep

- [x] search as you type

squito JoshRosen It's ready for review now

Author: CodingCat 

Closes #7910 from CodingCat/SPARK-9516.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a63d9edc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a63d9edc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a63d9edc

Branch: refs/heads/master
Commit: a63d9edcfb8a714a17492517927aa114dea8fea0
Parents: c2de99a
Author: CodingCat 
Authored: Tue Dec 15 18:21:00 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:21:00 2015 -0800

--
 .../org/apache/spark/ui/static/sorttable.js |  6 +-
 .../org/apache/spark/ui/static/table.js | 72 +++
 .../org/apache/spark/ui/static/webui.css| 10 ++-
 .../spark/ui/exec/ExecutorThreadDumpPage.scala  | 73 +++-
 4 files changed, 118 insertions(+), 43 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a63d9edc/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
--
diff --git a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js 
b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
index a73d9a5..ff24147 100644
--- a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
+++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js
@@ -169,7 +169,7 @@ sorttable = {
 for (var i=0; i

spark git commit: [SPARK-12351][MESOS] Add documentation about submitting Spark with mesos cluster mode.

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 9e4ac5645 -> 2c324d35a


[SPARK-12351][MESOS] Add documentation about submitting Spark with mesos 
cluster mode.

Adding more documentation about submitting jobs with mesos cluster mode.

Author: Timothy Chen 

Closes #10086 from tnachen/mesos_supervise_docs.

(cherry picked from commit c2de99a7c3a52b0da96517c7056d2733ef45495f)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c324d35
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c324d35
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c324d35

Branch: refs/heads/branch-1.6
Commit: 2c324d35a698b353c2193e2f9bd8ba08c741c548
Parents: 9e4ac56
Author: Timothy Chen 
Authored: Tue Dec 15 18:20:00 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:20:09 2015 -0800

--
 docs/running-on-mesos.md| 26 +-
 docs/submitting-applications.md | 15 ++-
 2 files changed, 35 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2c324d35/docs/running-on-mesos.md
--
diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md
index a197d0e..3193e17 100644
--- a/docs/running-on-mesos.md
+++ b/docs/running-on-mesos.md
@@ -150,14 +150,30 @@ it does not need to be redundantly passed in as a system 
property.
 Spark on Mesos also supports cluster mode, where the driver is launched in the 
cluster and the client
 can find the results of the driver from the Mesos Web UI.
 
-To use cluster mode, you must start the MesosClusterDispatcher in your cluster 
via the `sbin/start-mesos-dispatcher.sh` script,
-passing in the Mesos master url (e.g: mesos://host:5050).
+To use cluster mode, you must start the `MesosClusterDispatcher` in your 
cluster via the `sbin/start-mesos-dispatcher.sh` script,
+passing in the Mesos master URL (e.g: mesos://host:5050). This starts the 
`MesosClusterDispatcher` as a daemon running on the host.
 
-From the client, you can submit a job to Mesos cluster by running 
`spark-submit` and specifying the master url
-to the url of the MesosClusterDispatcher (e.g: mesos://dispatcher:7077). You 
can view driver statuses on the
+If you like to run the `MesosClusterDispatcher` with Marathon, you need to run 
the `MesosClusterDispatcher` in the foreground (i.e: `bin/spark-class 
org.apache.spark.deploy.mesos.MesosClusterDispatcher`).
+
+From the client, you can submit a job to Mesos cluster by running 
`spark-submit` and specifying the master URL
+to the URL of the `MesosClusterDispatcher` (e.g: mesos://dispatcher:7077). You 
can view driver statuses on the
 Spark cluster Web UI.
 
-Note that jars or python files that are passed to spark-submit should be URIs 
reachable by Mesos slaves.
+For example:
+{% highlight bash %}
+./bin/spark-submit \
+  --class org.apache.spark.examples.SparkPi \
+  --master mesos://207.184.161.138:7077 \
+  --deploy-mode cluster \
+  --supervise \
+  --executor-memory 20G \
+  --total-executor-cores 100 \
+  http://path/to/examples.jar \
+  1000
+{% endhighlight %}
+
+
+Note that jars or python files that are passed to spark-submit should be URIs 
reachable by Mesos slaves, as the Spark driver doesn't automatically upload 
local jars.
 
 # Mesos Run Modes
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2c324d35/docs/submitting-applications.md
--
diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md
index ac2a14e..acbb0f2 100644
--- a/docs/submitting-applications.md
+++ b/docs/submitting-applications.md
@@ -115,6 +115,18 @@ export HADOOP_CONF_DIR=XXX
   --master spark://207.184.161.138:7077 \
   examples/src/main/python/pi.py \
   1000
+
+# Run on a Mesos cluster in cluster deploy mode with supervise
+./bin/spark-submit \
+  --class org.apache.spark.examples.SparkPi \
+  --master mesos://207.184.161.138:7077 \
+  --deploy-mode cluster \
+  --supervise \
+  --executor-memory 20G \
+  --total-executor-cores 100 \
+  http://path/to/examples.jar \
+  1000
+
 {% endhighlight %}
 
 # Master URLs
@@ -132,9 +144,10 @@ The master URL passed to Spark can be in one of the 
following formats:
  mesos://HOST:PORT  Connect to the given Mesos cluster.
The port must be whichever one your master is configured to use, which is 
5050 by default.
 Or, for a Mesos cluster using ZooKeeper, use 
mesos://zk://
+To submit with --deploy-mode cluster, the HOST:PORT 
should be configured to connect to the MesosClusterDispatcher.
 
  yarn  Connect to a  YARN  cluster in
-client or cluster mode depending on the 
value 

spark git commit: [SPARK-12351][MESOS] Add documentation about submitting Spark with mesos cluster mode.

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 369127f03 -> c2de99a7c


[SPARK-12351][MESOS] Add documentation about submitting Spark with mesos 
cluster mode.

Adding more documentation about submitting jobs with mesos cluster mode.

Author: Timothy Chen 

Closes #10086 from tnachen/mesos_supervise_docs.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c2de99a7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c2de99a7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c2de99a7

Branch: refs/heads/master
Commit: c2de99a7c3a52b0da96517c7056d2733ef45495f
Parents: 369127f
Author: Timothy Chen 
Authored: Tue Dec 15 18:20:00 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:20:00 2015 -0800

--
 docs/running-on-mesos.md| 26 +-
 docs/submitting-applications.md | 15 ++-
 2 files changed, 35 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c2de99a7/docs/running-on-mesos.md
--
diff --git a/docs/running-on-mesos.md b/docs/running-on-mesos.md
index a197d0e..3193e17 100644
--- a/docs/running-on-mesos.md
+++ b/docs/running-on-mesos.md
@@ -150,14 +150,30 @@ it does not need to be redundantly passed in as a system 
property.
 Spark on Mesos also supports cluster mode, where the driver is launched in the 
cluster and the client
 can find the results of the driver from the Mesos Web UI.
 
-To use cluster mode, you must start the MesosClusterDispatcher in your cluster 
via the `sbin/start-mesos-dispatcher.sh` script,
-passing in the Mesos master url (e.g: mesos://host:5050).
+To use cluster mode, you must start the `MesosClusterDispatcher` in your 
cluster via the `sbin/start-mesos-dispatcher.sh` script,
+passing in the Mesos master URL (e.g: mesos://host:5050). This starts the 
`MesosClusterDispatcher` as a daemon running on the host.
 
-From the client, you can submit a job to Mesos cluster by running 
`spark-submit` and specifying the master url
-to the url of the MesosClusterDispatcher (e.g: mesos://dispatcher:7077). You 
can view driver statuses on the
+If you like to run the `MesosClusterDispatcher` with Marathon, you need to run 
the `MesosClusterDispatcher` in the foreground (i.e: `bin/spark-class 
org.apache.spark.deploy.mesos.MesosClusterDispatcher`).
+
+From the client, you can submit a job to Mesos cluster by running 
`spark-submit` and specifying the master URL
+to the URL of the `MesosClusterDispatcher` (e.g: mesos://dispatcher:7077). You 
can view driver statuses on the
 Spark cluster Web UI.
 
-Note that jars or python files that are passed to spark-submit should be URIs 
reachable by Mesos slaves.
+For example:
+{% highlight bash %}
+./bin/spark-submit \
+  --class org.apache.spark.examples.SparkPi \
+  --master mesos://207.184.161.138:7077 \
+  --deploy-mode cluster \
+  --supervise \
+  --executor-memory 20G \
+  --total-executor-cores 100 \
+  http://path/to/examples.jar \
+  1000
+{% endhighlight %}
+
+
+Note that jars or python files that are passed to spark-submit should be URIs 
reachable by Mesos slaves, as the Spark driver doesn't automatically upload 
local jars.
 
 # Mesos Run Modes
 

http://git-wip-us.apache.org/repos/asf/spark/blob/c2de99a7/docs/submitting-applications.md
--
diff --git a/docs/submitting-applications.md b/docs/submitting-applications.md
index ac2a14e..acbb0f2 100644
--- a/docs/submitting-applications.md
+++ b/docs/submitting-applications.md
@@ -115,6 +115,18 @@ export HADOOP_CONF_DIR=XXX
   --master spark://207.184.161.138:7077 \
   examples/src/main/python/pi.py \
   1000
+
+# Run on a Mesos cluster in cluster deploy mode with supervise
+./bin/spark-submit \
+  --class org.apache.spark.examples.SparkPi \
+  --master mesos://207.184.161.138:7077 \
+  --deploy-mode cluster \
+  --supervise \
+  --executor-memory 20G \
+  --total-executor-cores 100 \
+  http://path/to/examples.jar \
+  1000
+
 {% endhighlight %}
 
 # Master URLs
@@ -132,9 +144,10 @@ The master URL passed to Spark can be in one of the 
following formats:
  mesos://HOST:PORT  Connect to the given Mesos cluster.
The port must be whichever one your master is configured to use, which is 
5050 by default.
 Or, for a Mesos cluster using ZooKeeper, use 
mesos://zk://
+To submit with --deploy-mode cluster, the HOST:PORT 
should be configured to connect to the MesosClusterDispatcher.
 
  yarn  Connect to a  YARN  cluster in
-client or cluster mode depending on the 
value of --deploy-mode. 
+client or cluster mode depending on the 
value of --deploy-mode.
 The cluster location will 

spark git commit: [SPARK-9026][SPARK-4514] Modifications to JobWaiter, FutureAction, and AsyncRDDActions to support non-blocking operation

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master a63d9edcf -> 765a48849


[SPARK-9026][SPARK-4514] Modifications to JobWaiter, FutureAction, and 
AsyncRDDActions to support non-blocking operation

These changes rework the implementations of `SimpleFutureAction`, 
`ComplexFutureAction`, `JobWaiter`, and `AsyncRDDActions` such that 
asynchronous callbacks on the generated `Futures` NEVER block waiting for a job 
to complete. A small amount of mutex synchronization is necessary to protect 
the internal fields that manage cancellation, but these locks are only held 
very briefly and in practice should almost never cause any blocking to occur. 
The existing blocking APIs of these classes are retained, but they simply 
delegate to the underlying non-blocking API and `Await` the results with 
indefinite timeouts.
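
To make the core idea concrete (completion is exposed as a Future that callbacks hang off,
rather than something callers block on), here is a small self-contained sketch of a
JobWaiter-like class built around a Promise. The names are illustrative, not the exact
Spark internals:

import scala.concurrent.{Await, ExecutionContext, Future, Promise}
import scala.concurrent.duration.Duration
import scala.util.Try

// Toy stand-in for JobWaiter: task completions arrive from a scheduler thread and the
// Promise is completed exactly once when every task has finished.
class ToyJobWaiter(totalTasks: Int) {
  private val promise = Promise[Unit]()
  private var finished = 0

  def taskSucceeded(): Unit = synchronized {
    finished += 1
    if (finished == totalTasks) promise.trySuccess(())
  }

  def cancel(error: Exception): Unit = promise.tryFailure(error)

  // Non-blocking handle: callers register callbacks instead of waiting.
  def completionFuture: Future[Unit] = promise.future
}

object ToyJobWaiterDemo {
  import ExecutionContext.Implicits.global

  def main(args: Array[String]): Unit = {
    val waiter = new ToyJobWaiter(totalTasks = 3)

    // Asynchronous path: registering the callback never blocks this thread.
    waiter.completionFuture.onComplete((result: Try[Unit]) => println(s"job done: $result"))

    (1 to 3).foreach(_ => waiter.taskSucceeded())

    // Blocking path, retained for compatibility, simply awaits the same future.
    Await.ready(waiter.completionFuture, Duration.Inf)
  }
}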

Associated JIRA ticket: https://issues.apache.org/jira/browse/SPARK-9026
Also fixes: https://issues.apache.org/jira/browse/SPARK-4514

This pull request contains all my own original work, which I release to the 
Spark project under its open source license.

Author: Richard W. Eggert II 

Closes #9264 from reggert/fix-futureaction.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/765a4884
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/765a4884
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/765a4884

Branch: refs/heads/master
Commit: 765a488494dac0ed38d2b81742c06467b79d96b2
Parents: a63d9ed
Author: Richard W. Eggert II 
Authored: Tue Dec 15 18:22:58 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:22:58 2015 -0800

--
 .../scala/org/apache/spark/FutureAction.scala   | 164 +++
 .../org/apache/spark/rdd/AsyncRDDActions.scala  |  48 +++---
 .../apache/spark/scheduler/DAGScheduler.scala   |   8 +-
 .../org/apache/spark/scheduler/JobWaiter.scala  |  48 +++---
 .../test/scala/org/apache/spark/Smuggle.scala   |  82 ++
 .../org/apache/spark/StatusTrackerSuite.scala   |  26 +++
 .../apache/spark/rdd/AsyncRDDActionsSuite.scala |  33 +++-
 7 files changed, 251 insertions(+), 158 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/765a4884/core/src/main/scala/org/apache/spark/FutureAction.scala
--
diff --git a/core/src/main/scala/org/apache/spark/FutureAction.scala 
b/core/src/main/scala/org/apache/spark/FutureAction.scala
index 48792a9..2a8220f 100644
--- a/core/src/main/scala/org/apache/spark/FutureAction.scala
+++ b/core/src/main/scala/org/apache/spark/FutureAction.scala
@@ -20,13 +20,15 @@ package org.apache.spark
 import java.util.Collections
 import java.util.concurrent.TimeUnit
 
+import scala.concurrent._
+import scala.concurrent.duration.Duration
+import scala.util.Try
+
+import org.apache.spark.annotation.DeveloperApi
 import org.apache.spark.api.java.JavaFutureAction
 import org.apache.spark.rdd.RDD
-import org.apache.spark.scheduler.{JobFailed, JobSucceeded, JobWaiter}
+import org.apache.spark.scheduler.JobWaiter
 
-import scala.concurrent._
-import scala.concurrent.duration.Duration
-import scala.util.{Failure, Try}
 
 /**
  * A future for the result of an action to support cancellation. This is an 
extension of the
@@ -105,6 +107,7 @@ trait FutureAction[T] extends Future[T] {
  * A [[FutureAction]] holding the result of an action that triggers a single 
job. Examples include
  * count, collect, reduce.
  */
+@DeveloperApi
 class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], 
resultFunc: => T)
   extends FutureAction[T] {
 
@@ -116,142 +119,96 @@ class SimpleFutureAction[T] private[spark](jobWaiter: 
JobWaiter[_], resultFunc:
   }
 
   override def ready(atMost: Duration)(implicit permit: CanAwait): 
SimpleFutureAction.this.type = {
-if (!atMost.isFinite()) {
-  awaitResult()
-} else jobWaiter.synchronized {
-  val finishTime = System.currentTimeMillis() + atMost.toMillis
-  while (!isCompleted) {
-val time = System.currentTimeMillis()
-if (time >= finishTime) {
-  throw new TimeoutException
-} else {
-  jobWaiter.wait(finishTime - time)
-}
-  }
-}
+jobWaiter.completionFuture.ready(atMost)
 this
   }
 
   @throws(classOf[Exception])
   override def result(atMost: Duration)(implicit permit: CanAwait): T = {
-ready(atMost)(permit)
-awaitResult() match {
-  case scala.util.Success(res) => res
-  case scala.util.Failure(e) => throw e
-}
+jobWaiter.completionFuture.ready(atMost)
+assert(value.isDefined, "Future has not completed properly")
+value.get.get
   }
 
   override def onComplete[U](func: (Try[T]) => U)(implicit executor: 
ExecutionContext) {
-

spark git commit: [SPARK-10477][SQL] using DSL in ColumnPruningSuite to improve readability

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 93095eb29 -> fb08f7b78


[SPARK-10477][SQL] using DSL in ColumnPruningSuite to improve readability

Author: Wenchen Fan 

Closes #8645 from cloud-fan/test.

(cherry picked from commit a89e8b6122ee5a1517fbcf405b1686619db56696)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fb08f7b7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fb08f7b7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fb08f7b7

Branch: refs/heads/branch-1.6
Commit: fb08f7b784bc8b5e0cd110f315f72c7d9fc65e08
Parents: 93095eb
Author: Wenchen Fan 
Authored: Tue Dec 15 18:29:19 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:29:25 2015 -0800

--
 .../apache/spark/sql/catalyst/dsl/package.scala |  7 ++--
 .../catalyst/optimizer/ColumnPruningSuite.scala | 41 +++-
 2 files changed, 27 insertions(+), 21 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/fb08f7b7/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
index af594c2..e509711 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala
@@ -275,13 +275,14 @@ package object dsl {
 
   def unionAll(otherPlan: LogicalPlan): LogicalPlan = Union(logicalPlan, 
otherPlan)
 
-  // TODO specify the output column names
   def generate(
 generator: Generator,
 join: Boolean = false,
 outer: Boolean = false,
-alias: Option[String] = None): LogicalPlan =
-Generate(generator, join = join, outer = outer, alias, Nil, 
logicalPlan)
+alias: Option[String] = None,
+outputNames: Seq[String] = Nil): LogicalPlan =
+Generate(generator, join = join, outer = outer, alias,
+  outputNames.map(UnresolvedAttribute(_)), logicalPlan)
 
   def insertInto(tableName: String, overwrite: Boolean = false): 
LogicalPlan =
 InsertIntoTable(

http://git-wip-us.apache.org/repos/asf/spark/blob/fb08f7b7/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
index 4a1e7ce..9bf61ae 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ColumnPruningSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.catalyst.optimizer
 
 import org.apache.spark.sql.catalyst.expressions.Explode
 import org.apache.spark.sql.catalyst.plans.PlanTest
-import org.apache.spark.sql.catalyst.plans.logical.{Project, LocalRelation, 
Generate, LogicalPlan}
+import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan}
 import org.apache.spark.sql.catalyst.rules.RuleExecutor
 import org.apache.spark.sql.catalyst.dsl.expressions._
 import org.apache.spark.sql.catalyst.dsl.plans._
@@ -35,12 +35,11 @@ class ColumnPruningSuite extends PlanTest {
   test("Column pruning for Generate when Generate.join = false") {
 val input = LocalRelation('a.int, 'b.array(StringType))
 
-val query = Generate(Explode('b), false, false, None, 's.string :: Nil, 
input).analyze
+val query = input.generate(Explode('b), join = false).analyze
+
 val optimized = Optimize.execute(query)
 
-val correctAnswer =
-  Generate(Explode('b), false, false, None, 's.string :: Nil,
-Project('b.attr :: Nil, input)).analyze
+val correctAnswer = input.select('b).generate(Explode('b), join = 
false).analyze
 
 comparePlans(optimized, correctAnswer)
   }
@@ -49,16 +48,19 @@ class ColumnPruningSuite extends PlanTest {
 val input = LocalRelation('a.int, 'b.int, 'c.array(StringType))
 
 val query =
-  Project(Seq('a, 's),
-Generate(Explode('c), true, false, None, 's.string :: Nil,
-  input)).analyze
+  input
+.generate(Explode('c), join = true, outputNames = "explode" :: Nil)
+.select('a, 'explode)
+.analyze
+
 val optimized = Optimize.execute(query)
 
 val correctAnswer =
-  Project(Seq('a, 's),
-Generate(Explode('c), true, false, None, 

spark git commit: [SPARK-12105] [SQL] add convenient show functions

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 28112657e -> 31b391019


[SPARK-12105] [SQL] add convenient show functions

Author: Jean-Baptiste Onofré 

Closes #10130 from jbonofre/SPARK-12105.
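
A short usage sketch of the now-public API; it assumes a local Spark 1.6-era build that
includes this patch:

import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

object ShowStringExample {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("showString").setMaster("local[2]"))
    val sqlContext = new SQLContext(sc)
    import sqlContext.implicits._

    val df = sc.parallelize(Seq((1, "a"), (2, "b"), (3, "c"))).toDF("id", "name")

    // showString returns the rendered table as a String instead of printing it like show().
    val first20: String = df.showString()                    // no-arg overload, defaults to 20 rows
    val untruncated: String = df.showString(2, truncate = false)
    println(untruncated)

    sc.stop()
  }
}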


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/31b39101
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/31b39101
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/31b39101

Branch: refs/heads/master
Commit: 31b391019ff6eb5a483f4b3e62fd082de7ff8416
Parents: 2811265
Author: Jean-Baptiste Onofré 
Authored: Tue Dec 15 18:06:30 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:08:09 2015 -0800

--
 .../scala/org/apache/spark/sql/DataFrame.scala  | 25 +---
 1 file changed, 16 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/31b39101/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
index 497bd48..b69d441 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
@@ -161,16 +161,23 @@ class DataFrame private[sql](
   }
 
   /**
+* Compose the string representing rows for output
+*/
+  def showString(): String = {
+showString(20)
+  }
+
+  /**
* Compose the string representing rows for output
-   * @param _numRows Number of rows to show
+   * @param numRows Number of rows to show
* @param truncate Whether truncate long strings and align cells right
*/
-  private[sql] def showString(_numRows: Int, truncate: Boolean = true): String 
= {
-val numRows = _numRows.max(0)
+  def showString(numRows: Int, truncate: Boolean = true): String = {
+val _numRows = numRows.max(0)
 val sb = new StringBuilder
-val takeResult = take(numRows + 1)
-val hasMoreData = takeResult.length > numRows
-val data = takeResult.take(numRows)
+val takeResult = take(_numRows + 1)
+val hasMoreData = takeResult.length > _numRows
+val data = takeResult.take(_numRows)
 val numCols = schema.fieldNames.length
 
 // For array values, replace Seq and Array with square brackets
@@ -224,10 +231,10 @@ class DataFrame private[sql](
 
 sb.append(sep)
 
-// For Data that has more than "numRows" records
+// For Data that has more than "_numRows" records
 if (hasMoreData) {
-  val rowsString = if (numRows == 1) "row" else "rows"
-  sb.append(s"only showing top $numRows $rowsString\n")
+  val rowsString = if (_numRows == 1) "row" else "rows"
+  sb.append(s"only showing top $_numRows $rowsString\n")
 }
 
 sb.toString()





spark git commit: [SPARK-10123][DEPLOY] Support specifying deploy mode from configuration

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 765a48849 -> 63ccdef81


[SPARK-10123][DEPLOY] Support specifying deploy mode from configuration

Please help to review, thanks a lot.

Author: jerryshao 

Closes #10195 from jerryshao/SPARK-10123.
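
The resolution order visible in the diff below is: the explicit --deploy-mode flag, then the
spark.submit.deployMode property, then the DEPLOY_MODE environment variable, with client
mode applied later as the default (see the new test cases). A tiny standalone sketch of that
precedence chain, using placeholder inputs rather than the real SparkSubmitArguments
plumbing:

object DeployModeResolution {
  // Mirrors the Option.orElse chain added to SparkSubmitArguments in this patch.
  def resolveDeployMode(
      cliFlag: Option[String],                // value of --deploy-mode, if given
      sparkProperties: Map[String, String],   // e.g. from --conf or spark-defaults.conf
      env: Map[String, String]): String = {
    cliFlag
      .orElse(sparkProperties.get("spark.submit.deployMode"))
      .orElse(env.get("DEPLOY_MODE"))
      .getOrElse("client")                    // the default that SparkSubmit eventually applies
  }

  def main(args: Array[String]): Unit = {
    // The command-line flag wins over the configuration property.
    println(resolveDeployMode(Some("cluster"), Map("spark.submit.deployMode" -> "client"), Map.empty))
    // With no flag, the property is used.
    println(resolveDeployMode(None, Map("spark.submit.deployMode" -> "client"), Map.empty))
    // Nothing specified: client mode.
    println(resolveDeployMode(None, Map.empty, Map.empty))
  }
}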


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/63ccdef8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/63ccdef8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/63ccdef8

Branch: refs/heads/master
Commit: 63ccdef81329e785807f37b4e918a9247fc70e3c
Parents: 765a488
Author: jerryshao 
Authored: Tue Dec 15 18:24:23 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:24:23 2015 -0800

--
 .../spark/deploy/SparkSubmitArguments.scala |  5 ++-
 .../apache/spark/deploy/SparkSubmitSuite.scala  | 41 
 docs/configuration.md   | 15 +--
 .../apache/spark/launcher/SparkLauncher.java|  3 ++
 .../launcher/SparkSubmitCommandBuilder.java |  7 ++--
 5 files changed, 64 insertions(+), 7 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/63ccdef8/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala 
b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
index 18a1c52..915ef81 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala
@@ -176,7 +176,10 @@ private[deploy] class SparkSubmitArguments(args: 
Seq[String], env: Map[String, S
 packages = 
Option(packages).orElse(sparkProperties.get("spark.jars.packages")).orNull
 packagesExclusions = Option(packagesExclusions)
   .orElse(sparkProperties.get("spark.jars.excludes")).orNull
-deployMode = Option(deployMode).orElse(env.get("DEPLOY_MODE")).orNull
+deployMode = Option(deployMode)
+  .orElse(sparkProperties.get("spark.submit.deployMode"))
+  .orElse(env.get("DEPLOY_MODE"))
+  .orNull
 numExecutors = Option(numExecutors)
   .getOrElse(sparkProperties.get("spark.executor.instances").orNull)
 keytab = 
Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull

http://git-wip-us.apache.org/repos/asf/spark/blob/63ccdef8/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala 
b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
index d494b0c..2626f5a 100644
--- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala
@@ -136,6 +136,47 @@ class SparkSubmitSuite
 appArgs.childArgs should be (Seq("--master", "local", "some", "--weird", 
"args"))
   }
 
+  test("specify deploy mode through configuration") {
+val clArgs = Seq(
+  "--master", "yarn",
+  "--conf", "spark.submit.deployMode=client",
+  "--class", "org.SomeClass",
+  "thejar.jar"
+)
+val appArgs = new SparkSubmitArguments(clArgs)
+val (_, _, sysProps, _) = prepareSubmitEnvironment(appArgs)
+
+appArgs.deployMode should be ("client")
+sysProps("spark.submit.deployMode") should be ("client")
+
+// Both cmd line and configuration are specified, cmdline option takes the 
priority
+val clArgs1 = Seq(
+  "--master", "yarn",
+  "--deploy-mode", "cluster",
+  "--conf", "spark.submit.deployMode=client",
+  "-class", "org.SomeClass",
+  "thejar.jar"
+)
+val appArgs1 = new SparkSubmitArguments(clArgs1)
+val (_, _, sysProps1, _) = prepareSubmitEnvironment(appArgs1)
+
+appArgs1.deployMode should be ("cluster")
+sysProps1("spark.submit.deployMode") should be ("cluster")
+
+// Neither cmdline nor configuration are specified, client mode is the 
default choice
+val clArgs2 = Seq(
+  "--master", "yarn",
+  "--class", "org.SomeClass",
+  "thejar.jar"
+)
+val appArgs2 = new SparkSubmitArguments(clArgs2)
+appArgs2.deployMode should be (null)
+
+val (_, _, sysProps2, _) = prepareSubmitEnvironment(appArgs2)
+appArgs2.deployMode should be ("client")
+sysProps2("spark.submit.deployMode") should be ("client")
+  }
+
   test("handles YARN cluster mode") {
 val clArgs = Seq(
   "--deploy-mode", "cluster",

http://git-wip-us.apache.org/repos/asf/spark/blob/63ccdef8/docs/configuration.md
--
diff --git a/docs/configuration.md 

spark git commit: [SPARK-9886][CORE] Fix to use ShutdownHookManager in

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/master 63ccdef81 -> 8a215d233


[SPARK-9886][CORE] Fix to use ShutdownHookManager in

ExternalBlockStore.scala

Author: Naveen 

Closes #10313 from naveenminchu/branch-fix-SPARK-9886.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8a215d23
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8a215d23
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8a215d23

Branch: refs/heads/master
Commit: 8a215d2338c6286253e20122640592f9d69896c8
Parents: 63ccdef
Author: Naveen 
Authored: Tue Dec 15 18:25:22 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:25:22 2015 -0800

--
 .../apache/spark/storage/ExternalBlockStore.scala   | 16 +---
 1 file changed, 5 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8a215d23/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala 
b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala
index db965d5..94883a5 100644
--- a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala
@@ -22,7 +22,7 @@ import java.nio.ByteBuffer
 import scala.util.control.NonFatal
 
 import org.apache.spark.Logging
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{ShutdownHookManager, Utils}
 
 
 /**
@@ -177,15 +177,6 @@ private[spark] class ExternalBlockStore(blockManager: 
BlockManager, executorId:
 }
   }
 
-  private def addShutdownHook() {
-Runtime.getRuntime.addShutdownHook(new Thread("ExternalBlockStore shutdown 
hook") {
-  override def run(): Unit = Utils.logUncaughtExceptions {
-logDebug("Shutdown hook called")
-externalBlockManager.map(_.shutdown())
-  }
-})
-  }
-
   // Create concrete block manager and fall back to Tachyon by default for 
backward compatibility.
   private def createBlkManager(): Option[ExternalBlockManager] = {
 val clsName = 
blockManager.conf.getOption(ExternalBlockStore.BLOCK_MANAGER_NAME)
@@ -196,7 +187,10 @@ private[spark] class ExternalBlockStore(blockManager: 
BlockManager, executorId:
 .newInstance()
 .asInstanceOf[ExternalBlockManager]
   instance.init(blockManager, executorId)
-  addShutdownHook();
+  ShutdownHookManager.addShutdownHook { () =>
+logDebug("Shutdown hook called")
+externalBlockManager.map(_.shutdown())
+  }
   Some(instance)
 } catch {
   case NonFatal(t) =>





spark git commit: [SPARK-9886][CORE] Fix to use ShutdownHookManager in

2015-12-15 Thread andrewor14
Repository: spark
Updated Branches:
  refs/heads/branch-1.6 2c324d35a -> 8e9a60031


[SPARK-9886][CORE] Fix to use ShutdownHookManager in

ExternalBlockStore.scala

Author: Naveen 

Closes #10313 from naveenminchu/branch-fix-SPARK-9886.

(cherry picked from commit 8a215d2338c6286253e20122640592f9d69896c8)
Signed-off-by: Andrew Or 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8e9a6003
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8e9a6003
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8e9a6003

Branch: refs/heads/branch-1.6
Commit: 8e9a600313f3047139d3cebef85acc782903123b
Parents: 2c324d3
Author: Naveen 
Authored: Tue Dec 15 18:25:22 2015 -0800
Committer: Andrew Or 
Committed: Tue Dec 15 18:25:28 2015 -0800

--
 .../apache/spark/storage/ExternalBlockStore.scala   | 16 +---
 1 file changed, 5 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8e9a6003/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala
--
diff --git 
a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala 
b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala
index db965d5..94883a5 100644
--- a/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala
+++ b/core/src/main/scala/org/apache/spark/storage/ExternalBlockStore.scala
@@ -22,7 +22,7 @@ import java.nio.ByteBuffer
 import scala.util.control.NonFatal
 
 import org.apache.spark.Logging
-import org.apache.spark.util.Utils
+import org.apache.spark.util.{ShutdownHookManager, Utils}
 
 
 /**
@@ -177,15 +177,6 @@ private[spark] class ExternalBlockStore(blockManager: 
BlockManager, executorId:
 }
   }
 
-  private def addShutdownHook() {
-Runtime.getRuntime.addShutdownHook(new Thread("ExternalBlockStore shutdown 
hook") {
-  override def run(): Unit = Utils.logUncaughtExceptions {
-logDebug("Shutdown hook called")
-externalBlockManager.map(_.shutdown())
-  }
-})
-  }
-
   // Create concrete block manager and fall back to Tachyon by default for 
backward compatibility.
   private def createBlkManager(): Option[ExternalBlockManager] = {
 val clsName = 
blockManager.conf.getOption(ExternalBlockStore.BLOCK_MANAGER_NAME)
@@ -196,7 +187,10 @@ private[spark] class ExternalBlockStore(blockManager: 
BlockManager, executorId:
 .newInstance()
 .asInstanceOf[ExternalBlockManager]
   instance.init(blockManager, executorId)
-  addShutdownHook();
+  ShutdownHookManager.addShutdownHook { () =>
+logDebug("Shutdown hook called")
+externalBlockManager.map(_.shutdown())
+  }
   Some(instance)
 } catch {
   case NonFatal(t) =>





spark git commit: [SPARK-12249][SQL] JDBC non-equality comparison operator not pushed down.

2015-12-15 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master d52bf47e1 -> 0f6936b5f


[SPARK-12249][SQL] JDBC non-equality comparison operator not pushed down.

https://issues.apache.org/jira/browse/SPARK-12249
Currently the `!=` operator is not pushed down correctly.
I simply added a case for this.

Author: hyukjinkwon 

Closes #10233 from HyukjinKwon/SPARK-12249.
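
A self-contained sketch of the translation this patch extends, written against the public
org.apache.spark.sql.sources filter classes. The real logic lives in JDBCRDD.compileFilter;
value quoting is simplified here and only a subset of the cases is shown:

import org.apache.spark.sql.sources._

object JdbcFilterSketch {
  // Simplified value rendering; the real code also handles other value types.
  private def compileValue(value: Any): Any = value match {
    case s: String => s"'${s.replace("'", "''")}'"
    case v => v
  }

  // Translate a data source Filter into a SQL WHERE fragment, or null if unsupported.
  def compileFilter(f: Filter): String = f match {
    case EqualTo(attr, value)            => s"$attr = ${compileValue(value)}"
    case Not(EqualTo(attr, value))       => s"$attr != ${compileValue(value)}" // case added here
    case LessThan(attr, value)           => s"$attr < ${compileValue(value)}"
    case GreaterThan(attr, value)        => s"$attr > ${compileValue(value)}"
    case LessThanOrEqual(attr, value)    => s"$attr <= ${compileValue(value)}"
    case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}"
    case IsNull(attr)                    => s"$attr IS NULL"
    case _ => null
  }

  def main(args: Array[String]): Unit = {
    println(compileFilter(Not(EqualTo("THEID", 2))))     // THEID != 2
    println(compileFilter(Not(EqualTo("NAME", "fred")))) // NAME != 'fred'
  }
}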


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0f6936b5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0f6936b5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0f6936b5

Branch: refs/heads/master
Commit: 0f6936b5f1c9b0be1c33b98ffb62a72ae0c3e2a8
Parents: d52bf47
Author: hyukjinkwon 
Authored: Tue Dec 15 22:22:49 2015 -0800
Committer: Reynold Xin 
Committed: Tue Dec 15 22:22:49 2015 -0800

--
 .../org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala  | 1 +
 sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala  | 2 ++
 2 files changed, 3 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0f6936b5/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
index 1c348ed..c18a2d2 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
@@ -281,6 +281,7 @@ private[sql] class JDBCRDD(
*/
   private def compileFilter(f: Filter): String = f match {
 case EqualTo(attr, value) => s"$attr = ${compileValue(value)}"
+case Not(EqualTo(attr, value)) => s"$attr != ${compileValue(value)}"
 case LessThan(attr, value) => s"$attr < ${compileValue(value)}"
 case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}"
 case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}"

http://git-wip-us.apache.org/repos/asf/spark/blob/0f6936b5/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index a360947..aca1443 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -177,9 +177,11 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter 
with SharedSQLContext
 
   test("SELECT * WHERE (simple predicates)") {
 assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 
1")).collect().size === 0)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID != 
2")).collect().size === 2)
 assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 
1")).collect().size === 1)
 assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 
'fred'")).collect().size === 1)
 assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 
'fred'")).collect().size === 2)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 
'fred'")).collect().size === 2)
   }
 
   test("SELECT * WHERE (quoted strings)") {





spark git commit: [SPARK-12315][SQL] isnotnull operator not pushed down for JDBC datasource.

2015-12-15 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 7f443a687 -> 2aad2d372


[SPARK-12315][SQL] isnotnull operator not pushed down for JDBC datasource.

https://issues.apache.org/jira/browse/SPARK-12315
`IsNotNull` filter is not being pushed down for JDBC datasource.

It looks like this is part of the SQL standard according to 
[SQL-92](http://www.contrib.andrew.cmu.edu/~shadow/sql/sql1992.txt), SQL:1999, 
[SQL:2003](http://www.wiscorp.com/sql_2003_standard.zip) and 
[SQL:201x](http://www.wiscorp.com/sql20nn.zip), and I believe most databases 
support this.

In this PR, I simply added the case for `IsNotNull` filter to produce a proper 
filter string.

Author: hyukjinkwon 

This patch had conflicts when merged, resolved by
Committer: Reynold Xin 

Closes #10287 from HyukjinKwon/SPARK-12315.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2aad2d37
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2aad2d37
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2aad2d37

Branch: refs/heads/master
Commit: 2aad2d372469aaf2773876cae98ef002fef03aa3
Parents: 7f443a6
Author: hyukjinkwon 
Authored: Tue Dec 15 22:30:35 2015 -0800
Committer: Reynold Xin 
Committed: Tue Dec 15 22:30:35 2015 -0800

--
 .../org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala  | 1 +
 sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala  | 2 ++
 2 files changed, 3 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2aad2d37/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
index 3271b46..2d38562 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala
@@ -287,6 +287,7 @@ private[sql] class JDBCRDD(
 case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}"
 case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}"
 case IsNull(attr) => s"$attr IS NULL"
+case IsNotNull(attr) => s"$attr IS NOT NULL"
 case _ => null
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/2aad2d37/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index 0305667..d6aeb52 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -183,6 +183,8 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter 
with SharedSQLContext
 assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 
'fred'")).collect().size === 2)
 assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 
'fred'")).collect().size === 2)
 assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS 
NULL")).collect().size === 1)
+assert(stripSparkFilter(
+  sql("SELECT * FROM nulltypes WHERE A IS NOT NULL")).collect().size === 0)
   }
 
   test("SELECT * WHERE (quoted strings)") {





spark git commit: Style fix for the previous 3 JDBC filter push down commits.

2015-12-15 Thread rxin
Repository: spark
Updated Branches:
  refs/heads/master 2aad2d372 -> 554d840a9


Style fix for the previous 3 JDBC filter push down commits.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/554d840a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/554d840a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/554d840a

Branch: refs/heads/master
Commit: 554d840a9ade79722c96972257435a05e2aa9d88
Parents: 2aad2d3
Author: Reynold Xin 
Authored: Tue Dec 15 22:32:51 2015 -0800
Committer: Reynold Xin 
Committed: Tue Dec 15 22:32:51 2015 -0800

--
 .../org/apache/spark/sql/jdbc/JDBCSuite.scala  | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/554d840a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
index d6aeb52..2b91f62 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala
@@ -176,15 +176,14 @@ class JDBCSuite extends SparkFunSuite with BeforeAndAfter 
with SharedSQLContext
   }
 
   test("SELECT * WHERE (simple predicates)") {
-assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 
1")).collect().size === 0)
-assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID != 
2")).collect().size === 2)
-assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 
1")).collect().size === 1)
-assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 
'fred'")).collect().size === 1)
-assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 
'fred'")).collect().size === 2)
-assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 
'fred'")).collect().size === 2)
-assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS 
NULL")).collect().size === 1)
-assert(stripSparkFilter(
-  sql("SELECT * FROM nulltypes WHERE A IS NOT NULL")).collect().size === 0)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID < 
1")).collect().size == 0)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID != 
2")).collect().size == 2)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 
1")).collect().size == 1)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 
'fred'")).collect().size == 1)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 
'fred'")).collect().size == 2)
+assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 
'fred'")).collect().size == 2)
+assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS 
NULL")).collect().size == 1)
+assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS NOT 
NULL")).collect().size == 0)
   }
 
   test("SELECT * WHERE (quoted strings)") {

