spark git commit: [SPARK-9710] [TEST] Fix RPackageUtilsSuite when R is not available.
Repository: spark
Updated Branches: refs/heads/branch-1.5 4174b94f0 -> 6c6cadb8f

[SPARK-9710] [TEST] Fix RPackageUtilsSuite when R is not available.

RUtils.isRInstalled throws an exception if R is not installed, instead of returning false. Fix that.

Author: Marcelo Vanzin

Closes #8008 from vanzin/SPARK-9710 and squashes the following commits:

df72d8c [Marcelo Vanzin] [SPARK-9710] [test] Fix RPackageUtilsSuite when R is not available.

(cherry picked from commit 0f3366a4c740147a7a7519922642912e2dd238f8)
Signed-off-by: Shivaram Venkataraman

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6c6cadb8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6c6cadb8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6c6cadb8

Branch: refs/heads/branch-1.5
Commit: 6c6cadb8febbf65e8540ffa832a317a37a4a8168
Parents: 4174b94
Author: Marcelo Vanzin
Authored: Mon Aug 10 10:10:40 2015 -0700
Committer: Shivaram Venkataraman
Committed: Wed Sep 23 07:38:31 2015 -0700

--
 core/src/main/scala/org/apache/spark/api/r/RUtils.scala | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/6c6cadb8/core/src/main/scala/org/apache/spark/api/r/RUtils.scala

diff --git a/core/src/main/scala/org/apache/spark/api/r/RUtils.scala b/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
index daad5b5..646fd0b 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
@@ -67,7 +67,11 @@ private[spark] object RUtils {

   /** Check if R is installed before running tests that use R commands. */
   def isRInstalled: Boolean = {
-    val builder = new ProcessBuilder(Seq("R", "--version"))
-    builder.start().waitFor() == 0
+    try {
+      val builder = new ProcessBuilder(Seq("R", "--version"))
+      builder.start().waitFor() == 0
+    } catch {
+      case e: Exception => false
+    }
   }
 }
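The pattern generalizes to probing for any external tool: ProcessBuilder.start() throws an IOException when the binary is missing, which is exactly the case the patch now catches. A minimal standalone sketch of the corrected behavior (isCommandAvailable is a hypothetical helper, not Spark API):

```scala
// Probe for an external command; treat any launch failure as "not available"
// instead of letting the exception abort the test suite.
def isCommandAvailable(command: String*): Boolean =
  try {
    new ProcessBuilder(command: _*).start().waitFor() == 0
  } catch {
    case _: Exception => false
  }

// Usage: skip R-dependent tests when R cannot be launched.
val rInstalled = isCommandAvailable("R", "--version")
```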
Git Push Summary
Repository: spark
Updated Tags: refs/tags/v1.5.1-rc1 [created] 4df97937d
spark git commit: [SPARK-10763] [ML] [JAVA] [TEST] Update Java MLLIB/ML tests to use simplified dataframe construction
Repository: spark
Updated Branches: refs/heads/master 758c9d25e -> d91967e15

[SPARK-10763] [ML] [JAVA] [TEST] Update Java MLLIB/ML tests to use simplified dataframe construction

As introduced in https://issues.apache.org/jira/browse/SPARK-10630 we now have an easier way to create dataframes from local Java lists. Let's update the tests to use it.

Author: Holden Karau

Closes #8886 from holdenk/SPARK-10763-update-java-mllib-ml-tests-to-use-simplified-dataframe-construction.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d91967e1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d91967e1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d91967e1

Branch: refs/heads/master
Commit: d91967e159f416924bbd7f0db25156588d4bd7b1
Parents: 758c9d2
Author: Holden Karau
Authored: Wed Sep 23 22:49:08 2015 -0700
Committer: Xiangrui Meng
Committed: Wed Sep 23 22:49:08 2015 -0700

--
 .../spark/ml/classification/JavaNaiveBayesSuite.java |  8
 .../apache/spark/ml/feature/JavaBucketizerSuite.java | 14 +++---
 .../org/apache/spark/ml/feature/JavaDCTSuite.java    | 11 +--
 .../apache/spark/ml/feature/JavaHashingTFSuite.java  |  7 ---
 .../ml/feature/JavaPolynomialExpansionSuite.java     |  5 +++--
 .../spark/ml/feature/JavaStopWordsRemoverSuite.java  |  7 ---
 .../spark/ml/feature/JavaStringIndexerSuite.java     |  7 ---
 .../spark/ml/feature/JavaVectorAssemblerSuite.java   |  3 +-
 .../spark/ml/feature/JavaVectorSlicerSuite.java      |  7 ---
 .../apache/spark/ml/feature/JavaWord2VecSuite.java   | 12 ++--
 10 files changed, 42 insertions(+), 39 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/d91967e1/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java

diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java
index 075a62c..f5f690e 100644
--- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java
@@ -19,6 +19,7 @@ package org.apache.spark.ml.classification;

 import java.io.Serializable;
 import java.util.Arrays;
+import java.util.List;

 import org.junit.After;
 import org.junit.Before;
@@ -75,21 +76,20 @@ public class JavaNaiveBayesSuite implements Serializable {

   @Test
   public void testNaiveBayes() {
-    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+    List<Row> data = Arrays.asList(
       RowFactory.create(0.0, Vectors.dense(1.0, 0.0, 0.0)),
       RowFactory.create(0.0, Vectors.dense(2.0, 0.0, 0.0)),
       RowFactory.create(1.0, Vectors.dense(0.0, 1.0, 0.0)),
       RowFactory.create(1.0, Vectors.dense(0.0, 2.0, 0.0)),
       RowFactory.create(2.0, Vectors.dense(0.0, 0.0, 1.0)),
-      RowFactory.create(2.0, Vectors.dense(0.0, 0.0, 2.0))
-    ));
+      RowFactory.create(2.0, Vectors.dense(0.0, 0.0, 2.0)));

     StructType schema = new StructType(new StructField[]{
       new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
       new StructField("features", new VectorUDT(), false, Metadata.empty())
     });

-    DataFrame dataset = jsql.createDataFrame(jrdd, schema);
+    DataFrame dataset = jsql.createDataFrame(data, schema);
     NaiveBayes nb = new NaiveBayes().setSmoothing(0.5).setModelType("multinomial");
     NaiveBayesModel model = nb.fit(dataset);

http://git-wip-us.apache.org/repos/asf/spark/blob/d91967e1/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java

diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java
index 47d68de..8a1e5ef 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java
@@ -55,16 +55,16 @@ public class JavaBucketizerSuite {
   public void bucketizerTest() {
     double[] splits = {-0.5, 0.0, 0.5};

-    JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
-      RowFactory.create(-0.5),
-      RowFactory.create(-0.3),
-      RowFactory.create(0.0),
-      RowFactory.create(0.2)
-    ));
     StructType schema = new StructType(new StructField[] {
       new StructField("feature", DataTypes.DoubleType, false, Metadata.empty())
     });
-    DataFrame dataset = jsql.createDataFrame(data, schema);
+    DataFrame dataset = jsql.createDataFrame(
+      Arrays.asList(
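The same simplification is available from Scala code that needs a small test DataFrame. A sketch assuming a 1.6-era SQLContext (the List-of-Rows overload of createDataFrame is the one SPARK-10630 introduced; smallDataFrame is a hypothetical helper):

```scala
import java.util.Arrays

import org.apache.spark.sql.{DataFrame, Row, RowFactory, SQLContext}
import org.apache.spark.sql.types.{DataTypes, StructField, StructType}

// Build a tiny DataFrame directly from a local java.util.List of Rows,
// with no detour through jsc.parallelize(...) / JavaRDD.
def smallDataFrame(sqlContext: SQLContext): DataFrame = {
  val data: java.util.List[Row] =
    Arrays.asList(RowFactory.create(-0.5), RowFactory.create(0.2))
  val schema = StructType(Array(
    StructField("feature", DataTypes.DoubleType, nullable = false)))
  sqlContext.createDataFrame(data, schema)
}
```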
[1/2] spark git commit: Preparing Spark release v1.5.1-rc1
Repository: spark
Updated Branches: refs/heads/branch-1.5 179f36ed3 -> 3fb011a48

Preparing Spark release v1.5.1-rc1

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4df97937
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4df97937
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4df97937

Branch: refs/heads/branch-1.5
Commit: 4df97937dbf68a9868de58408b9be0bf87dbbb94
Parents: 179f36e
Author: Patrick Wendell
Authored: Wed Sep 23 22:49:35 2015 -0700
Committer: Patrick Wendell
Committed: Wed Sep 23 22:49:35 2015 -0700

--
 assembly/pom.xml                    | 2 +-
 bagel/pom.xml                       | 2 +-
 core/pom.xml                        | 2 +-
 examples/pom.xml                    | 2 +-
 external/flume-assembly/pom.xml     | 2 +-
 external/flume-sink/pom.xml         | 2 +-
 external/flume/pom.xml              | 2 +-
 external/kafka-assembly/pom.xml     | 2 +-
 external/kafka/pom.xml              | 2 +-
 external/mqtt-assembly/pom.xml      | 2 +-
 external/mqtt/pom.xml               | 2 +-
 external/twitter/pom.xml            | 2 +-
 external/zeromq/pom.xml             | 2 +-
 extras/java8-tests/pom.xml          | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml          | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml                      | 2 +-
 launcher/pom.xml                    | 2 +-
 mllib/pom.xml                       | 2 +-
 network/common/pom.xml              | 2 +-
 network/shuffle/pom.xml             | 2 +-
 network/yarn/pom.xml                | 2 +-
 pom.xml                             | 2 +-
 repl/pom.xml                        | 2 +-
 sql/catalyst/pom.xml                | 2 +-
 sql/core/pom.xml                    | 2 +-
 sql/hive-thriftserver/pom.xml       | 2 +-
 sql/hive/pom.xml                    | 2 +-
 streaming/pom.xml                   | 2 +-
 tools/pom.xml                       | 2 +-
 unsafe/pom.xml                      | 2 +-
 yarn/pom.xml                        | 2 +-
 33 files changed, 33 insertions(+), 33 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/assembly/pom.xml

diff --git a/assembly/pom.xml b/assembly/pom.xml
index 7671ba2..03d4973 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/bagel/pom.xml

diff --git a/bagel/pom.xml b/bagel/pom.xml
index 02e920d..6f058ff 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/core/pom.xml

diff --git a/core/pom.xml b/core/pom.xml
index 03d26df..f32ce5d 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/examples/pom.xml

diff --git a/examples/pom.xml b/examples/pom.xml
index eb1910e..f28847e 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/external/flume-assembly/pom.xml

diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index 0de2f03..e7bd0d2 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/external/flume-sink/pom.xml

diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index 66ab1b2..e5a5503 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/external/flume/pom.xml

diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index c058490..b5e9423
[2/2] spark git commit: Preparing development version 1.5.2-SNAPSHOT
Preparing development version 1.5.2-SNAPSHOT

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3fb011a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3fb011a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3fb011a4

Branch: refs/heads/branch-1.5
Commit: 3fb011a486c87c99c73b6453b25dbb76f93845a7
Parents: 4df9793
Author: Patrick Wendell
Authored: Wed Sep 23 22:49:40 2015 -0700
Committer: Patrick Wendell
Committed: Wed Sep 23 22:49:40 2015 -0700

--
 assembly/pom.xml                    | 2 +-
 bagel/pom.xml                       | 2 +-
 core/pom.xml                        | 2 +-
 examples/pom.xml                    | 2 +-
 external/flume-assembly/pom.xml     | 2 +-
 external/flume-sink/pom.xml         | 2 +-
 external/flume/pom.xml              | 2 +-
 external/kafka-assembly/pom.xml     | 2 +-
 external/kafka/pom.xml              | 2 +-
 external/mqtt-assembly/pom.xml      | 2 +-
 external/mqtt/pom.xml               | 2 +-
 external/twitter/pom.xml            | 2 +-
 external/zeromq/pom.xml             | 2 +-
 extras/java8-tests/pom.xml          | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml          | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml                      | 2 +-
 launcher/pom.xml                    | 2 +-
 mllib/pom.xml                       | 2 +-
 network/common/pom.xml              | 2 +-
 network/shuffle/pom.xml             | 2 +-
 network/yarn/pom.xml                | 2 +-
 pom.xml                             | 2 +-
 repl/pom.xml                        | 2 +-
 sql/catalyst/pom.xml                | 2 +-
 sql/core/pom.xml                    | 2 +-
 sql/hive-thriftserver/pom.xml       | 2 +-
 sql/hive/pom.xml                    | 2 +-
 streaming/pom.xml                   | 2 +-
 tools/pom.xml                       | 2 +-
 unsafe/pom.xml                      | 2 +-
 yarn/pom.xml                        | 2 +-
 33 files changed, 33 insertions(+), 33 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/assembly/pom.xml

diff --git a/assembly/pom.xml b/assembly/pom.xml
index 03d4973..7671ba2 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/bagel/pom.xml

diff --git a/bagel/pom.xml b/bagel/pom.xml
index 6f058ff..02e920d 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/core/pom.xml

diff --git a/core/pom.xml b/core/pom.xml
index f32ce5d..03d26df 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/examples/pom.xml

diff --git a/examples/pom.xml b/examples/pom.xml
index f28847e..eb1910e 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/external/flume-assembly/pom.xml

diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index e7bd0d2..0de2f03 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/external/flume-sink/pom.xml

diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index e5a5503..66ab1b2 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/external/flume/pom.xml

diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index b5e9423..c058490 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7
spark git commit: [SPARK-10403] Allow UnsafeRowSerializer to work with tungsten-sort ShuffleManager
Repository: spark
Updated Branches: refs/heads/branch-1.5 6c6cadb8f -> 64cc62cb5

[SPARK-10403] Allow UnsafeRowSerializer to work with tungsten-sort ShuffleManager

This patch attempts to fix an issue where Spark SQL's UnsafeRowSerializer was incompatible with the `tungsten-sort` ShuffleManager.

Author: Josh Rosen

Closes #8873 from JoshRosen/SPARK-10403.

(cherry picked from commit a18208047f06a4244703c17023bb20cbe1f59d73)
Signed-off-by: Michael Armbrust

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/64cc62cb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/64cc62cb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/64cc62cb

Branch: refs/heads/branch-1.5
Commit: 64cc62cb5f14dcc4a69073c48fdf3dd61c5df787
Parents: 6c6cadb
Author: Josh Rosen
Authored: Wed Sep 23 11:31:01 2015 -0700
Committer: Michael Armbrust
Committed: Wed Sep 23 11:31:14 2015 -0700

--
 .../sql/execution/UnsafeRowSerializer.scala  | 22 +--
 .../execution/UnsafeRowSerializerSuite.scala | 23 ++--
 2 files changed, 27 insertions(+), 18 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/64cc62cb/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
index e060c06..7e98126 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
@@ -45,16 +45,9 @@ private[sql] class UnsafeRowSerializer(numFields: Int) extends Serializer with S
 }

 private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInstance {
-
-  /**
-   * Marks the end of a stream written with [[serializeStream()]].
-   */
-  private[this] val EOF: Int = -1
-
   /**
    * Serializes a stream of UnsafeRows. Within the stream, each record consists of a record
    * length (stored as a 4-byte integer, written high byte first), followed by the record's bytes.
-   * The end of the stream is denoted by a record with the special length `EOF` (-1).
    */
   override def serializeStream(out: OutputStream): SerializationStream = new SerializationStream {
     private[this] var writeBuffer: Array[Byte] = new Array[Byte](4096)
@@ -92,7 +85,6 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
     override def close(): Unit = {
       writeBuffer = null
-      dOut.writeInt(EOF)
       dOut.close()
     }
   }
@@ -104,12 +96,20 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
     private[this] var rowBuffer: Array[Byte] = new Array[Byte](1024)
     private[this] var row: UnsafeRow = new UnsafeRow()
     private[this] var rowTuple: (Int, UnsafeRow) = (0, row)
+    private[this] val EOF: Int = -1

     override def asKeyValueIterator: Iterator[(Int, UnsafeRow)] = {
       new Iterator[(Int, UnsafeRow)] {
-        private[this] var rowSize: Int = dIn.readInt()
-        if (rowSize == EOF) dIn.close()
+        private[this] def readSize(): Int = try {
+          dIn.readInt()
+        } catch {
+          case e: EOFException =>
+            dIn.close()
+            EOF
+        }
+
+        private[this] var rowSize: Int = readSize()

         override def hasNext: Boolean = rowSize != EOF

         override def next(): (Int, UnsafeRow) = {
@@ -118,7 +118,7 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
           }
           ByteStreams.readFully(dIn, rowBuffer, 0, rowSize)
           row.pointTo(rowBuffer, Platform.BYTE_ARRAY_OFFSET, numFields, rowSize)
-          rowSize = dIn.readInt() // read the next row's size
+          rowSize = readSize()
           if (rowSize == EOF) { // We are returning the last row in this stream
             dIn.close()
             val _rowTuple = rowTuple

http://git-wip-us.apache.org/repos/asf/spark/blob/64cc62cb/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala
index 0113d05..f7d48bc 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala
+++
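The heart of the fix is replacing the in-band EOF marker with out-of-band end-of-stream detection: the reader treats an EOFException from readInt() as the end of input, which keeps working when several serialized streams are laid back to back, as tungsten-sort's shuffle output does. A self-contained sketch of that reader-side idea (readLengthPrefixed is a hypothetical helper, not the Spark class):

```scala
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream, EOFException}

// Iterate over a length-prefixed record stream, detecting the end via
// EOFException instead of a sentinel length written into the stream.
def readLengthPrefixed(dIn: DataInputStream): Iterator[Array[Byte]] =
  new Iterator[Array[Byte]] {
    private val EOF = -1
    private def readSize(): Int =
      try dIn.readInt()
      catch { case _: EOFException => dIn.close(); EOF }

    private var rowSize = readSize()
    override def hasNext: Boolean = rowSize != EOF
    override def next(): Array[Byte] = {
      val payload = new Array[Byte](rowSize)
      dIn.readFully(payload) // consume the record body
      rowSize = readSize()   // EOF here simply ends the iteration
      payload
    }
  }

// Streams written independently and then concatenated still parse cleanly,
// because no EOF sentinel from the first stream interrupts the second.
val out = new ByteArrayOutputStream()
val dOut = new DataOutputStream(out)
for (record <- Seq("a", "bb", "ccc").map(_.getBytes("UTF-8"))) {
  dOut.writeInt(record.length)
  dOut.write(record)
}
dOut.close()
val records = readLengthPrefixed(
  new DataInputStream(new ByteArrayInputStream(out.toByteArray))).toList
```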
spark git commit: [SPARK-10721] Log warning when file deletion fails
Repository: spark
Updated Branches: refs/heads/master 50e463423 -> 27bfa9ab3

[SPARK-10721] Log warning when file deletion fails

Author: tedyu

Closes #8843 from tedyu/master.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27bfa9ab
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27bfa9ab
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27bfa9ab

Branch: refs/heads/master
Commit: 27bfa9ab3a610e072c011fd88ee4684cea6ceb76
Parents: 50e4634
Author: tedyu
Authored: Wed Sep 23 10:01:28 2015 +0100
Committer: Sean Owen
Committed: Wed Sep 23 10:01:28 2015 +0100

--
 .../unsafe/sort/UnsafeSorterSpillReader.java           |  7 ++-
 .../scala/org/apache/spark/api/python/PythonRDD.scala  |  7 +--
 .../scala/org/apache/spark/deploy/RPackageUtils.scala  | 10 +++---
 .../spark/deploy/history/FsHistoryProvider.scala       |  8 ++--
 .../deploy/master/FileSystemPersistenceEngine.scala    |  5 -
 .../org/apache/spark/rdd/ReliableCheckpointRDD.scala   |  4 +++-
 .../apache/spark/rdd/ReliableRDDCheckpointData.scala   |  6 --
 .../apache/spark/scheduler/EventLoggingListener.scala  |  8 ++--
 .../spark/scheduler/cluster/SimrSchedulerBackend.scala |  4 +++-
 .../spark/shuffle/FileShuffleBlockResolver.scala       |  5 -
 .../spark/shuffle/IndexShuffleBlockResolver.scala      | 13 +
 .../scala/org/apache/spark/storage/DiskStore.scala     | 10 --
 .../spark/util/collection/ExternalAppendOnlyMap.scala  |  8 ++--
 .../apache/spark/util/collection/ExternalSorter.scala  |  4 +++-
 .../network/shuffle/ExternalShuffleBlockResolver.java  |  8 ++--
 15 files changed, 80 insertions(+), 27 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/27bfa9ab/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java

diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java
index 4989b05..501dfe7 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java
@@ -24,12 +24,15 @@ import com.google.common.io.ByteStreams;
 import org.apache.spark.storage.BlockId;
 import org.apache.spark.storage.BlockManager;
 import org.apache.spark.unsafe.Platform;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 /**
  * Reads spill files written by {@link UnsafeSorterSpillWriter} (see that class for a description
  * of the file format).
  */
 final class UnsafeSorterSpillReader extends UnsafeSorterIterator {
+  private static final Logger logger = LoggerFactory.getLogger(UnsafeSorterSpillReader.class);

   private final File file;
   private InputStream in;
@@ -73,7 +76,9 @@ final class UnsafeSorterSpillReader extends UnsafeSorterIterator {
     numRecordsRemaining--;
     if (numRecordsRemaining == 0) {
       in.close();
-      file.delete();
+      if (!file.delete() && file.exists()) {
+        logger.warn("Unable to delete spill file {}", file.getPath());
+      }
       in = null;
       din = null;
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/27bfa9ab/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala

diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 3788d18..19be093 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -871,7 +871,8 @@ private class PythonAccumulatorParam(@transient private val serverHost: String,
  * write the data into disk after deserialization, then Python can read it from disks.
  */
 // scalastyle:off no.finalize
-private[spark] class PythonBroadcast(@transient var path: String) extends Serializable {
+private[spark] class PythonBroadcast(@transient var path: String) extends Serializable
+  with Logging {

   /**
    * Read data from disks, then copy it to `out`
@@ -907,7 +908,9 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial
     if (!path.isEmpty) {
       val file = new File(path)
       if (file.exists()) {
-        file.delete()
+        if (!file.delete()) {
+          logWarning(s"Error deleting ${file.getPath}")
+        }
       }
     }
   }
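The same two-step check recurs across all fifteen touched files. A minimal sketch of the pattern (SafeDelete is a hypothetical wrapper; Spark's Scala classes use the Logging trait's logWarning rather than a raw slf4j logger):

```scala
import java.io.File

import org.slf4j.LoggerFactory

object SafeDelete {
  private val logger = LoggerFactory.getLogger(getClass)

  // File.delete() returns false both when deletion fails and when the file
  // is already gone; the exists() check keeps the warning to real failures.
  def deleteWithWarning(file: File): Unit = {
    if (!file.delete() && file.exists()) {
      logger.warn("Unable to delete file {}", file.getPath)
    }
  }
}
```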
spark git commit: [SPARK-10224] [STREAMING] Fix the issue that blockIntervalTimer won't call updateCurrentBuffer when stopping
Repository: spark
Updated Branches: refs/heads/master 5548a2547 -> 44c28abf1

[SPARK-10224] [STREAMING] Fix the issue that blockIntervalTimer won't call updateCurrentBuffer when stopping

`blockIntervalTimer.stop(interruptTimer = false)` doesn't guarantee calling `updateCurrentBuffer`. So it's possible that `blockIntervalTimer` will exit while `currentBuffer` is not empty, and the data in `currentBuffer` will be lost. To reproduce it, you can add `Thread.sleep(200)` in this line (https://github.com/apache/spark/blob/69c9c177160e32a2fbc9b36ecc52156077fca6fc/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala#L100) and run `StreamingContextSuite`. I cannot write a unit test to reproduce it because I cannot find an approach to force `RecurringTimer` to suspend at this line for a few milliseconds.

There was a failure in Jenkins here: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/41455/console

This PR updates RecurringTimer to make sure `stop(interruptTimer = false)` will call `callback` at least once after the `stop` method is called.

Author: zsxwing

Closes #8417 from zsxwing/SPARK-10224.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/44c28abf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/44c28abf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/44c28abf

Branch: refs/heads/master
Commit: 44c28abf120754c0175c65ffd3d4587a350b3798
Parents: 5548a25
Author: zsxwing
Authored: Wed Sep 23 01:28:02 2015 -0700
Committer: Tathagata Das
Committed: Wed Sep 23 01:28:02 2015 -0700

--
 .../spark/streaming/util/RecurringTimer.scala | 19 +++--
 .../receiver/BlockGeneratorSuite.scala        |  7 +-
 .../streaming/util/RecurringTimerSuite.scala  | 83
 3 files changed, 100 insertions(+), 9 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/44c28abf/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala

diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
index dd32ad5..0148cb5 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
@@ -72,8 +72,10 @@ class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name:

   /**
    * Stop the timer, and return the last time the callback was made.
-   * interruptTimer = true will interrupt the callback
+   * - interruptTimer = true will interrupt the callback
    * if it is in progress (not guaranteed to give correct time in this case).
+   * - interruptTimer = false guarantees that there will be at least one callback after `stop` has
+   *   been called.
    */
   def stop(interruptTimer: Boolean): Long = synchronized {
     if (!stopped) {
@@ -87,18 +89,23 @@ class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name:
     prevTime
   }

+  private def triggerActionForNextInterval(): Unit = {
+    clock.waitTillTime(nextTime)
+    callback(nextTime)
+    prevTime = nextTime
+    nextTime += period
+    logDebug("Callback for " + name + " called at time " + prevTime)
+  }
+
   /**
    * Repeatedly call the callback every interval.
    */
   private def loop() {
     try {
       while (!stopped) {
-        clock.waitTillTime(nextTime)
-        callback(nextTime)
-        prevTime = nextTime
-        nextTime += period
-        logDebug("Callback for " + name + " called at time " + prevTime)
+        triggerActionForNextInterval()
       }
+      triggerActionForNextInterval()
     } catch {
       case e: InterruptedException =>
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/44c28abf/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala

diff --git a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
index a38cc60..2f11b25 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
@@ -184,9 +184,10 @@ class BlockGeneratorSuite extends SparkFunSuite with BeforeAndAfter {
     // Verify that the final data is present in the final generated block and
     // pushed before complete stop
     assert(blockGenerator.isStopped() === false) // generator
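The guarantee can be modeled in isolation. A compact, self-contained sketch of the fixed semantics (SimpleRecurringTimer is hypothetical, not the Spark class; the real implementation drives a manual Clock and tracks prevTime/nextTime):

```scala
// After stop() is observed, the loop runs the callback exactly once more,
// so data buffered for the current interval is flushed rather than dropped.
class SimpleRecurringTimer(periodMs: Long, callback: Long => Unit) {
  @volatile private var stopped = false

  private val thread = new Thread("simple-recurring-timer") {
    override def run(): Unit = {
      try {
        while (!stopped) {
          Thread.sleep(periodMs) // stand-in for clock.waitTillTime(nextTime)
          callback(System.currentTimeMillis())
        }
        callback(System.currentTimeMillis()) // guaranteed final callback
      } catch {
        case _: InterruptedException => // interrupting skips the final callback
      }
    }
  }

  def start(): Unit = thread.start()

  def stop(interruptTimer: Boolean): Unit = {
    stopped = true
    if (interruptTimer) thread.interrupt()
    thread.join()
  }
}
```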
spark git commit: [SPARK-10769] [STREAMING] [TESTS] Fix o.a.s.streaming.CheckpointSuite.maintains rate controller
Repository: spark
Updated Branches: refs/heads/branch-1.5 6a616d0d0 -> 4174b94f0

[SPARK-10769] [STREAMING] [TESTS] Fix o.a.s.streaming.CheckpointSuite.maintains rate controller

Fixed the following failure in https://amplab.cs.berkeley.edu/jenkins/job/NewSparkPullRequestBuilder/1787/testReport/junit/org.apache.spark.streaming/CheckpointSuite/recovery_maintains_rate_controller/

```
sbt.ForkMain$ForkError: The code passed to eventually never returned normally. Attempted 660 times over 10.4439201 seconds. Last failure message: 9223372036854775807 did not equal 200.
	at org.scalatest.concurrent.Eventually$class.tryTryAgain$1(Eventually.scala:420)
	at org.scalatest.concurrent.Eventually$class.eventually(Eventually.scala:438)
	at org.scalatest.concurrent.Eventually$.eventually(Eventually.scala:478)
	at org.scalatest.concurrent.Eventually$class.eventually(Eventually.scala:336)
	at org.scalatest.concurrent.Eventually$.eventually(Eventually.scala:478)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply$mcV$sp(CheckpointSuite.scala:413)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply(CheckpointSuite.scala:396)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply(CheckpointSuite.scala:396)
	at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
	at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
	at org.scalatest.Transformer.apply(Transformer.scala:22)
```

In this test, it calls `advanceTimeWithRealDelay(ssc, 2)` to run two batch jobs. However, there is a race condition: these two jobs can finish before the receiver is registered. Then `UpdateRateLimit` won't be sent to the receiver and `getDefaultBlockGeneratorRateLimit` cannot be updated. Here are the logs related to this issue:

```
15/09/22 19:28:26.154 pool-1-thread-1-ScalaTest-running-CheckpointSuite INFO CheckpointSuite: Manual clock before advancing = 2500
15/09/22 19:28:26.869 JobScheduler INFO JobScheduler: Finished job streaming job 3000 ms.0 from job set of time 3000 ms
15/09/22 19:28:26.869 JobScheduler INFO JobScheduler: Total delay: 1442975303.869 s for time 3000 ms (execution: 0.711 s)
15/09/22 19:28:26.873 JobScheduler INFO JobScheduler: Finished job streaming job 3500 ms.0 from job set of time 3500 ms
15/09/22 19:28:26.873 JobScheduler INFO JobScheduler: Total delay: 1442975303.373 s for time 3500 ms (execution: 0.004 s)
15/09/22 19:28:26.879 sparkDriver-akka.actor.default-dispatcher-3 INFO ReceiverTracker: Registered receiver for stream 0 from localhost:57749
15/09/22 19:28:27.154 pool-1-thread-1-ScalaTest-running-CheckpointSuite INFO CheckpointSuite: Manual clock after advancing = 3500
```

`advanceTimeWithRealDelay(ssc, 2)` triggered the 3000 ms and 3500 ms jobs, but the receiver was registered only after both jobs had finished. So we should make sure the receiver is online before running `advanceTimeWithRealDelay(ssc, 2)`.

Author: zsxwing

Closes #8877 from zsxwing/SPARK-10769.

(cherry picked from commit 50e4634236668a0195390f0080d0ac230d428d05)
Signed-off-by: Tathagata Das

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4174b94f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4174b94f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4174b94f

Branch: refs/heads/branch-1.5
Commit: 4174b94f05282ca51f1219aa6aba3226e205aee0
Parents: 6a616d0
Author: zsxwing
Authored: Wed Sep 23 01:29:30 2015 -0700
Committer: Tathagata Das
Committed: Wed Sep 23 01:30:21 2015 -0700

--
 .../scala/org/apache/spark/streaming/CheckpointSuite.scala | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/4174b94f/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala

diff --git a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
index 1bba7a1..a695653 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
@@ -408,10 +408,14 @@ class CheckpointSuite extends TestSuiteBase {

     ssc = new StreamingContext(checkpointDir)
     ssc.start()

-    val outputNew = advanceTimeWithRealDelay(ssc, 2)
     eventually(timeout(10.seconds)) {
       assert(RateTestReceiver.getActive().nonEmpty)
+    }
+
+    advanceTimeWithRealDelay(ssc, 2)
+
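Spelled out, the reordering the (truncated) hunk performs is a three-step sketch; the helper names are those of the suite itself, so this is not standalone code:

```scala
// 1. Block until the receiver has registered with the tracker.
eventually(timeout(10.seconds)) {
  assert(RateTestReceiver.getActive().nonEmpty)
}

// 2. Only now run the two batch jobs, so UpdateRateLimit can reach the receiver.
advanceTimeWithRealDelay(ssc, 2)

// 3. Poll for the propagated rate limit instead of asserting immediately.
eventually(timeout(10.seconds)) {
  assert(RateTestReceiver.getActive().get.getDefaultBlockGeneratorRateLimit() === 200)
}
```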
spark git commit: [SPARK-10224] [STREAMING] Fix the issue that blockIntervalTimer won't call updateCurrentBuffer when stopping
Repository: spark
Updated Branches: refs/heads/branch-1.5 8a23ef59b -> 6a616d0d0

[SPARK-10224] [STREAMING] Fix the issue that blockIntervalTimer won't call updateCurrentBuffer when stopping

`blockIntervalTimer.stop(interruptTimer = false)` doesn't guarantee calling `updateCurrentBuffer`. So it's possible that `blockIntervalTimer` will exit while `currentBuffer` is not empty, and the data in `currentBuffer` will be lost. To reproduce it, you can add `Thread.sleep(200)` in this line (https://github.com/apache/spark/blob/69c9c177160e32a2fbc9b36ecc52156077fca6fc/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala#L100) and run `StreamingContextSuite`. I cannot write a unit test to reproduce it because I cannot find an approach to force `RecurringTimer` to suspend at this line for a few milliseconds.

There was a failure in Jenkins here: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/41455/console

This PR updates RecurringTimer to make sure `stop(interruptTimer = false)` will call `callback` at least once after the `stop` method is called.

Author: zsxwing

Closes #8417 from zsxwing/SPARK-10224.

(cherry picked from commit 44c28abf120754c0175c65ffd3d4587a350b3798)
Signed-off-by: Tathagata Das

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6a616d0d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6a616d0d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6a616d0d

Branch: refs/heads/branch-1.5
Commit: 6a616d0d02c3fe5d570249695e9ed747bf087dbf
Parents: 8a23ef5
Author: zsxwing
Authored: Wed Sep 23 01:28:02 2015 -0700
Committer: Tathagata Das
Committed: Wed Sep 23 01:28:16 2015 -0700

--
 .../spark/streaming/util/RecurringTimer.scala | 19 +++--
 .../receiver/BlockGeneratorSuite.scala        |  7 +-
 .../streaming/util/RecurringTimerSuite.scala  | 83
 3 files changed, 100 insertions(+), 9 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/6a616d0d/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala

diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
index dd32ad5..0148cb5 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
@@ -72,8 +72,10 @@ class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name:

   /**
    * Stop the timer, and return the last time the callback was made.
-   * interruptTimer = true will interrupt the callback
+   * - interruptTimer = true will interrupt the callback
    * if it is in progress (not guaranteed to give correct time in this case).
+   * - interruptTimer = false guarantees that there will be at least one callback after `stop` has
+   *   been called.
    */
   def stop(interruptTimer: Boolean): Long = synchronized {
     if (!stopped) {
@@ -87,18 +89,23 @@ class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name:
     prevTime
   }

+  private def triggerActionForNextInterval(): Unit = {
+    clock.waitTillTime(nextTime)
+    callback(nextTime)
+    prevTime = nextTime
+    nextTime += period
+    logDebug("Callback for " + name + " called at time " + prevTime)
+  }
+
   /**
    * Repeatedly call the callback every interval.
    */
   private def loop() {
     try {
       while (!stopped) {
-        clock.waitTillTime(nextTime)
-        callback(nextTime)
-        prevTime = nextTime
-        nextTime += period
-        logDebug("Callback for " + name + " called at time " + prevTime)
+        triggerActionForNextInterval()
       }
+      triggerActionForNextInterval()
     } catch {
       case e: InterruptedException =>
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/6a616d0d/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala

diff --git a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
index a38cc60..2f11b25 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
@@ -184,9 +184,10 @@ class BlockGeneratorSuite extends SparkFunSuite with BeforeAndAfter {
     // Verify that the final data is
spark git commit: [SPARK-10769] [STREAMING] [TESTS] Fix o.a.s.streaming.CheckpointSuite.maintains rate controller
Repository: spark
Updated Branches: refs/heads/master 44c28abf1 -> 50e463423

[SPARK-10769] [STREAMING] [TESTS] Fix o.a.s.streaming.CheckpointSuite.maintains rate controller

Fixed the following failure in https://amplab.cs.berkeley.edu/jenkins/job/NewSparkPullRequestBuilder/1787/testReport/junit/org.apache.spark.streaming/CheckpointSuite/recovery_maintains_rate_controller/

```
sbt.ForkMain$ForkError: The code passed to eventually never returned normally. Attempted 660 times over 10.4439201 seconds. Last failure message: 9223372036854775807 did not equal 200.
	at org.scalatest.concurrent.Eventually$class.tryTryAgain$1(Eventually.scala:420)
	at org.scalatest.concurrent.Eventually$class.eventually(Eventually.scala:438)
	at org.scalatest.concurrent.Eventually$.eventually(Eventually.scala:478)
	at org.scalatest.concurrent.Eventually$class.eventually(Eventually.scala:336)
	at org.scalatest.concurrent.Eventually$.eventually(Eventually.scala:478)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply$mcV$sp(CheckpointSuite.scala:413)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply(CheckpointSuite.scala:396)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply(CheckpointSuite.scala:396)
	at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
	at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
	at org.scalatest.Transformer.apply(Transformer.scala:22)
```

In this test, it calls `advanceTimeWithRealDelay(ssc, 2)` to run two batch jobs. However, there is a race condition: these two jobs can finish before the receiver is registered. Then `UpdateRateLimit` won't be sent to the receiver and `getDefaultBlockGeneratorRateLimit` cannot be updated. Here are the logs related to this issue:

```
15/09/22 19:28:26.154 pool-1-thread-1-ScalaTest-running-CheckpointSuite INFO CheckpointSuite: Manual clock before advancing = 2500
15/09/22 19:28:26.869 JobScheduler INFO JobScheduler: Finished job streaming job 3000 ms.0 from job set of time 3000 ms
15/09/22 19:28:26.869 JobScheduler INFO JobScheduler: Total delay: 1442975303.869 s for time 3000 ms (execution: 0.711 s)
15/09/22 19:28:26.873 JobScheduler INFO JobScheduler: Finished job streaming job 3500 ms.0 from job set of time 3500 ms
15/09/22 19:28:26.873 JobScheduler INFO JobScheduler: Total delay: 1442975303.373 s for time 3500 ms (execution: 0.004 s)
15/09/22 19:28:26.879 sparkDriver-akka.actor.default-dispatcher-3 INFO ReceiverTracker: Registered receiver for stream 0 from localhost:57749
15/09/22 19:28:27.154 pool-1-thread-1-ScalaTest-running-CheckpointSuite INFO CheckpointSuite: Manual clock after advancing = 3500
```

`advanceTimeWithRealDelay(ssc, 2)` triggered the 3000 ms and 3500 ms jobs, but the receiver was registered only after both jobs had finished. So we should make sure the receiver is online before running `advanceTimeWithRealDelay(ssc, 2)`.

Author: zsxwing

Closes #8877 from zsxwing/SPARK-10769.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50e46342
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50e46342
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50e46342

Branch: refs/heads/master
Commit: 50e4634236668a0195390f0080d0ac230d428d05
Parents: 44c28ab
Author: zsxwing
Authored: Wed Sep 23 01:29:30 2015 -0700
Committer: Tathagata Das
Committed: Wed Sep 23 01:29:30 2015 -0700

--
 .../scala/org/apache/spark/streaming/CheckpointSuite.scala | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/50e46342/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala

diff --git a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
index 1bba7a1..a695653 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
@@ -408,10 +408,14 @@ class CheckpointSuite extends TestSuiteBase {

     ssc = new StreamingContext(checkpointDir)
     ssc.start()

-    val outputNew = advanceTimeWithRealDelay(ssc, 2)
     eventually(timeout(10.seconds)) {
       assert(RateTestReceiver.getActive().nonEmpty)
+    }
+
+    advanceTimeWithRealDelay(ssc, 2)
+
+    eventually(timeout(10.seconds)) {
       assert(RateTestReceiver.getActive().get.getDefaultBlockGeneratorRateLimit() === 200)
     }
spark git commit: [SPARK-9715] [ML] Store numFeatures in all ML PredictionModel types
Repository: spark
Updated Branches: refs/heads/master a18208047 -> 098be27ad

[SPARK-9715] [ML] Store numFeatures in all ML PredictionModel types

All prediction models should store `numFeatures`, indicating the number of features the model was trained on. A default value of -1 is added for backwards compatibility.

Author: sethah

Closes #8675 from sethah/SPARK-9715.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/098be27a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/098be27a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/098be27a

Branch: refs/heads/master
Commit: 098be27ad53c485ee2fc7f5871c47f899020e87b
Parents: a182080
Author: sethah
Authored: Wed Sep 23 15:00:52 2015 -0700
Committer: Joseph K. Bradley
Committed: Wed Sep 23 15:00:52 2015 -0700

--
 .../examples/ml/JavaDeveloperApiExample.java    |  5
 .../spark/examples/ml/DeveloperApiExample.scala |  3 +++
 .../scala/org/apache/spark/ml/Predictor.scala   |  6 -
 .../classification/DecisionTreeClassifier.scala | 13 ++
 .../spark/ml/classification/GBTClassifier.scala | 26 ++--
 .../ml/classification/LogisticRegression.scala  |  2 ++
 .../MultilayerPerceptronClassifier.scala        |  2 ++
 .../spark/ml/classification/NaiveBayes.scala    |  2 ++
 .../classification/RandomForestClassifier.scala |  8 +++---
 .../ml/regression/DecisionTreeRegressor.scala   | 13 ++
 .../spark/ml/regression/GBTRegressor.scala      | 24 +-
 .../spark/ml/regression/LinearRegression.scala  |  2 ++
 .../ml/regression/RandomForestRegressor.scala   |  7 +++---
 .../spark/ml/tree/impl/RandomForest.scala       | 14 ---
 .../DecisionTreeClassifierSuite.scala           |  4 ++-
 .../ml/classification/GBTClassifierSuite.scala  | 11 ++---
 .../LogisticRegressionSuite.scala               |  2 ++
 .../MultilayerPerceptronClassifierSuite.scala   |  4 ++-
 .../ProbabilisticClassifierSuite.scala          |  6 +++--
 .../RandomForestClassifierSuite.scala           |  8 +++---
 .../regression/DecisionTreeRegressorSuite.scala |  2 ++
 .../spark/ml/regression/GBTRegressorSuite.scala |  7 --
 .../ml/regression/LinearRegressionSuite.scala   |  4 ++-
 .../regression/RandomForestRegressorSuite.scala |  2 ++
 .../spark/ml/tree/impl/RandomForestSuite.scala  |  3 ++-
 25 files changed, 130 insertions(+), 50 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/098be27a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java

diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
index a377694..0b4c0d9 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
@@ -220,6 +220,11 @@ class MyJavaLogisticRegressionModel
   public int numClasses() { return 2; }

   /**
+   * Number of features the model was trained on.
+   */
+  public int numFeatures() { return weights_.size(); }
+
+  /**
    * Create a copy of the model.
    * The copy is shallow, except for the embedded paramMap, which gets a deep copy.
    *

http://git-wip-us.apache.org/repos/asf/spark/blob/098be27a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala

diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
index 340c355..3758edc 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
@@ -172,6 +172,9 @@ private class MyLogisticRegressionModel(
   /** Number of classes the label can take. 2 indicates binary classification. */
   override val numClasses: Int = 2

+  /** Number of features the model was trained on. */
+  override val numFeatures: Int = weights.size
+
   /**
    * Create a copy of the model.
    * The copy is shallow, except for the embedded paramMap, which gets a deep copy.

http://git-wip-us.apache.org/repos/asf/spark/blob/098be27a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala

diff --git a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
index 19fe039..e0dcd42 100644
---
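The contract the patch establishes, reduced to a sketch with hypothetical stand-in types (the real base class is org.apache.spark.ml.PredictionModel):

```scala
// Base models report -1 ("unknown") so models trained before this change
// keep working; concrete models override with the training-time width.
abstract class PredictionModelSketch {
  def numFeatures: Int = -1
}

class LinearModelSketch(val weights: Array[Double]) extends PredictionModelSketch {
  override val numFeatures: Int = weights.length
}
```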
spark git commit: [SPARK-10403] Allow UnsafeRowSerializer to work with tungsten-sort ShuffleManager
Repository: spark
Updated Branches: refs/heads/master 27bfa9ab3 -> a18208047

[SPARK-10403] Allow UnsafeRowSerializer to work with tungsten-sort ShuffleManager

This patch attempts to fix an issue where Spark SQL's UnsafeRowSerializer was incompatible with the `tungsten-sort` ShuffleManager.

Author: Josh Rosen

Closes #8873 from JoshRosen/SPARK-10403.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a1820804
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a1820804
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a1820804

Branch: refs/heads/master
Commit: a18208047f06a4244703c17023bb20cbe1f59d73
Parents: 27bfa9a
Author: Josh Rosen
Authored: Wed Sep 23 11:31:01 2015 -0700
Committer: Michael Armbrust
Committed: Wed Sep 23 11:31:01 2015 -0700

--
 .../sql/execution/UnsafeRowSerializer.scala  | 22 +--
 .../execution/UnsafeRowSerializerSuite.scala | 23 ++--
 2 files changed, 27 insertions(+), 18 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/a1820804/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
index e060c06..7e98126 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
@@ -45,16 +45,9 @@ private[sql] class UnsafeRowSerializer(numFields: Int) extends Serializer with S
 }

 private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInstance {
-
-  /**
-   * Marks the end of a stream written with [[serializeStream()]].
-   */
-  private[this] val EOF: Int = -1
-
   /**
    * Serializes a stream of UnsafeRows. Within the stream, each record consists of a record
    * length (stored as a 4-byte integer, written high byte first), followed by the record's bytes.
-   * The end of the stream is denoted by a record with the special length `EOF` (-1).
    */
   override def serializeStream(out: OutputStream): SerializationStream = new SerializationStream {
     private[this] var writeBuffer: Array[Byte] = new Array[Byte](4096)
@@ -92,7 +85,6 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
     override def close(): Unit = {
       writeBuffer = null
-      dOut.writeInt(EOF)
       dOut.close()
     }
   }
@@ -104,12 +96,20 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
     private[this] var rowBuffer: Array[Byte] = new Array[Byte](1024)
     private[this] var row: UnsafeRow = new UnsafeRow()
    private[this] var rowTuple: (Int, UnsafeRow) = (0, row)
+    private[this] val EOF: Int = -1

     override def asKeyValueIterator: Iterator[(Int, UnsafeRow)] = {
       new Iterator[(Int, UnsafeRow)] {
-        private[this] var rowSize: Int = dIn.readInt()
-        if (rowSize == EOF) dIn.close()
+        private[this] def readSize(): Int = try {
+          dIn.readInt()
+        } catch {
+          case e: EOFException =>
+            dIn.close()
+            EOF
+        }
+
+        private[this] var rowSize: Int = readSize()

         override def hasNext: Boolean = rowSize != EOF

         override def next(): (Int, UnsafeRow) = {
@@ -118,7 +118,7 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
           }
           ByteStreams.readFully(dIn, rowBuffer, 0, rowSize)
           row.pointTo(rowBuffer, Platform.BYTE_ARRAY_OFFSET, numFields, rowSize)
-          rowSize = dIn.readInt() // read the next row's size
+          rowSize = readSize()
           if (rowSize == EOF) { // We are returning the last row in this stream
             dIn.close()
             val _rowTuple = rowTuple

http://git-wip-us.apache.org/repos/asf/spark/blob/a1820804/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala
index 0113d05..f7d48bc 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala
@@ -17,9 +17,10 @@ package org.apache.spark.sql.execution
spark git commit: [SPARK-10686] [ML] Add quantilesCol to AFTSurvivalRegression
Repository: spark
Updated Branches: refs/heads/master 098be27ad -> ce2b056d3

[SPARK-10686] [ML] Add quantilesCol to AFTSurvivalRegression

By default `quantilesCol` should be empty. If `quantileProbabilities` is set, we should append quantiles as a new column (of type Vector).

Author: Yanbo Liang

Closes #8836 from yanboliang/spark-10686.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ce2b056d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ce2b056d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ce2b056d

Branch: refs/heads/master
Commit: ce2b056d35c0c75d5c162b93680ee2d84152e911
Parents: 098be27
Author: Yanbo Liang
Authored: Wed Sep 23 15:26:02 2015 -0700
Committer: Xiangrui Meng
Committed: Wed Sep 23 15:26:02 2015 -0700

--
 .../ml/regression/AFTSurvivalRegression.scala   | 51 +++---
 .../regression/AFTSurvivalRegressionSuite.scala | 74 +---
 2 files changed, 91 insertions(+), 34 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/ce2b056d/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index 5b25db6..717caac 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -41,7 +41,7 @@ import org.apache.spark.storage.StorageLevel
  */
 private[regression] trait AFTSurvivalRegressionParams extends Params
   with HasFeaturesCol with HasLabelCol with HasPredictionCol with HasMaxIter
-  with HasTol with HasFitIntercept {
+  with HasTol with HasFitIntercept with Logging {

   /**
    * Param for censor column name.
@@ -59,21 +59,35 @@ private[regression] trait AFTSurvivalRegressionParams extends Params

   /**
    * Param for quantile probabilities array.
-   * Values of the quantile probabilities array should be in the range [0, 1].
+   * Values of the quantile probabilities array should be in the range [0, 1]
+   * and the array should be non-empty.
    * @group param
    */
   @Since("1.6.0")
   final val quantileProbabilities: DoubleArrayParam = new DoubleArrayParam(this,
     "quantileProbabilities", "quantile probabilities array",
-    (t: Array[Double]) => t.forall(ParamValidators.inRange(0, 1)))
+    (t: Array[Double]) => t.forall(ParamValidators.inRange(0, 1)) && t.length > 0)

   /** @group getParam */
   @Since("1.6.0")
   def getQuantileProbabilities: Array[Double] = $(quantileProbabilities)
+  setDefault(quantileProbabilities -> Array(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99))

-  /** Checks whether the input has quantile probabilities array. */
-  protected[regression] def hasQuantileProbabilities: Boolean = {
-    isDefined(quantileProbabilities) && $(quantileProbabilities).size != 0
+  /**
+   * Param for quantiles column name.
+   * This column will output quantiles of corresponding quantileProbabilities if it is set.
+   * @group param
+   */
+  @Since("1.6.0")
+  final val quantilesCol: Param[String] = new Param(this, "quantilesCol", "quantiles column name")
+
+  /** @group getParam */
+  @Since("1.6.0")
+  def getQuantilesCol: String = $(quantilesCol)
+
+  /** Checks whether the input has quantiles column name. */
+  protected[regression] def hasQuantilesCol: Boolean = {
+    isDefined(quantilesCol) && $(quantilesCol) != ""
   }

   /**
@@ -90,6 +104,9 @@ private[regression] trait AFTSurvivalRegressionParams extends Params
       SchemaUtils.checkColumnType(schema, $(censorCol), DoubleType)
       SchemaUtils.checkColumnType(schema, $(labelCol), DoubleType)
     }
+    if (hasQuantilesCol) {
+      SchemaUtils.appendColumn(schema, $(quantilesCol), new VectorUDT)
+    }
     SchemaUtils.appendColumn(schema, $(predictionCol), DoubleType)
   }
 }
@@ -124,6 +141,14 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
   @Since("1.6.0")
   def setPredictionCol(value: String): this.type = set(predictionCol, value)

+  /** @group setParam */
+  @Since("1.6.0")
+  def setQuantileProbabilities(value: Array[Double]): this.type = set(quantileProbabilities, value)
+
+  /** @group setParam */
+  @Since("1.6.0")
+  def setQuantilesCol(value: String): this.type = set(quantilesCol, value)
+
   /**
    * Set if we should fit the intercept
    * Default is true.
@@ -243,10 +268,12 @@ class AFTSurvivalRegressionModel private[ml] (
   @Since("1.6.0")
   def setQuantileProbabilities(value: Array[Double]): this.type =
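Putting the new setters together, a usage sketch (assumes a DataFrame `training` with the usual "features", "label" and "censor" columns; the column name "quantiles" is arbitrary):

```scala
import org.apache.spark.ml.regression.AFTSurvivalRegression
import org.apache.spark.sql.DataFrame

def fitWithQuantiles(training: DataFrame): DataFrame = {
  val aft = new AFTSurvivalRegression()
    .setQuantileProbabilities(Array(0.25, 0.5, 0.75))
    .setQuantilesCol("quantiles") // left unset, no quantiles column is added
  val model = aft.fit(training)
  // transform() appends a Vector column "quantiles" holding the predicted
  // quantiles at the requested probabilities, alongside "prediction".
  model.transform(training)
}
```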
Git Push Summary
Repository: spark
Updated Tags: refs/tags/v1.5.1-rc1 [deleted] 20db8186d
[1/2] spark git commit: Preparing Spark release v1.5.1-rc1
Repository: spark Updated Branches: refs/heads/branch-1.5 cdc4ac003 -> 179f36ed3 Preparing Spark release v1.5.1-rc1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f894dd6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f894dd6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f894dd6 Branch: refs/heads/branch-1.5 Commit: 4f894dd6906311cb57add6757690069a18078783 Parents: cdc4ac0 Author: Patrick WendellAuthored: Wed Sep 23 21:32:10 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 21:32:10 2015 -0700 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 33 files changed, 33 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 7671ba2..03d4973 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 02e920d..6f058ff 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 03d26df..f32ce5d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index eb1910e..f28847e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 0de2f03..e7bd0d2 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 66ab1b2..e5a5503 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/external/flume/pom.xml -- diff --git a/external/flume/pom.xml 
b/external/flume/pom.xml index c058490..b5e9423 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../../pom.xml
[2/2] spark git commit: Preparing development version 1.5.2-SNAPSHOT
Preparing development version 1.5.2-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/179f36ed Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/179f36ed Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/179f36ed Branch: refs/heads/branch-1.5 Commit: 179f36ed32c1026b5e5d906780608e259c265b1e Parents: 4f894dd Author: Patrick WendellAuthored: Wed Sep 23 21:32:16 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 21:32:16 2015 -0700 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 33 files changed, 33 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 03d4973..7671ba2 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 6f058ff..02e920d 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index f32ce5d..03d26df 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index f28847e..eb1910e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index e7bd0d2..0de2f03 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index e5a5503..66ab1b2 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index b5e9423..c058490 100644 --- 
a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml
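These back-to-back commits are the standard Maven release dance: every module's parent version is pinned to the release number (1.5.1) for the tag, and the branch is immediately moved to the next patch's development version (1.5.2-SNAPSHOT). The version arithmetic is mechanical; a minimal illustrative helper in Scala (not Spark's actual release script):

    // Compute the next development version from a just-tagged release.
    def nextDevVersion(release: String): String = {
      val Array(major, minor, patch) = release.split("\\.").map(_.toInt)
      s"$major.$minor.${patch + 1}-SNAPSHOT"
    }

    assert(nextDevVersion("1.5.1") == "1.5.2-SNAPSHOT")
    assert(nextDevVersion("1.4.2") == "1.4.3-SNAPSHOT")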
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.5.1-rc1 [created] 4f894dd69
spark git commit: [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2)
Repository: spark Updated Branches: refs/heads/master 084e4e126 -> 83f6f54d1 [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2) This patch reverts most of the changes in a previous fix #8827. The real cause of the issue is that in `TungstenAggregate`'s prepare method we only reserve 1 page, but later when we switch to sort-based aggregation we try to acquire 1 page AND a pointer array. The longer-term fix should be to reserve also the pointer array, but for now ***we will simply not track the pointer array***. (Note that elsewhere we already don't track the pointer array, e.g. [here](https://github.com/apache/spark/blob/a18208047f06a4244703c17023bb20cbe1f59d73/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java#L88)) Note: This patch reuses the unit test added in #8827 so it doesn't show up in the diff. Author: Andrew OrCloses # from andrewor14/dont-track-pointer-array. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/83f6f54d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/83f6f54d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/83f6f54d Branch: refs/heads/master Commit: 83f6f54d12a418f5158ee7ee985b54eef8cc1cf0 Parents: 084e4e1 Author: Andrew Or Authored: Wed Sep 23 19:34:31 2015 -0700 Committer: Andrew Or Committed: Wed Sep 23 19:34:31 2015 -0700 -- .../unsafe/sort/UnsafeExternalSorter.java | 51 +--- .../sql/execution/UnsafeKVExternalSorter.java | 9 +--- .../UnsafeFixedWidthAggregationMapSuite.scala | 8 +-- 3 files changed, 16 insertions(+), 52 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/83f6f54d/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java -- diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 14b6aaf..0a311d2 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -159,16 +159,15 @@ public final class UnsafeExternalSorter { /** * Allocates new sort data structures. Called when creating the sorter and after each spill. */ - public void initializeForWriting() throws IOException { + private void initializeForWriting() throws IOException { +// Note: Do not track memory for the pointer array for now because of SPARK-10474. +// In more detail, in TungstenAggregate we only reserve a page, but when we fall back to +// sort-based aggregation we try to acquire a page AND a pointer array, which inevitably +// fails if all other memory is already occupied. It should be safe to not track the array +// because its memory footprint is frequently much smaller than that of a page. This is a +// temporary hack that we should address in 1.6.0. +// TODO: track the pointer array memory! 
this.writeMetrics = new ShuffleWriteMetrics(); -final long pointerArrayMemory = - UnsafeInMemorySorter.getMemoryRequirementsForPointerArray(initialSize); -final long memoryAcquired = shuffleMemoryManager.tryToAcquire(pointerArrayMemory); -if (memoryAcquired != pointerArrayMemory) { - shuffleMemoryManager.release(memoryAcquired); - throw new IOException("Could not acquire " + pointerArrayMemory + " bytes of memory"); -} - this.inMemSorter = new UnsafeInMemorySorter(taskMemoryManager, recordComparator, prefixComparator, initialSize); this.isInMemSorterExternal = false; @@ -187,14 +186,6 @@ public final class UnsafeExternalSorter { * Sort and spill the current records in response to memory pressure. */ public void spill() throws IOException { -spill(true); - } - - /** - * Sort and spill the current records in response to memory pressure. - * @param shouldInitializeForWriting whether to allocate memory for writing after the spill - */ - public void spill(boolean shouldInitializeForWriting) throws IOException { assert(inMemSorter != null); logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", Thread.currentThread().getId(), @@ -225,9 +216,7 @@ public final class UnsafeExternalSorter { // written to disk. This also counts the space needed to store the sorter's pointer array. taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); -if (shouldInitializeForWriting) { - initializeForWriting(); -} +
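The acquisition logic removed above is the all-or-nothing pattern that was failing: ask the shared shuffle memory manager for an exact byte count, hand back any partial grant, and raise if the full amount is unavailable. A minimal Scala sketch of that pattern, with a hypothetical ShuffleMemoryManagerLike trait standing in for Spark's manager:

    import java.io.IOException

    // Hypothetical stand-in for the shared shuffle memory manager;
    // tryToAcquire may grant fewer bytes than were requested.
    trait ShuffleMemoryManagerLike {
      def tryToAcquire(numBytes: Long): Long
      def release(numBytes: Long): Unit
    }

    def acquireExactly(mm: ShuffleMemoryManagerLike, needed: Long): Unit = {
      val granted = mm.tryToAcquire(needed)
      if (granted != needed) {
        // Return the partial grant before failing, otherwise those bytes
        // leak from the shared budget.
        mm.release(granted)
        throw new IOException(s"Could not acquire $needed bytes of memory")
      }
    }

When TungstenAggregate has already reserved its single page, this exact-amount acquisition for the pointer array is the call that fails under memory pressure, which is why the interim fix stops tracking the array rather than reserving it up front.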
spark git commit: [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2)
Repository: spark Updated Branches: refs/heads/branch-1.5 7564c2493 -> 1f47e68f5 [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2) This patch reverts most of the changes in a previous fix #8827. The real cause of the issue is that in `TungstenAggregate`'s prepare method we only reserve 1 page, but later when we switch to sort-based aggregation we try to acquire 1 page AND a pointer array. The longer-term fix should be to reserve also the pointer array, but for now ***we will simply not track the pointer array***. (Note that elsewhere we already don't track the pointer array, e.g. [here](https://github.com/apache/spark/blob/a18208047f06a4244703c17023bb20cbe1f59d73/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java#L88)) Note: This patch reuses the unit test added in #8827 so it doesn't show up in the diff. Author: Andrew OrCloses # from andrewor14/dont-track-pointer-array. (cherry picked from commit 83f6f54d12a418f5158ee7ee985b54eef8cc1cf0) Signed-off-by: Andrew Or Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1f47e68f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1f47e68f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1f47e68f Branch: refs/heads/branch-1.5 Commit: 1f47e68f56398e4f39b3b00650aea6f71e832115 Parents: 7564c24 Author: Andrew Or Authored: Wed Sep 23 19:34:31 2015 -0700 Committer: Andrew Or Committed: Wed Sep 23 19:34:47 2015 -0700 -- .../unsafe/sort/UnsafeExternalSorter.java | 51 +--- .../sql/execution/UnsafeKVExternalSorter.java | 9 +--- .../UnsafeFixedWidthAggregationMapSuite.scala | 8 +-- 3 files changed, 16 insertions(+), 52 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1f47e68f/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java -- diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 14b6aaf..0a311d2 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -159,16 +159,15 @@ public final class UnsafeExternalSorter { /** * Allocates new sort data structures. Called when creating the sorter and after each spill. */ - public void initializeForWriting() throws IOException { + private void initializeForWriting() throws IOException { +// Note: Do not track memory for the pointer array for now because of SPARK-10474. +// In more detail, in TungstenAggregate we only reserve a page, but when we fall back to +// sort-based aggregation we try to acquire a page AND a pointer array, which inevitably +// fails if all other memory is already occupied. It should be safe to not track the array +// because its memory footprint is frequently much smaller than that of a page. This is a +// temporary hack that we should address in 1.6.0. +// TODO: track the pointer array memory! 
this.writeMetrics = new ShuffleWriteMetrics(); -final long pointerArrayMemory = - UnsafeInMemorySorter.getMemoryRequirementsForPointerArray(initialSize); -final long memoryAcquired = shuffleMemoryManager.tryToAcquire(pointerArrayMemory); -if (memoryAcquired != pointerArrayMemory) { - shuffleMemoryManager.release(memoryAcquired); - throw new IOException("Could not acquire " + pointerArrayMemory + " bytes of memory"); -} - this.inMemSorter = new UnsafeInMemorySorter(taskMemoryManager, recordComparator, prefixComparator, initialSize); this.isInMemSorterExternal = false; @@ -187,14 +186,6 @@ public final class UnsafeExternalSorter { * Sort and spill the current records in response to memory pressure. */ public void spill() throws IOException { -spill(true); - } - - /** - * Sort and spill the current records in response to memory pressure. - * @param shouldInitializeForWriting whether to allocate memory for writing after the spill - */ - public void spill(boolean shouldInitializeForWriting) throws IOException { assert(inMemSorter != null); logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", Thread.currentThread().getId(), @@ -225,9 +216,7 @@ public final class UnsafeExternalSorter { // written to disk. This also counts the space needed to store the sorter's pointer array.
spark git commit: Update branch-1.5 for 1.5.1 release.
Repository: spark Updated Branches: refs/heads/branch-1.5 1f47e68f5 -> 1000b5d7e Update branch-1.5 for 1.5.1 release. Author: Reynold XinCloses #8890 from rxin/release-1.5.1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1000b5d7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1000b5d7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1000b5d7 Branch: refs/heads/branch-1.5 Commit: 1000b5d7eed22935c07bc2970cc20d2a78241728 Parents: 1f47e68 Author: Reynold Xin Authored: Wed Sep 23 19:46:13 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 19:46:13 2015 -0700 -- CHANGES.txt | 468 +++ R/pkg/DESCRIPTION | 2 +- .../main/scala/org/apache/spark/package.scala | 2 +- dev/create-release/generate-changelist.py | 4 +- docs/_config.yml| 4 +- ec2/spark_ec2.py| 8 +- 6 files changed, 479 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1000b5d7/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 95f80d8..58c5764 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,474 @@ Spark Change Log +Release 1.5.1 + + Bump R version + Reynold Xin + 2015-09-23 19:44:09 -0700 + Commit: 1c60fc1 + + Update branch-1.5 for 1.5.1 release. + Reynold Xin + 2015-09-23 18:14:36 -0700 + Commit: a32934c + + [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2) + Andrew Or + 2015-09-23 19:34:31 -0700 + Commit: 1f47e68, github.com/apache/spark/pull/ + + [SPARK-10731] [SQL] Delegate to Scala's DataFrame.take implementation in Python DataFrame. + Reynold Xin + 2015-09-23 16:43:21 -0700 + Commit: 7564c24, github.com/apache/spark/pull/8876 + + [SPARK-10403] Allow UnsafeRowSerializer to work with tungsten-sort ShuffleManager + Josh Rosen + 2015-09-23 11:31:01 -0700 + Commit: 64cc62c, github.com/apache/spark/pull/8873 + + [SPARK-9710] [TEST] Fix RPackageUtilsSuite when R is not available. + Marcelo Vanzin + 2015-08-10 10:10:40 -0700 + Commit: 6c6cadb, github.com/apache/spark/pull/8008 + + [SPARK-10769] [STREAMING] [TESTS] Fix o.a.s.streaming.CheckpointSuite.maintains rate controller + zsxwing + 2015-09-23 01:29:30 -0700 + Commit: 4174b94, github.com/apache/spark/pull/8877 + + [SPARK-10224] [STREAMING] Fix the issue that blockIntervalTimer won't call updateCurrentBuffer when stopping + zsxwing + 2015-09-23 01:28:02 -0700 + Commit: 6a616d0, github.com/apache/spark/pull/8417 + + [SPARK-10652] [SPARK-10742] [STREAMING] Set meaningful job descriptions for all streaming jobs + Tathagata Das + 2015-09-22 22:44:09 -0700 + Commit: 8a23ef5, github.com/apache/spark/pull/8791 + + [SPARK-10663] Removed unnecessary invocation of DataFrame.toDF method. 
+ Matt Hagen + 2015-09-22 21:14:25 -0700 + Commit: 7f07cc6, github.com/apache/spark/pull/8875 + + [SPARK-10310] [SQL] Fixes script transformation field/line delimiters + Cheng Lian + 2015-09-22 19:41:57 -0700 + Commit: 73d0621, github.com/apache/spark/pull/8860 + + [SPARK-10640] History server fails to parse TaskCommitDenied + Andrew Or + 2015-09-22 16:35:43 -0700 + Commit: 26187ab, github.com/apache/spark/pull/8828 + + Revert "[SPARK-10640] History server fails to parse TaskCommitDenied" + Andrew Or + 2015-09-22 17:10:58 -0700 + Commit: 118ebd4 + + [SPARK-10640] History server fails to parse TaskCommitDenied + Andrew Or + 2015-09-22 16:35:43 -0700 + Commit: 5ffd084, github.com/apache/spark/pull/8828 + + [SPARK-10714] [SPARK-8632] [SPARK-10685] [SQL] Refactor Python UDF handling + Reynold Xin + 2015-09-22 14:11:46 -0700 + Commit: 3339916, github.com/apache/spark/pull/8835 + + [SPARK-10737] [SQL] When using UnsafeRows, SortMergeJoin may return wrong results + Yin Huai + 2015-09-22 13:31:35 -0700 + Commit: 6b1e5c2, github.com/apache/spark/pull/8854 + + [SPARK-10672] [SQL] Do not fail when we cannot save the metadata of a data source table in a hive compatible way + Yin Huai + 2015-09-22 13:29:39 -0700 + Commit: d83dcc9, github.com/apache/spark/pull/8824 + + [SPARK-10740] [SQL] handle nondeterministic expressions correctly for set
[2/2] spark git commit: Preparing development version 1.4.3-SNAPSHOT
Preparing development version 1.4.3-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4a74a28 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4a74a28 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4a74a28 Branch: refs/heads/branch-1.4 Commit: d4a74a28fee2e8c7e0b1cdff04c18dbe0dd7fdff Parents: 0b22a3c Author: Patrick WendellAuthored: Wed Sep 23 19:50:34 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 19:50:34 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml| 2 +- yarn/pom.xml | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 261a5f4..f1be8df 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index e4c0c71..45429e6 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 74d5e3d..5f753ac 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 86a0327..b080518 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index a7845f5..dd761ff 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 8357103..a16e636 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/external/kafka-assembly/pom.xml -- diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml index 3c97b20..64c650e 100644 --- a/external/kafka-assembly/pom.xml +++ b/external/kafka-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 
+1.4.3-SNAPSHOT ../../pom.xml
[1/2] spark git commit: Preparing Spark release v1.4.2-rc1
Repository: spark Updated Branches: refs/heads/branch-1.4 51d9eadbe -> d4a74a28f Preparing Spark release v1.4.2-rc1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b22a3c7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b22a3c7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b22a3c7 Branch: refs/heads/branch-1.4 Commit: 0b22a3c7a3a40ff63a2e740ecab152141271b30d Parents: 51d9ead Author: Patrick WendellAuthored: Wed Sep 23 19:50:27 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 19:50:27 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml| 2 +- yarn/pom.xml | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 228db59..261a5f4 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index ce791a6..e4c0c71 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 176ea9b..74d5e3d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 877c2fb..86a0327 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index ad431fa..a7845f5 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 9789435..8357103 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/external/kafka-assembly/pom.xml -- diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml index 18b1d86..3c97b20 100644 --- a/external/kafka-assembly/pom.xml +++ b/external/kafka-assembly/pom.xml @@ 
-21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../../pom.xml
spark git commit: Bump R version to 1.4.2.
Repository: spark Updated Branches: refs/heads/branch-1.4 8887abb06 -> 51d9eadbe Bump R version to 1.4.2. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51d9eadb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51d9eadb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51d9eadb Branch: refs/heads/branch-1.4 Commit: 51d9eadbe7740c586718a5215941920365c79c23 Parents: 8887abb Author: Reynold Xin Authored: Wed Sep 23 19:48:28 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 19:48:28 2015 -0700 -- R/pkg/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/51d9eadb/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 7379f54..8ac6183 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,7 +1,7 @@ Package: SparkR Type: Package Title: R frontend for Spark -Version: 1.4.1 +Version: 1.4.2 Date: 2013-09-09 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.4.2-rc1 [created] 0b22a3c7a
spark git commit: [SPARK-10692] [STREAMING] Expose failureReasons in BatchInfo for streaming UI to clear failed batches
Repository: spark Updated Branches: refs/heads/branch-1.5 1000b5d7e -> 4c48593bf [SPARK-10692] [STREAMING] Expose failureReasons in BatchInfo for streaming UI to clear failed batches Slightly modified version of #8818, all credit goes to zsxwing Author: zsxwingAuthor: Tathagata Das Closes #8892 from tdas/SPARK-10692. (cherry picked from commit 758c9d25e92417f8c06328c3af7ea2ef0212c79f) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4c48593b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4c48593b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4c48593b Branch: refs/heads/branch-1.5 Commit: 4c48593bf5d44218b42bc8be9573184dd95e6ff2 Parents: 1000b5d Author: zsxwing Authored: Wed Sep 23 19:52:02 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 19:52:10 2015 -0700 -- .../spark/streaming/scheduler/BatchInfo.scala | 10 +++ .../streaming/scheduler/JobScheduler.scala | 26 +++ .../spark/streaming/scheduler/JobSet.scala | 19 - .../streaming/StreamingListenerSuite.scala | 76 4 files changed, 115 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4c48593b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala index 9922b6b..3c86956 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala @@ -39,6 +39,8 @@ case class BatchInfo( processingEndTime: Option[Long] ) { + private var _failureReasons: Map[Int, String] = Map.empty + @deprecated("Use streamIdToInputInfo instead", "1.5.0") def streamIdToNumRecords: Map[Int, Long] = streamIdToInputInfo.mapValues(_.numRecords) @@ -67,4 +69,12 @@ case class BatchInfo( * The number of recorders received by the receivers in this batch. 
*/ def numRecords: Long = streamIdToInputInfo.values.map(_.numRecords).sum + + /** Set the failure reasons corresponding to every output ops in the batch */ + private[streaming] def setFailureReason(reasons: Map[Int, String]): Unit = { +_failureReasons = reasons + } + + /** Failure reasons corresponding to every output ops in the batch */ + private[streaming] def failureReasons = _failureReasons } http://git-wip-us.apache.org/repos/asf/spark/blob/4c48593b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala index fb51b0b..b5546db 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala @@ -166,22 +166,22 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { } private def handleJobCompletion(job: Job) { +val jobSet = jobSets.get(job.time) +jobSet.handleJobCompletion(job) +logInfo("Finished job " + job.id + " from job set of time " + jobSet.time) +if (jobSet.hasCompleted) { + jobSets.remove(jobSet.time) + jobGenerator.onBatchCompletion(jobSet.time) + logInfo("Total delay: %.3f s for time %s (execution: %.3f s)".format( +jobSet.totalDelay / 1000.0, jobSet.time.toString, +jobSet.processingDelay / 1000.0 + )) + listenerBus.post(StreamingListenerBatchCompleted(jobSet.toBatchInfo)) +} job.result match { - case Success(_) => -val jobSet = jobSets.get(job.time) -jobSet.handleJobCompletion(job) -logInfo("Finished job " + job.id + " from job set of time " + jobSet.time) -if (jobSet.hasCompleted) { - jobSets.remove(jobSet.time) - jobGenerator.onBatchCompletion(jobSet.time) - logInfo("Total delay: %.3f s for time %s (execution: %.3f s)".format( -jobSet.totalDelay / 1000.0, jobSet.time.toString, -jobSet.processingDelay / 1000.0 - )) - listenerBus.post(StreamingListenerBatchCompleted(jobSet.toBatchInfo)) -} case Failure(e) => reportError("Error running job " + job, e) + case _ => } }
spark git commit: [SPARK-10692] [STREAMING] Expose failureReasons in BatchInfo for streaming UI to clear failed batches
Repository: spark Updated Branches: refs/heads/master 83f6f54d1 -> 758c9d25e [SPARK-10692] [STREAMING] Expose failureReasons in BatchInfo for streaming UI to clear failed batches Slightly modified version of #8818, all credit goes to zsxwing Author: zsxwingAuthor: Tathagata Das Closes #8892 from tdas/SPARK-10692. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/758c9d25 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/758c9d25 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/758c9d25 Branch: refs/heads/master Commit: 758c9d25e92417f8c06328c3af7ea2ef0212c79f Parents: 83f6f54 Author: zsxwing Authored: Wed Sep 23 19:52:02 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 19:52:02 2015 -0700 -- .../spark/streaming/scheduler/BatchInfo.scala | 10 +++ .../streaming/scheduler/JobScheduler.scala | 26 +++ .../spark/streaming/scheduler/JobSet.scala | 19 - .../streaming/StreamingListenerSuite.scala | 76 4 files changed, 115 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/758c9d25/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala index 9922b6b..3c86956 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala @@ -39,6 +39,8 @@ case class BatchInfo( processingEndTime: Option[Long] ) { + private var _failureReasons: Map[Int, String] = Map.empty + @deprecated("Use streamIdToInputInfo instead", "1.5.0") def streamIdToNumRecords: Map[Int, Long] = streamIdToInputInfo.mapValues(_.numRecords) @@ -67,4 +69,12 @@ case class BatchInfo( * The number of recorders received by the receivers in this batch. 
*/ def numRecords: Long = streamIdToInputInfo.values.map(_.numRecords).sum + + /** Set the failure reasons corresponding to every output ops in the batch */ + private[streaming] def setFailureReason(reasons: Map[Int, String]): Unit = { +_failureReasons = reasons + } + + /** Failure reasons corresponding to every output ops in the batch */ + private[streaming] def failureReasons = _failureReasons } http://git-wip-us.apache.org/repos/asf/spark/blob/758c9d25/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala index 32d995d..66afbf1 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala @@ -166,22 +166,22 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { } private def handleJobCompletion(job: Job) { +val jobSet = jobSets.get(job.time) +jobSet.handleJobCompletion(job) +logInfo("Finished job " + job.id + " from job set of time " + jobSet.time) +if (jobSet.hasCompleted) { + jobSets.remove(jobSet.time) + jobGenerator.onBatchCompletion(jobSet.time) + logInfo("Total delay: %.3f s for time %s (execution: %.3f s)".format( +jobSet.totalDelay / 1000.0, jobSet.time.toString, +jobSet.processingDelay / 1000.0 + )) + listenerBus.post(StreamingListenerBatchCompleted(jobSet.toBatchInfo)) +} job.result match { - case Success(_) => -val jobSet = jobSets.get(job.time) -jobSet.handleJobCompletion(job) -logInfo("Finished job " + job.id + " from job set of time " + jobSet.time) -if (jobSet.hasCompleted) { - jobSets.remove(jobSet.time) - jobGenerator.onBatchCompletion(jobSet.time) - logInfo("Total delay: %.3f s for time %s (execution: %.3f s)".format( -jobSet.totalDelay / 1000.0, jobSet.time.toString, -jobSet.processingDelay / 1000.0 - )) - listenerBus.post(StreamingListenerBatchCompleted(jobSet.toBatchInfo)) -} case Failure(e) => reportError("Error running job " + job, e) + case _ => } } http://git-wip-us.apache.org/repos/asf/spark/blob/758c9d25/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobSet.scala
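The JobScheduler change in both commits is a pure control-flow refactor: the batch-completion bookkeeping (removing the job set, notifying the job generator, posting StreamingListenerBatchCompleted) now runs whether the job succeeded or failed, and the pattern match is left to do nothing but error reporting. A stripped-down Scala sketch of the new shape, with hypothetical Job and callback types:

    import scala.util.{Failure, Try}

    final case class Job(id: Int, result: Try[Unit])

    def handleJobCompletion(
        job: Job,
        completeBatch: () => Unit,   // remove job set, post the listener event
        reportError: Throwable => Unit): Unit = {
      completeBatch()                // runs on success AND on failure
      job.result match {
        case Failure(e) => reportError(e)
        case _          =>           // success needs no extra handling
      }
    }

Because the listener event now fires for failed jobs too, the streaming UI finally sees those batches complete and can clear them, with the per-output-op reasons carried in BatchInfo's failureReasons.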
spark git commit: Update release notes.
Repository: spark Updated Branches: refs/heads/branch-1.5 4c48593bf -> c8a3d6630 Update release notes. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8a3d663 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8a3d663 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8a3d663 Branch: refs/heads/branch-1.5 Commit: c8a3d66308e7b1fb1bddbec0e00f5d6336393951 Parents: 4c48593 Author: Reynold Xin Authored: Wed Sep 23 19:53:56 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 19:53:56 2015 -0700 -- CHANGES.txt | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c8a3d663/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 58c5764..449afa6 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -3,15 +3,15 @@ Spark Change Log Release 1.5.1 - Bump R version - Reynold Xin - 2015-09-23 19:44:09 -0700 - Commit: 1c60fc1 + [SPARK-10692] [STREAMING] Expose failureReasons in BatchInfo for streaming UI to clear failed batches + zsxwing , Tathagata Das + 2015-09-23 19:52:02 -0700 + Commit: 4c48593, github.com/apache/spark/pull/8892 Update branch-1.5 for 1.5.1 release. Reynold Xin - 2015-09-23 18:14:36 -0700 - Commit: a32934c + 2015-09-23 19:46:13 -0700 + Commit: 1000b5d, github.com/apache/spark/pull/8890 [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2) Andrew Or
[1/2] spark git commit: Preparing Spark release v1.5.1-rc1
Repository: spark Updated Branches: refs/heads/branch-1.5 c8a3d6630 -> cdc4ac003 Preparing Spark release v1.5.1-rc1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20db8186 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20db8186 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20db8186 Branch: refs/heads/branch-1.5 Commit: 20db8186dcd40b8d986a78ce2c9d594ae9f2e476 Parents: c8a3d66 Author: Patrick WendellAuthored: Wed Sep 23 19:55:19 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 19:55:19 2015 -0700 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 33 files changed, 33 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 7b41ebb..03d4973 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 16bf17c..6f058ff 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index beb547f..f32ce5d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 3926b79..f28847e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 5eda12d..e7bd0d2 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 33f2cd7..e5a5503 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/external/flume/pom.xml -- diff --git a/external/flume/pom.xml 
b/external/flume/pom.xml index 670c783..b5e9423 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../../pom.xml
[2/2] spark git commit: Preparing development version 1.5.2-SNAPSHOT
Preparing development version 1.5.2-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cdc4ac00 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cdc4ac00 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cdc4ac00 Branch: refs/heads/branch-1.5 Commit: cdc4ac0035c9786e0d90710f7c08cf37496da525 Parents: 20db818 Author: Patrick WendellAuthored: Wed Sep 23 19:55:27 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 19:55:27 2015 -0700 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 33 files changed, 33 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 03d4973..7671ba2 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 6f058ff..02e920d 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index f32ce5d..03d26df 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index f28847e..eb1910e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index e7bd0d2..0de2f03 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index e5a5503..66ab1b2 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index b5e9423..c058490 100644 --- 
a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.5.1-rc1 [created] 20db8186d
spark git commit: [SPARK-10699] [ML] Support checkpointInterval can be disabled
Repository: spark Updated Branches: refs/heads/master ce2b056d3 -> 067afb4e9 [SPARK-10699] [ML] Support checkpointInterval can be disabled Currently use can set ```checkpointInterval``` to specify how often should the cache be check-pointed. But we also need the function that users can disable it. This PR supports that users can disable checkpoint if user setting ```checkpointInterval = -1```. We also add documents for GBT ```cacheNodeIds``` to make users can understand more clearly about checkpoint. Author: Yanbo LiangCloses #8820 from yanboliang/spark-10699. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/067afb4e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/067afb4e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/067afb4e Branch: refs/heads/master Commit: 067afb4e9bb227f159bcbc2aafafce9693303ea9 Parents: ce2b056 Author: Yanbo Liang Authored: Wed Sep 23 16:41:42 2015 -0700 Committer: Joseph K. Bradley Committed: Wed Sep 23 16:41:42 2015 -0700 -- .../spark/ml/classification/DecisionTreeClassifier.scala | 1 - .../org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala | 6 +++--- .../scala/org/apache/spark/ml/param/shared/sharedParams.scala | 4 ++-- .../main/scala/org/apache/spark/ml/recommendation/ALS.scala| 2 +- .../main/scala/org/apache/spark/ml/tree/impl/NodeIdCache.scala | 2 +- mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala | 4 ++-- 6 files changed, 9 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/067afb4e/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index a6f6d46..b0157f7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -19,7 +19,6 @@ package org.apache.spark.ml.classification import org.apache.spark.annotation.Experimental import org.apache.spark.ml.param.ParamMap -import org.apache.spark.ml.param.shared.HasCheckpointInterval import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeClassifierParams} import org.apache.spark.ml.tree.impl.RandomForest import org.apache.spark.ml.util.{Identifiable, MetadataUtils} http://git-wip-us.apache.org/repos/asf/spark/blob/067afb4e/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 8049d51..8cb6b54 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -56,9 +56,9 @@ private[shared] object SharedParamsCodeGen { ParamDesc[String]("inputCol", "input column name"), ParamDesc[Array[String]]("inputCols", "input column names"), ParamDesc[String]("outputCol", "output column name", Some("uid + \"__output\"")), - ParamDesc[Int]("checkpointInterval", "checkpoint interval (>= 1). E.g. 
10 means that " + -"the cache will get checkpointed every 10 iterations.", -isValid = "ParamValidators.gtEq(1)"), + ParamDesc[Int]("checkpointInterval", "set checkpoint interval (>= 1) or " + +"disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed " + +"every 10 iterations", isValid = "(interval: Int) => interval == -1 || interval >= 1"), ParamDesc[Boolean]("fitIntercept", "whether to fit an intercept term", Some("true")), ParamDesc[String]("handleInvalid", "how to handle invalid entries. Options are skip (which " + "will filter out rows with bad values), or error (which will throw an errror). More " + http://git-wip-us.apache.org/repos/asf/spark/blob/067afb4e/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index aff47fc..e362521 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++
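The new validator shown in the diff, (interval: Int) => interval == -1 || interval >= 1, admits exactly two regimes: -1 disables checkpointing entirely, and any value >= 1 checkpoints every that many iterations; 0 and other negative values are rejected. A self-contained Scala sketch of how the rule behaves (shouldCheckpoint is an illustrative helper, not Spark's code):

    val checkpointIntervalIsValid: Int => Boolean =
      (interval: Int) => interval == -1 || interval >= 1

    // Illustrative helper: does iteration `i` trigger a checkpoint?
    def shouldCheckpoint(interval: Int, i: Int): Boolean =
      interval != -1 && i % interval == 0

    assert(checkpointIntervalIsValid(-1))   // disabled, but a valid setting
    assert(checkpointIntervalIsValid(10))   // checkpoint every 10 iterations
    assert(!checkpointIntervalIsValid(0))   // zero is rejected
    assert(!shouldCheckpoint(-1, 100))      // never fires when disabled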
spark git commit: [SPARK-10731] [SQL] Delegate to Scala's DataFrame.take implementation in Python DataFrame.
Repository: spark Updated Branches: refs/heads/master 067afb4e9 -> 995221774 [SPARK-10731] [SQL] Delegate to Scala's DataFrame.take implementation in Python DataFrame. Python DataFrame.head/take now requires scanning all the partitions. This pull request changes them to delegate the actual implementation to Scala DataFrame (by calling DataFrame.take). This is more of a hack for fixing this issue in 1.5.1. A more proper fix is to change executeCollect and executeTake to return InternalRow rather than Row, and thus eliminate the extra round-trip conversion. Author: Reynold XinCloses #8876 from rxin/SPARK-10731. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/99522177 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/99522177 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/99522177 Branch: refs/heads/master Commit: 9952217749118ae78fe794ca11e1c4a87a4ae8ba Parents: 067afb4 Author: Reynold Xin Authored: Wed Sep 23 16:43:21 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 16:43:21 2015 -0700 -- .../org/apache/spark/api/python/PythonRDD.scala | 2 +- python/pyspark/sql/dataframe.py | 5 +- .../org/apache/spark/sql/execution/python.scala | 417 +++ .../apache/spark/sql/execution/pythonUDFs.scala | 405 -- .../apache/spark/sql/test/ExamplePointUDT.scala | 16 +- 5 files changed, 429 insertions(+), 416 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/99522177/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 19be093..8464b57 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -633,7 +633,7 @@ private[spark] object PythonRDD extends Logging { * * The thread will terminate after all the data are sent or any exceptions happen. */ - private def serveIterator[T](items: Iterator[T], threadName: String): Int = { + def serveIterator[T](items: Iterator[T], threadName: String): Int = { val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost")) // Close the socket if no connection in 3 seconds serverSocket.setSoTimeout(3000) http://git-wip-us.apache.org/repos/asf/spark/blob/99522177/python/pyspark/sql/dataframe.py -- diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 80f8d8a..b09422a 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -300,7 +300,10 @@ class DataFrame(object): >>> df.take(2) [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')] """ -return self.limit(num).collect() +with SCCallSiteSync(self._sc) as css: +port = self._sc._jvm.org.apache.spark.sql.execution.EvaluatePython.takeAndServe( +self._jdf, num) +return list(_load_from_socket(port, BatchedSerializer(PickleSerializer( @ignore_unicode_prefix @since(1.3) http://git-wip-us.apache.org/repos/asf/spark/blob/99522177/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala new file mode 100644 index 000..d6aaf42 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala @@ -0,0 +1,417 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. 
See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +*http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.spark.sql.execution + +import java.io.OutputStream +import java.util.{List => JList, Map => JMap} + +import
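The one-line PythonRDD change above (serveIterator going from private to public) is what lets SQL's EvaluatePython.takeAndServe reuse it: rows are taken on the JVM side and streamed to the Python process over a short-lived localhost socket identified only by its port. A hedged Scala sketch of that serve-over-a-socket shape, with simplified serialization and hypothetical names rather than Spark's implementation:

    import java.io.{BufferedOutputStream, DataOutputStream}
    import java.net.{InetAddress, ServerSocket}

    // Bind an ephemeral localhost-only port, stream the items to the first
    // client that connects, then shut down. The caller hands the returned
    // port to the peer process, which connects and drains the stream.
    def serveInts(items: Iterator[Int]): Int = {
      val server = new ServerSocket(0, 1, InetAddress.getByName("localhost"))
      server.setSoTimeout(3000)   // give up if nobody connects within 3 s
      new Thread("serve-ints") {
        setDaemon(true)
        override def run(): Unit = {
          try {
            val sock = server.accept()
            val out = new DataOutputStream(new BufferedOutputStream(sock.getOutputStream))
            try {
              items.foreach(out.writeInt)
              out.flush()
            } finally {
              sock.close()
            }
          } finally {
            server.close()
          }
        }
      }.start()
      server.getLocalPort   // e.g. returned to Python, which connects back
    }

This is also why Python's take no longer scans every partition: the limiting happens in Scala's DataFrame.take, and the Python side just deserializes whatever arrives over the socket.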
spark git commit: [SPARK-10731] [SQL] Delegate to Scala's DataFrame.take implementation in Python DataFrame.
Repository: spark Updated Branches: refs/heads/branch-1.5 64cc62cb5 -> 7564c2493 [SPARK-10731] [SQL] Delegate to Scala's DataFrame.take implementation in Python DataFrame. Python DataFrame.head/take now requires scanning all the partitions. This pull request changes them to delegate the actual implementation to Scala DataFrame (by calling DataFrame.take). This is more of a hack for fixing this issue in 1.5.1. A more proper fix is to change executeCollect and executeTake to return InternalRow rather than Row, and thus eliminate the extra round-trip conversion. Author: Reynold XinCloses #8876 from rxin/SPARK-10731. (cherry picked from commit 9952217749118ae78fe794ca11e1c4a87a4ae8ba) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7564c249 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7564c249 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7564c249 Branch: refs/heads/branch-1.5 Commit: 7564c249333da2b68c8f9c519ad84f81aec0002d Parents: 64cc62c Author: Reynold Xin Authored: Wed Sep 23 16:43:21 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 16:43:34 2015 -0700 -- .../org/apache/spark/api/python/PythonRDD.scala | 2 +- python/pyspark/sql/dataframe.py | 5 +- .../org/apache/spark/sql/execution/python.scala | 412 +++ .../apache/spark/sql/execution/pythonUDFs.scala | 400 -- .../apache/spark/sql/test/ExamplePointUDT.scala | 16 +- 5 files changed, 424 insertions(+), 411 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7564c249/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 8a48202..af86ef5 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -633,7 +633,7 @@ private[spark] object PythonRDD extends Logging { * * The thread will terminate after all the data are sent or any exceptions happen. 
*/ - private def serveIterator[T](items: Iterator[T], threadName: String): Int = { + def serveIterator[T](items: Iterator[T], threadName: String): Int = { val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost")) // Close the socket if no connection in 3 seconds serverSocket.setSoTimeout(3000) http://git-wip-us.apache.org/repos/asf/spark/blob/7564c249/python/pyspark/sql/dataframe.py -- diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 025811f..ee2eed2 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -300,7 +300,10 @@ class DataFrame(object): >>> df.take(2) [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')] """ -return self.limit(num).collect() +with SCCallSiteSync(self._sc) as css: +port = self._sc._jvm.org.apache.spark.sql.execution.EvaluatePython.takeAndServe( +self._jdf, num) +return list(_load_from_socket(port, BatchedSerializer(PickleSerializer( @ignore_unicode_prefix @since(1.3) http://git-wip-us.apache.org/repos/asf/spark/blob/7564c249/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala new file mode 100644 index 000..c967074 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala @@ -0,0 +1,412 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +*http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package
spark git commit: Update branch-1.3 for 1.3.2 release.
Repository: spark Updated Branches: refs/heads/branch-1.3 e54525f4a -> 392875ad3 Update branch-1.3 for 1.3.2 release. Author: Reynold Xin. Closes #8894 from rxin/branch-1.3. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/392875ad Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/392875ad Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/392875ad Branch: refs/heads/branch-1.3 Commit: 392875ad3a3c3f5acd98edaf3b53045ac3e7 Parents: e54525f Author: Reynold Xin Authored: Wed Sep 23 18:46:32 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 18:46:32 2015 -0700 -- CHANGES.txt | 643 +++ .../main/scala/org/apache/spark/package.scala | 2 +- dev/create-release/generate-changelist.py | 4 +- docs/_config.yml| 4 +- ec2/spark_ec2.py| 1 + 5 files changed, 649 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/392875ad/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 7da0244..97116be 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,649 @@ Spark Change Log +Release 1.3.2 + + [SPARK-10381] Fix mixup of taskAttemptNumber & attemptId in OutputCommitCoordinator (branch-1.3 backport) + Josh Rosen + 2015-09-22 13:37:25 -0700 + Commit: e54525f, github.com/apache/spark/pull/8790 + + [SPARK-10657] Remove SCP-based Jenkins log archiving + Josh Rosen + 2015-09-17 11:40:24 -0700 + Commit: 64730a3, github.com/apache/spark/pull/8793 + + [SPARK-10642] [PYSPARK] Fix crash when calling rdd.lookup() on tuple keys + Liang-Chi Hsieh + 2015-09-17 10:02:15 -0700 + Commit: 7494034, github.com/apache/spark/pull/8796 + + [SPARK-10556] Remove explicit Scala version for sbt project build files + Ahir Reddy + 2015-09-11 13:06:14 +0100 + Commit: 8c8d7ab, github.com/apache/spark/pull/8709 + + [SPARK-6931] [PYSPARK] Cast Python time float values to int before serialization + Bryan Cutler + 2015-09-10 11:20:02 -0700 + Commit: d0d7ada, github.com/apache/spark/pull/8594 + + [MINOR] [MLLIB] [ML] [DOC] fixed typo: label for negative result should be 0.0 (original: 1.0) + Sean Paradiso + 2015-09-09 22:09:33 -0700 + Commit: 9fcd831, github.com/apache/spark/pull/8680 + + [SPARK-10353] [MLLIB] (1.3 backport) BLAS gemm not scaling when beta = 0.0 for some subset of matrix multiplications + Sean Owen + 2015-09-02 13:33:24 -0700 + Commit: 29836e2, github.com/apache/spark/pull/8572 + + [SPARK-10354] [MLLIB] fix some apparent memory issues in k-means|| initialization + Xiangrui Meng + 2015-08-30 23:20:03 -0700 + Commit: a58c1af, github.com/apache/spark/pull/8526 + + [SPARK-8400] [ML] Added check in ml.ALS for positive block size parameter setting + Bryan Cutler + 2015-08-25 12:36:49 +0100 + Commit: e8b0564, github.com/apache/spark/pull/8363 + + [SPARK-10169] [SQL] [BRANCH-1.3] Partial aggregation's plan is wrong when a grouping expression is used as an argument of the aggregate function + Wenchen Fan , Yin Huai + 2015-08-24 13:00:49 -0700 + Commit: 3d2eaf0, github.com/apache/spark/pull/8380 + + [SPARK-9801] [STREAMING] Check if file exists before deleting temporary files.
+ Hao Zhu + 2015-08-10 17:17:22 -0700 + Commit: a98603f, github.com/apache/spark/pull/8082 + + [SPARK-9633] [BUILD] SBT download locations outdated; need an update + Sean Owen + 2015-08-06 23:43:52 +0100 + Commit: b104501, github.com/apache/spark/pull/7956 + + [SPARK-9607] [SPARK-9608] fix zinc-port handling in build/mvn + Ryan Williams + 2015-08-05 11:10:47 +0100 + Commit: 384793d, github.com/apache/spark/pull/7944 + + [SPARK-3190] [GRAPHX] Fix VertexRDD.count() overflow regression + Ankur Dave + 2015-08-03 23:07:32 -0700 + Commit: cd5d1be, github.com/apache/spark/pull/7923 + + [SPARK-7563] (backport for 1.3) OutputCommitCoordinator.stop() should only run on the driver + Sean Owen + 2015-08-03 13:59:00 +0100 + Commit: 265ec35, github.com/apache/spark/pull/7865 + + [SPARK-9254] [BUILD] [HOTFIX] sbt-launch-lib.bash should support HTTP/HTTPS redirection + Cheng Lian + 2015-07-22 09:32:42 -0700 + Commit: cc5f711, github.com/apache/spark/pull/7597 + + [SPARK-9507] [BUILD]
spark git commit: Update branch-1.4 for 1.4.2 release.
Repository: spark Updated Branches: refs/heads/branch-1.4 df9e39470 -> 8887abb06 Update branch-1.4 for 1.4.2 release. Author: Reynold Xin. Closes #8891 from rxin/release-1.4.2. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8887abb0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8887abb0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8887abb0 Branch: refs/heads/branch-1.4 Commit: 8887abb06a01c8a264998457c5dfc8cf713675dd Parents: df9e394 Author: Reynold Xin Authored: Wed Sep 23 18:53:55 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 18:53:55 2015 -0700 -- core/src/main/scala/org/apache/spark/package.scala | 2 +- dev/create-release/generate-changelist.py | 4 ++-- docs/_config.yml | 4 ++-- ec2/spark_ec2.py | 2 ++ 4 files changed, 7 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8887abb0/core/src/main/scala/org/apache/spark/package.scala -- diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala index 8f01174..c8d8e7c 100644 --- a/core/src/main/scala/org/apache/spark/package.scala +++ b/core/src/main/scala/org/apache/spark/package.scala @@ -43,5 +43,5 @@ package org.apache package object spark { // For package docs only - val SPARK_VERSION = "1.4.1" + val SPARK_VERSION = "1.4.2" } http://git-wip-us.apache.org/repos/asf/spark/blob/8887abb0/dev/create-release/generate-changelist.py -- diff --git a/dev/create-release/generate-changelist.py b/dev/create-release/generate-changelist.py index 148ed72..64b27cb 100755 --- a/dev/create-release/generate-changelist.py +++ b/dev/create-release/generate-changelist.py @@ -31,8 +31,8 @@ import time import traceback SPARK_HOME = os.environ["SPARK_HOME"] -NEW_RELEASE_VERSION = "1.4.1" -PREV_RELEASE_GIT_TAG = "v1.4.0" +NEW_RELEASE_VERSION = "1.4.2" +PREV_RELEASE_GIT_TAG = "v1.4.1" CHANGELIST = "CHANGES.txt" OLD_CHANGELIST = "%s.old" % (CHANGELIST) http://git-wip-us.apache.org/repos/asf/spark/blob/8887abb0/docs/_config.yml -- diff --git a/docs/_config.yml b/docs/_config.yml index 83f56e8..d1e0d0a 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,8 +14,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 1.4.1 -SPARK_VERSION_SHORT: 1.4.1 +SPARK_VERSION: 1.4.2 +SPARK_VERSION_SHORT: 1.4.2 SCALA_BINARY_VERSION: "2.10" SCALA_VERSION: "2.10.4" MESOS_VERSION: 0.21.0 http://git-wip-us.apache.org/repos/asf/spark/blob/8887abb0/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 3880c2d..b186287 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -72,6 +72,7 @@ VALID_SPARK_VERSIONS = set([ "1.3.1", "1.4.0", "1.4.1", +"1.4.2" ]) SPARK_TACHYON_MAP = { @@ -86,6 +87,7 @@ SPARK_TACHYON_MAP = { "1.3.1": "0.5.0", "1.4.0": "0.6.4", "1.4.1": "0.6.4", +"1.4.2": "0.6.4" } DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION
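The spark_ec2.py hunks show the step that is easy to forget when cutting a release: the new version string has to be added both to VALID_SPARK_VERSIONS and to SPARK_TACHYON_MAP, since launch-time validation checks the first and the Tachyon resolution reads the second. A toy model of the lookup those two tables feed, written in Scala purely for illustration (the real helper is the Python script in the diff):

object Ec2VersionTablesSketch {
  // Condensed versions of the two tables the diff extends.
  private val validSparkVersions = Set("1.3.1", "1.4.0", "1.4.1", "1.4.2")
  private val sparkTachyonMap = Map(
    "1.3.1" -> "0.5.0",
    "1.4.0" -> "0.6.4",
    "1.4.1" -> "0.6.4",
    "1.4.2" -> "0.6.4")

  // Fail fast when a version is missing from either table, which is why a
  // release bump has to touch both entries together.
  def tachyonVersionFor(sparkVersion: String): String = {
    require(validSparkVersions.contains(sparkVersion),
      s"Spark $sparkVersion is not a launchable release")
    sparkTachyonMap(sparkVersion)
  }
}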
[2/2] spark git commit: [SPARK-6028][Core] A new RPC implementation based on the network module
[SPARK-6028][Core] A new RPC implementation based on the network module Design doc: https://docs.google.com/document/d/1CF5G6rGVQMKSyV_QKo4D2M-x6rxz5x1Ew7aK3Uq6u8c/edit?usp=sharing Author: zsxwing. Closes #6457 from zsxwing/new-rpc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/084e4e12 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/084e4e12 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/084e4e12 Branch: refs/heads/master Commit: 084e4e126211d74a79e8dbd2d0e604dd3c650822 Parents: 9952217 Author: zsxwing Authored: Wed Sep 23 18:59:49 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 18:59:49 2015 -0700 -- .../org/apache/spark/MapOutputTracker.scala | 2 +- .../main/scala/org/apache/spark/SparkEnv.scala | 20 +- .../org/apache/spark/deploy/worker/Worker.scala | 2 +- .../spark/deploy/worker/WorkerWatcher.scala | 13 +- .../org/apache/spark/rpc/RpcCallContext.scala | 2 +- .../org/apache/spark/rpc/RpcEndpoint.scala | 51 +- .../rpc/RpcEndpointNotFoundException.scala | 22 + .../scala/org/apache/spark/rpc/RpcEnv.scala | 7 +- .../org/apache/spark/rpc/akka/AkkaRpcEnv.scala | 6 +- .../org/apache/spark/rpc/netty/Dispatcher.scala | 218 .../org/apache/spark/rpc/netty/IDVerifier.scala | 39 ++ .../org/apache/spark/rpc/netty/Inbox.scala | 220 .../spark/rpc/netty/NettyRpcAddress.scala | 56 +++ .../spark/rpc/netty/NettyRpcCallContext.scala | 87 .../apache/spark/rpc/netty/NettyRpcEnv.scala| 504 +++ .../storage/BlockManagerSlaveEndpoint.scala | 6 +- .../org/apache/spark/util/ThreadUtils.scala | 6 +- .../apache/spark/MapOutputTrackerSuite.scala| 10 +- .../org/apache/spark/SSLSampleConfigs.scala | 2 + .../deploy/worker/WorkerWatcherSuite.scala | 6 +- .../org/apache/spark/rpc/RpcEnvSuite.scala | 78 ++- .../org/apache/spark/rpc/TestRpcEndpoint.scala | 123 + .../org/apache/spark/rpc/netty/InboxSuite.scala | 148 ++ .../spark/rpc/netty/NettyRpcAddressSuite.scala | 29 ++ .../spark/rpc/netty/NettyRpcEnvSuite.scala | 38 ++ .../spark/rpc/netty/NettyRpcHandlerSuite.scala | 67 +++ .../spark/network/client/TransportClient.java | 4 + .../apache/spark/network/server/RpcHandler.java | 2 + .../network/server/TransportRequestHandler.java | 1 + .../streaming/scheduler/ReceiverTracker.scala | 2 +- .../spark/deploy/yarn/ApplicationMaster.scala | 5 +- 31 files changed, 1708 insertions(+), 68 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/084e4e12/core/src/main/scala/org/apache/spark/MapOutputTracker.scala -- diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 94eb8da..e380c5b 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -45,7 +45,7 @@ private[spark] class MapOutputTrackerMasterEndpoint( override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case GetMapOutputStatuses(shuffleId: Int) => - val hostPort = context.sender.address.hostPort + val hostPort = context.senderAddress.hostPort logInfo("Asked to send map output locations for shuffle " + shuffleId + " to " + hostPort) val mapOutputStatuses = tracker.getSerializedMapOutputStatuses(shuffleId) val serializedSize = mapOutputStatuses.size http://git-wip-us.apache.org/repos/asf/spark/blob/084e4e12/core/src/main/scala/org/apache/spark/SparkEnv.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala
index c6fef7f..cfde27f 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -20,11 +20,10 @@ package org.apache.spark import java.io.File import java.net.Socket -import akka.actor.ActorSystem - import scala.collection.mutable import scala.util.Properties +import akka.actor.ActorSystem import com.google.common.collect.MapMaker import org.apache.spark.annotation.DeveloperApi @@ -41,7 +40,7 @@ import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.{ShuffleMemoryManager, ShuffleManager} import org.apache.spark.storage._ import org.apache.spark.unsafe.memory.{ExecutorMemoryManager, MemoryAllocator} -import org.apache.spark.util.{RpcUtils, Utils} +import org.apache.spark.util.{AkkaUtils, RpcUtils, Utils} /** * :: DeveloperApi :: @@
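Among the files this commit adds, Dispatcher.scala and Inbox.scala carry the core of the new design: each registered endpoint owns an inbox that queues its pending messages, and a dispatcher routes incoming messages to the right inbox by endpoint name instead of going through Akka actors. A hand-rolled miniature of that layering follows; the names are invented here, and the real classes add thread pools, stop semantics, and error handling.

import java.util.concurrent.LinkedBlockingQueue
import scala.collection.concurrent.TrieMap

trait MiniEndpoint {
  def receive(message: Any): Unit
}

// One inbox per endpoint: messages accumulate here until drained.
final class MiniInbox(endpoint: MiniEndpoint) {
  private val messages = new LinkedBlockingQueue[Any]()
  def post(message: Any): Unit = messages.put(message)
  def drain(): Unit = {
    var m = messages.poll()
    while (m != null) {
      endpoint.receive(m)
      m = messages.poll()
    }
  }
}

// The dispatcher routes by endpoint name; broadcast models how connection
// events reach every registered endpoint at once.
final class MiniDispatcher {
  private val inboxes = TrieMap.empty[String, MiniInbox]

  def register(name: String, endpoint: MiniEndpoint): Unit =
    inboxes.put(name, new MiniInbox(endpoint))

  def post(name: String, message: Any): Unit =
    inboxes.get(name).foreach { inbox => inbox.post(message); inbox.drain() }

  def broadcast(message: Any): Unit =
    inboxes.values.foreach { inbox => inbox.post(message); inbox.drain() }
}

In the actual implementation the inboxes are drained by a dedicated thread pool rather than inline on the posting thread, so a slow endpoint cannot stall the network layer.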
[2/2] spark git commit: Preparing development version 1.3.3-SNAPSHOT
Preparing development version 1.3.3-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9f4b926d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9f4b926d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9f4b926d Branch: refs/heads/branch-1.3 Commit: 9f4b926d4748203ba58a55568e7e397e8e431651 Parents: 5a13975 Author: Patrick Wendell Authored: Wed Sep 23 18:59:20 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 18:59:20 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- 28 files changed, 28 insertions(+), 28 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 1703d8a..5f03f2f 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 29e2fc1..5a2066e 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 491e98c..206f2d5 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 669b19c..2da1ce5 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index dd4c1d7..0623b5b 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index e4c31c1..b9ede7b 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/external/kafka-assembly/pom.xml -- diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml index 4716a78..2ca540a 100644 --- a/external/kafka-assembly/pom.xml +++ b/external/kafka-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../../pom.xml
http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/external/kafka/pom.xml -- diff
[1/2] spark git commit: [SPARK-6028][Core] A new RPC implementation based on the network module
Repository: spark Updated Branches: refs/heads/master 995221774 -> 084e4e126 http://git-wip-us.apache.org/repos/asf/spark/blob/084e4e12/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala new file mode 100644 index 000..06ca035 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rpc.netty + +import java.net.InetSocketAddress + +import io.netty.channel.Channel +import org.mockito.Mockito._ +import org.mockito.Matchers._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.network.client.{TransportResponseHandler, TransportClient} +import org.apache.spark.rpc._ + +class NettyRpcHandlerSuite extends SparkFunSuite { + + val env = mock(classOf[NettyRpcEnv]) + when(env.deserialize(any(classOf[Array[Byte]]))(any())). 
+thenReturn(RequestMessage(RpcAddress("localhost", 12345), null, null, false)) + + test("receive") { +val dispatcher = mock(classOf[Dispatcher]) +val nettyRpcHandler = new NettyRpcHandler(dispatcher, env) + +val channel = mock(classOf[Channel]) +val client = new TransportClient(channel, mock(classOf[TransportResponseHandler])) +when(channel.remoteAddress()).thenReturn(new InetSocketAddress("localhost", 4)) +nettyRpcHandler.receive(client, null, null) + +when(channel.remoteAddress()).thenReturn(new InetSocketAddress("localhost", 40001)) +nettyRpcHandler.receive(client, null, null) + +verify(dispatcher, times(1)).broadcastMessage(Associated(RpcAddress("localhost", 12345))) + } + + test("connectionTerminated") { +val dispatcher = mock(classOf[Dispatcher]) +val nettyRpcHandler = new NettyRpcHandler(dispatcher, env) + +val channel = mock(classOf[Channel]) +val client = new TransportClient(channel, mock(classOf[TransportResponseHandler])) +when(channel.remoteAddress()).thenReturn(new InetSocketAddress("localhost", 4)) +nettyRpcHandler.receive(client, null, null) + +when(channel.remoteAddress()).thenReturn(new InetSocketAddress("localhost", 4)) +nettyRpcHandler.connectionTerminated(client) + +verify(dispatcher, times(1)).broadcastMessage(Associated(RpcAddress("localhost", 12345))) +verify(dispatcher, times(1)).broadcastMessage(Disassociated(RpcAddress("localhost", 12345))) + } + +} http://git-wip-us.apache.org/repos/asf/spark/blob/084e4e12/network/common/src/main/java/org/apache/spark/network/client/TransportClient.java -- diff --git a/network/common/src/main/java/org/apache/spark/network/client/TransportClient.java b/network/common/src/main/java/org/apache/spark/network/client/TransportClient.java index df84128..fbb8bb6 100644 --- a/network/common/src/main/java/org/apache/spark/network/client/TransportClient.java +++ b/network/common/src/main/java/org/apache/spark/network/client/TransportClient.java @@ -78,6 +78,10 @@ public class TransportClient implements Closeable { this.handler = Preconditions.checkNotNull(handler); } + public Channel getChannel() { +return channel; + } + public boolean isActive() { return channel.isOpen() || channel.isActive(); } http://git-wip-us.apache.org/repos/asf/spark/blob/084e4e12/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java -- diff --git a/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java b/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java index 2ba92a4..dbb7f95 100644 --- a/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java +++ b/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java @@ -52,4 +52,6 @@ public abstract class RpcHandler { * No further requests will come from this client. */ public
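Read together, the two tests above pin down the handler's observable contract: Associated is broadcast exactly once per advertised sender address, however many channels deliver messages for it, and Disassociated follows when the connection is torn down. A standalone approximation of that bookkeeping, with invented names rather than the real NettyRpcHandler:

import scala.collection.mutable

final case class MiniRpcAddress(host: String, port: Int)

// Tracks which sender addresses have already been announced, so Associated
// fires once per address, matching verify(dispatcher, times(1)) above.
final class AssociationTracker(broadcast: Any => Unit) {
  private val seen = mutable.Set.empty[MiniRpcAddress]

  def onMessage(sender: MiniRpcAddress): Unit = synchronized {
    if (seen.add(sender)) broadcast(("Associated", sender))
  }

  def onConnectionTerminated(sender: MiniRpcAddress): Unit = synchronized {
    if (seen.remove(sender)) broadcast(("Disassociated", sender))
  }
}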
[1/2] spark git commit: Preparing Spark release v1.3.2-rc1
Repository: spark Updated Branches: refs/heads/branch-1.3 392875ad3 -> 9f4b926d4 Preparing Spark release v1.3.2-rc1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5a139750 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5a139750 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5a139750 Branch: refs/heads/branch-1.3 Commit: 5a139750bea6cf4b56c432cbcb02584094997695 Parents: 392875a Author: Patrick Wendell Authored: Wed Sep 23 18:59:15 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 18:59:15 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- 28 files changed, 28 insertions(+), 28 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 0952cd2..1703d8a 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index ea3a71a..29e2fc1 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 7d67942..491e98c 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index e1a3ecc..669b19c 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index f46a2a0..dd4c1d7 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 56e85e3..e4c31c1 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/external/kafka-assembly/pom.xml -- diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml index 23ca5d2..4716a78 100644 --- a/external/kafka-assembly/pom.xml +++ b/external/kafka-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark
spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/external/kafka/pom.xml
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.3.2-rc1 [created] 5a139750b
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.5.1-rc1 [deleted] 4f894dd69