spark git commit: [SPARK-9710] [TEST] Fix RPackageUtilsSuite when R is not available.
Repository: spark
Updated Branches: refs/heads/branch-1.5 4174b94f0 -> 6c6cadb8f

[SPARK-9710] [TEST] Fix RPackageUtilsSuite when R is not available.

RUtils.isRInstalled throws an exception if R is not installed, instead of returning false. Fix that.

Author: Marcelo Vanzin

Closes #8008 from vanzin/SPARK-9710 and squashes the following commits:

df72d8c [Marcelo Vanzin] [SPARK-9710] [test] Fix RPackageUtilsSuite when R is not available.

(cherry picked from commit 0f3366a4c740147a7a7519922642912e2dd238f8)
Signed-off-by: Shivaram Venkataraman

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6c6cadb8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6c6cadb8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6c6cadb8

Branch: refs/heads/branch-1.5
Commit: 6c6cadb8febbf65e8540ffa832a317a37a4a8168
Parents: 4174b94
Author: Marcelo Vanzin
Authored: Mon Aug 10 10:10:40 2015 -0700
Committer: Shivaram Venkataraman
Committed: Wed Sep 23 07:38:31 2015 -0700

--
 core/src/main/scala/org/apache/spark/api/r/RUtils.scala | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/6c6cadb8/core/src/main/scala/org/apache/spark/api/r/RUtils.scala

diff --git a/core/src/main/scala/org/apache/spark/api/r/RUtils.scala b/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
index daad5b5..646fd0b 100644
--- a/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/r/RUtils.scala
@@ -67,7 +67,11 @@ private[spark] object RUtils {

   /** Check if R is installed before running tests that use R commands. */
   def isRInstalled: Boolean = {
-    val builder = new ProcessBuilder(Seq("R", "--version"))
-    builder.start().waitFor() == 0
+    try {
+      val builder = new ProcessBuilder(Seq("R", "--version"))
+      builder.start().waitFor() == 0
+    } catch {
+      case e: Exception => false
+    }
   }
 }
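The pattern generalizes to probing for any external tool: ProcessBuilder.start() throws an IOException when the binary is missing, which is exactly the case the patch now catches. A minimal standalone sketch of the corrected behavior (isCommandAvailable is a hypothetical helper, not Spark API):

```scala
// Probe for an external command; treat any launch failure as "not available"
// instead of letting the exception abort the test suite.
def isCommandAvailable(command: String*): Boolean =
  try {
    new ProcessBuilder(command: _*).start().waitFor() == 0
  } catch {
    case _: Exception => false
  }

// Usage: skip R-dependent tests when R cannot be launched.
val rInstalled = isCommandAvailable("R", "--version")
```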
Git Push Summary
Repository: spark
Updated Tags: refs/tags/v1.5.1-rc1 [created] 4df97937d
spark git commit: [SPARK-10763] [ML] [JAVA] [TEST] Update Java MLLIB/ML tests to use simplified dataframe construction
Repository: spark
Updated Branches: refs/heads/master 758c9d25e -> d91967e15

[SPARK-10763] [ML] [JAVA] [TEST] Update Java MLLIB/ML tests to use simplified dataframe construction

As introduced in https://issues.apache.org/jira/browse/SPARK-10630 we now have an easier way to create dataframes from local Java lists. Let's update the tests to use it.

Author: Holden Karau

Closes #8886 from holdenk/SPARK-10763-update-java-mllib-ml-tests-to-use-simplified-dataframe-construction.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d91967e1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d91967e1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d91967e1

Branch: refs/heads/master
Commit: d91967e159f416924bbd7f0db25156588d4bd7b1
Parents: 758c9d2
Author: Holden Karau
Authored: Wed Sep 23 22:49:08 2015 -0700
Committer: Xiangrui Meng
Committed: Wed Sep 23 22:49:08 2015 -0700

--
 .../spark/ml/classification/JavaNaiveBayesSuite.java |  8
 .../apache/spark/ml/feature/JavaBucketizerSuite.java | 14 +++---
 .../org/apache/spark/ml/feature/JavaDCTSuite.java    | 11 +--
 .../apache/spark/ml/feature/JavaHashingTFSuite.java  |  7 ---
 .../ml/feature/JavaPolynomialExpansionSuite.java     |  5 +++--
 .../spark/ml/feature/JavaStopWordsRemoverSuite.java  |  7 ---
 .../spark/ml/feature/JavaStringIndexerSuite.java     |  7 ---
 .../spark/ml/feature/JavaVectorAssemblerSuite.java   |  3 +-
 .../spark/ml/feature/JavaVectorSlicerSuite.java      |  7 ---
 .../apache/spark/ml/feature/JavaWord2VecSuite.java   | 12 ++--
 10 files changed, 42 insertions(+), 39 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/d91967e1/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java

diff --git a/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java b/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java
index 075a62c..f5f690e 100644
--- a/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/classification/JavaNaiveBayesSuite.java
@@ -19,6 +19,7 @@ package org.apache.spark.ml.classification;

 import java.io.Serializable;
 import java.util.Arrays;
+import java.util.List;

 import org.junit.After;
 import org.junit.Before;
@@ -75,21 +76,20 @@ public class JavaNaiveBayesSuite implements Serializable {

   @Test
   public void testNaiveBayes() {
-    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+    List<Row> data = Arrays.asList(
       RowFactory.create(0.0, Vectors.dense(1.0, 0.0, 0.0)),
       RowFactory.create(0.0, Vectors.dense(2.0, 0.0, 0.0)),
       RowFactory.create(1.0, Vectors.dense(0.0, 1.0, 0.0)),
       RowFactory.create(1.0, Vectors.dense(0.0, 2.0, 0.0)),
       RowFactory.create(2.0, Vectors.dense(0.0, 0.0, 1.0)),
-      RowFactory.create(2.0, Vectors.dense(0.0, 0.0, 2.0))
-    ));
+      RowFactory.create(2.0, Vectors.dense(0.0, 0.0, 2.0)));

     StructType schema = new StructType(new StructField[]{
       new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
       new StructField("features", new VectorUDT(), false, Metadata.empty())
     });

-    DataFrame dataset = jsql.createDataFrame(jrdd, schema);
+    DataFrame dataset = jsql.createDataFrame(data, schema);
     NaiveBayes nb = new NaiveBayes().setSmoothing(0.5).setModelType("multinomial");
     NaiveBayesModel model = nb.fit(dataset);

http://git-wip-us.apache.org/repos/asf/spark/blob/d91967e1/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java

diff --git a/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java
index 47d68de..8a1e5ef 100644
--- a/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java
+++ b/mllib/src/test/java/org/apache/spark/ml/feature/JavaBucketizerSuite.java
@@ -55,16 +55,16 @@ public class JavaBucketizerSuite {
   public void bucketizerTest() {
     double[] splits = {-0.5, 0.0, 0.5};

-    JavaRDD<Row> data = jsc.parallelize(Arrays.asList(
-      RowFactory.create(-0.5),
-      RowFactory.create(-0.3),
-      RowFactory.create(0.0),
-      RowFactory.create(0.2)
-    ));
     StructType schema = new StructType(new StructField[] {
       new StructField("feature", DataTypes.DoubleType, false, Metadata.empty())
     });
-    DataFrame dataset = jsql.createDataFrame(data, schema);
+    DataFrame dataset = jsql.createDataFrame(
+      Arrays.asList(
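The same simplification is available from Scala code that needs a small test DataFrame. A sketch assuming a 1.6-era SQLContext (the List-of-Rows overload of createDataFrame is the one SPARK-10630 introduced; smallDataFrame is a hypothetical helper):

```scala
import java.util.Arrays

import org.apache.spark.sql.{DataFrame, Row, RowFactory, SQLContext}
import org.apache.spark.sql.types.{DataTypes, StructField, StructType}

// Build a tiny DataFrame directly from a local java.util.List of Rows,
// with no detour through jsc.parallelize(...) / JavaRDD.
def smallDataFrame(sqlContext: SQLContext): DataFrame = {
  val data: java.util.List[Row] =
    Arrays.asList(RowFactory.create(-0.5), RowFactory.create(0.2))
  val schema = StructType(Array(
    StructField("feature", DataTypes.DoubleType, nullable = false)))
  sqlContext.createDataFrame(data, schema)
}
```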
[1/2] spark git commit: Preparing Spark release v1.5.1-rc1
Repository: spark
Updated Branches: refs/heads/branch-1.5 179f36ed3 -> 3fb011a48

Preparing Spark release v1.5.1-rc1

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4df97937
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4df97937
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4df97937

Branch: refs/heads/branch-1.5
Commit: 4df97937dbf68a9868de58408b9be0bf87dbbb94
Parents: 179f36e
Author: Patrick Wendell
Authored: Wed Sep 23 22:49:35 2015 -0700
Committer: Patrick Wendell
Committed: Wed Sep 23 22:49:35 2015 -0700

--
 assembly/pom.xml                    | 2 +-
 bagel/pom.xml                       | 2 +-
 core/pom.xml                        | 2 +-
 examples/pom.xml                    | 2 +-
 external/flume-assembly/pom.xml     | 2 +-
 external/flume-sink/pom.xml         | 2 +-
 external/flume/pom.xml              | 2 +-
 external/kafka-assembly/pom.xml     | 2 +-
 external/kafka/pom.xml              | 2 +-
 external/mqtt-assembly/pom.xml      | 2 +-
 external/mqtt/pom.xml               | 2 +-
 external/twitter/pom.xml            | 2 +-
 external/zeromq/pom.xml             | 2 +-
 extras/java8-tests/pom.xml          | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml          | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml                      | 2 +-
 launcher/pom.xml                    | 2 +-
 mllib/pom.xml                       | 2 +-
 network/common/pom.xml              | 2 +-
 network/shuffle/pom.xml             | 2 +-
 network/yarn/pom.xml                | 2 +-
 pom.xml                             | 2 +-
 repl/pom.xml                        | 2 +-
 sql/catalyst/pom.xml                | 2 +-
 sql/core/pom.xml                    | 2 +-
 sql/hive-thriftserver/pom.xml       | 2 +-
 sql/hive/pom.xml                    | 2 +-
 streaming/pom.xml                   | 2 +-
 tools/pom.xml                       | 2 +-
 unsafe/pom.xml                      | 2 +-
 yarn/pom.xml                        | 2 +-
 33 files changed, 33 insertions(+), 33 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/assembly/pom.xml

diff --git a/assembly/pom.xml b/assembly/pom.xml
index 7671ba2..03d4973 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/bagel/pom.xml

diff --git a/bagel/pom.xml b/bagel/pom.xml
index 02e920d..6f058ff 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/core/pom.xml

diff --git a/core/pom.xml b/core/pom.xml
index 03d26df..f32ce5d 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/examples/pom.xml

diff --git a/examples/pom.xml b/examples/pom.xml
index eb1910e..f28847e 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/external/flume-assembly/pom.xml

diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index 0de2f03..e7bd0d2 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/external/flume-sink/pom.xml

diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index 66ab1b2..e5a5503 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.2-SNAPSHOT</version>
+    <version>1.5.1</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/4df97937/external/flume/pom.xml

diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index c058490..b5e9423
[2/2] spark git commit: Preparing development version 1.5.2-SNAPSHOT
Preparing development version 1.5.2-SNAPSHOT

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3fb011a4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3fb011a4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3fb011a4

Branch: refs/heads/branch-1.5
Commit: 3fb011a486c87c99c73b6453b25dbb76f93845a7
Parents: 4df9793
Author: Patrick Wendell
Authored: Wed Sep 23 22:49:40 2015 -0700
Committer: Patrick Wendell
Committed: Wed Sep 23 22:49:40 2015 -0700

--
 assembly/pom.xml                    | 2 +-
 bagel/pom.xml                       | 2 +-
 core/pom.xml                        | 2 +-
 examples/pom.xml                    | 2 +-
 external/flume-assembly/pom.xml     | 2 +-
 external/flume-sink/pom.xml         | 2 +-
 external/flume/pom.xml              | 2 +-
 external/kafka-assembly/pom.xml     | 2 +-
 external/kafka/pom.xml              | 2 +-
 external/mqtt-assembly/pom.xml      | 2 +-
 external/mqtt/pom.xml               | 2 +-
 external/twitter/pom.xml            | 2 +-
 external/zeromq/pom.xml             | 2 +-
 extras/java8-tests/pom.xml          | 2 +-
 extras/kinesis-asl-assembly/pom.xml | 2 +-
 extras/kinesis-asl/pom.xml          | 2 +-
 extras/spark-ganglia-lgpl/pom.xml   | 2 +-
 graphx/pom.xml                      | 2 +-
 launcher/pom.xml                    | 2 +-
 mllib/pom.xml                       | 2 +-
 network/common/pom.xml              | 2 +-
 network/shuffle/pom.xml             | 2 +-
 network/yarn/pom.xml                | 2 +-
 pom.xml                             | 2 +-
 repl/pom.xml                        | 2 +-
 sql/catalyst/pom.xml                | 2 +-
 sql/core/pom.xml                    | 2 +-
 sql/hive-thriftserver/pom.xml       | 2 +-
 sql/hive/pom.xml                    | 2 +-
 streaming/pom.xml                   | 2 +-
 tools/pom.xml                       | 2 +-
 unsafe/pom.xml                      | 2 +-
 yarn/pom.xml                        | 2 +-
 33 files changed, 33 insertions(+), 33 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/assembly/pom.xml

diff --git a/assembly/pom.xml b/assembly/pom.xml
index 03d4973..7671ba2 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/bagel/pom.xml

diff --git a/bagel/pom.xml b/bagel/pom.xml
index 6f058ff..02e920d 100644
--- a/bagel/pom.xml
+++ b/bagel/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/core/pom.xml

diff --git a/core/pom.xml b/core/pom.xml
index f32ce5d..03d26df 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/examples/pom.xml

diff --git a/examples/pom.xml b/examples/pom.xml
index f28847e..eb1910e 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/external/flume-assembly/pom.xml

diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml
index e7bd0d2..0de2f03 100644
--- a/external/flume-assembly/pom.xml
+++ b/external/flume-assembly/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/external/flume-sink/pom.xml

diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml
index e5a5503..66ab1b2 100644
--- a/external/flume-sink/pom.xml
+++ b/external/flume-sink/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.10</artifactId>
-    <version>1.5.1</version>
+    <version>1.5.2-SNAPSHOT</version>
     <relativePath>../../pom.xml</relativePath>

http://git-wip-us.apache.org/repos/asf/spark/blob/3fb011a4/external/flume/pom.xml

diff --git a/external/flume/pom.xml b/external/flume/pom.xml
index b5e9423..c058490 100644
--- a/external/flume/pom.xml
+++ b/external/flume/pom.xml
@@ -21,7
spark git commit: [SPARK-10403] Allow UnsafeRowSerializer to work with tungsten-sort ShuffleManager
Repository: spark
Updated Branches: refs/heads/branch-1.5 6c6cadb8f -> 64cc62cb5

[SPARK-10403] Allow UnsafeRowSerializer to work with tungsten-sort ShuffleManager

This patch attempts to fix an issue where Spark SQL's UnsafeRowSerializer was incompatible with the `tungsten-sort` ShuffleManager.

Author: Josh Rosen

Closes #8873 from JoshRosen/SPARK-10403.

(cherry picked from commit a18208047f06a4244703c17023bb20cbe1f59d73)
Signed-off-by: Michael Armbrust

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/64cc62cb
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/64cc62cb
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/64cc62cb

Branch: refs/heads/branch-1.5
Commit: 64cc62cb5f14dcc4a69073c48fdf3dd61c5df787
Parents: 6c6cadb
Author: Josh Rosen
Authored: Wed Sep 23 11:31:01 2015 -0700
Committer: Michael Armbrust
Committed: Wed Sep 23 11:31:14 2015 -0700

--
 .../sql/execution/UnsafeRowSerializer.scala  | 22 +--
 .../execution/UnsafeRowSerializerSuite.scala | 23 ++--
 2 files changed, 27 insertions(+), 18 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/64cc62cb/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
index e060c06..7e98126 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
@@ -45,16 +45,9 @@ private[sql] class UnsafeRowSerializer(numFields: Int) extends Serializer with S
 }

 private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInstance {
-
-  /**
-   * Marks the end of a stream written with [[serializeStream()]].
-   */
-  private[this] val EOF: Int = -1
-
   /**
    * Serializes a stream of UnsafeRows. Within the stream, each record consists of a record
    * length (stored as a 4-byte integer, written high byte first), followed by the record's bytes.
-   * The end of the stream is denoted by a record with the special length `EOF` (-1).
    */
   override def serializeStream(out: OutputStream): SerializationStream = new SerializationStream {
     private[this] var writeBuffer: Array[Byte] = new Array[Byte](4096)
@@ -92,7 +85,6 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
     override def close(): Unit = {
       writeBuffer = null
-      dOut.writeInt(EOF)
       dOut.close()
     }
   }
@@ -104,12 +96,20 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
     private[this] var rowBuffer: Array[Byte] = new Array[Byte](1024)
     private[this] var row: UnsafeRow = new UnsafeRow()
     private[this] var rowTuple: (Int, UnsafeRow) = (0, row)
+    private[this] val EOF: Int = -1

     override def asKeyValueIterator: Iterator[(Int, UnsafeRow)] = {
       new Iterator[(Int, UnsafeRow)] {
-        private[this] var rowSize: Int = dIn.readInt()
-        if (rowSize == EOF) dIn.close()
+        private[this] def readSize(): Int = try {
+          dIn.readInt()
+        } catch {
+          case e: EOFException =>
+            dIn.close()
+            EOF
+        }
+
+        private[this] var rowSize: Int = readSize()

         override def hasNext: Boolean = rowSize != EOF

         override def next(): (Int, UnsafeRow) = {
@@ -118,7 +118,7 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
           }
           ByteStreams.readFully(dIn, rowBuffer, 0, rowSize)
           row.pointTo(rowBuffer, Platform.BYTE_ARRAY_OFFSET, numFields, rowSize)
-          rowSize = dIn.readInt() // read the next row's size
+          rowSize = readSize()
           if (rowSize == EOF) { // We are returning the last row in this stream
             dIn.close()
             val _rowTuple = rowTuple

http://git-wip-us.apache.org/repos/asf/spark/blob/64cc62cb/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala
index 0113d05..f7d48bc 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala
+++
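The heart of the fix is replacing the in-band EOF marker with out-of-band end-of-stream detection: the reader treats an EOFException from readInt() as the end of input, which keeps working when several serialized streams are laid back to back, as tungsten-sort's shuffle output does. A self-contained sketch of that reader-side idea (readLengthPrefixed is a hypothetical helper, not the Spark class):

```scala
import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream, EOFException}

// Iterate over a length-prefixed record stream, detecting the end via
// EOFException instead of a sentinel length written into the stream.
def readLengthPrefixed(dIn: DataInputStream): Iterator[Array[Byte]] =
  new Iterator[Array[Byte]] {
    private val EOF = -1
    private def readSize(): Int =
      try dIn.readInt()
      catch { case _: EOFException => dIn.close(); EOF }

    private var rowSize = readSize()
    override def hasNext: Boolean = rowSize != EOF
    override def next(): Array[Byte] = {
      val payload = new Array[Byte](rowSize)
      dIn.readFully(payload) // consume the record body
      rowSize = readSize()   // EOF here simply ends the iteration
      payload
    }
  }

// Streams written independently and then concatenated still parse cleanly,
// because no EOF sentinel from the first stream interrupts the second.
val out = new ByteArrayOutputStream()
val dOut = new DataOutputStream(out)
for (record <- Seq("a", "bb", "ccc").map(_.getBytes("UTF-8"))) {
  dOut.writeInt(record.length)
  dOut.write(record)
}
dOut.close()
val records = readLengthPrefixed(
  new DataInputStream(new ByteArrayInputStream(out.toByteArray))).toList
```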
spark git commit: [SPARK-10721] Log warning when file deletion fails
Repository: spark
Updated Branches: refs/heads/master 50e463423 -> 27bfa9ab3

[SPARK-10721] Log warning when file deletion fails

Author: tedyu

Closes #8843 from tedyu/master.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/27bfa9ab
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/27bfa9ab
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/27bfa9ab

Branch: refs/heads/master
Commit: 27bfa9ab3a610e072c011fd88ee4684cea6ceb76
Parents: 50e4634
Author: tedyu
Authored: Wed Sep 23 10:01:28 2015 +0100
Committer: Sean Owen
Committed: Wed Sep 23 10:01:28 2015 +0100

--
 .../unsafe/sort/UnsafeSorterSpillReader.java           |  7 ++-
 .../scala/org/apache/spark/api/python/PythonRDD.scala  |  7 +--
 .../scala/org/apache/spark/deploy/RPackageUtils.scala  | 10 +++---
 .../spark/deploy/history/FsHistoryProvider.scala       |  8 ++--
 .../deploy/master/FileSystemPersistenceEngine.scala    |  5 -
 .../org/apache/spark/rdd/ReliableCheckpointRDD.scala   |  4 +++-
 .../apache/spark/rdd/ReliableRDDCheckpointData.scala   |  6 --
 .../apache/spark/scheduler/EventLoggingListener.scala  |  8 ++--
 .../spark/scheduler/cluster/SimrSchedulerBackend.scala |  4 +++-
 .../spark/shuffle/FileShuffleBlockResolver.scala       |  5 -
 .../spark/shuffle/IndexShuffleBlockResolver.scala      | 13 +
 .../scala/org/apache/spark/storage/DiskStore.scala     | 10 --
 .../spark/util/collection/ExternalAppendOnlyMap.scala  |  8 ++--
 .../apache/spark/util/collection/ExternalSorter.scala  |  4 +++-
 .../network/shuffle/ExternalShuffleBlockResolver.java  |  8 ++--
 15 files changed, 80 insertions(+), 27 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/27bfa9ab/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java

diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java
index 4989b05..501dfe7 100644
--- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java
+++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeSorterSpillReader.java
@@ -24,12 +24,15 @@ import com.google.common.io.ByteStreams;
 import org.apache.spark.storage.BlockId;
 import org.apache.spark.storage.BlockManager;
 import org.apache.spark.unsafe.Platform;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;

 /**
  * Reads spill files written by {@link UnsafeSorterSpillWriter} (see that class for a description
  * of the file format).
  */
 final class UnsafeSorterSpillReader extends UnsafeSorterIterator {
+  private static final Logger logger = LoggerFactory.getLogger(UnsafeSorterSpillReader.class);

   private final File file;
   private InputStream in;
@@ -73,7 +76,9 @@ final class UnsafeSorterSpillReader extends UnsafeSorterIterator {
     numRecordsRemaining--;
     if (numRecordsRemaining == 0) {
       in.close();
-      file.delete();
+      if (!file.delete() && file.exists()) {
+        logger.warn("Unable to delete spill file {}", file.getPath());
+      }
       in = null;
       din = null;
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/27bfa9ab/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala

diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
index 3788d18..19be093 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala
@@ -871,7 +871,8 @@ private class PythonAccumulatorParam(@transient private val serverHost: String,
  * write the data into disk after deserialization, then Python can read it from disks.
  */
 // scalastyle:off no.finalize
-private[spark] class PythonBroadcast(@transient var path: String) extends Serializable {
+private[spark] class PythonBroadcast(@transient var path: String) extends Serializable
+  with Logging {

   /**
    * Read data from disks, then copy it to `out`
@@ -907,7 +908,9 @@ private[spark] class PythonBroadcast(@transient var path: String) extends Serial
     if (!path.isEmpty) {
       val file = new File(path)
       if (file.exists()) {
-        file.delete()
+        if (!file.delete()) {
+          logWarning(s"Error deleting ${file.getPath}")
+        }
       }
     }
   }
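The same two-step check recurs across all fifteen touched files. A minimal sketch of the pattern (SafeDelete is a hypothetical wrapper; Spark's Scala classes use the Logging trait's logWarning rather than a raw slf4j logger):

```scala
import java.io.File

import org.slf4j.LoggerFactory

object SafeDelete {
  private val logger = LoggerFactory.getLogger(getClass)

  // File.delete() returns false both when deletion fails and when the file
  // is already gone; the exists() check keeps the warning to real failures.
  def deleteWithWarning(file: File): Unit = {
    if (!file.delete() && file.exists()) {
      logger.warn("Unable to delete file {}", file.getPath)
    }
  }
}
```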
spark git commit: [SPARK-10224] [STREAMING] Fix the issue that blockIntervalTimer won't call updateCurrentBuffer when stopping
Repository: spark
Updated Branches: refs/heads/master 5548a2547 -> 44c28abf1

[SPARK-10224] [STREAMING] Fix the issue that blockIntervalTimer won't call updateCurrentBuffer when stopping

`blockIntervalTimer.stop(interruptTimer = false)` doesn't guarantee calling `updateCurrentBuffer`. So it's possible that `blockIntervalTimer` will exit while `currentBuffer` is not empty, and the data in `currentBuffer` will be lost. To reproduce it, you can add `Thread.sleep(200)` in this line (https://github.com/apache/spark/blob/69c9c177160e32a2fbc9b36ecc52156077fca6fc/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala#L100) and run `StreamingContextSuite`. I cannot write a unit test to reproduce it because I cannot find an approach to force `RecurringTimer` to suspend at this line for a few milliseconds.

There was a failure in Jenkins here: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/41455/console

This PR updates RecurringTimer to make sure `stop(interruptTimer = false)` will call `callback` at least once after the `stop` method is called.

Author: zsxwing

Closes #8417 from zsxwing/SPARK-10224.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/44c28abf
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/44c28abf
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/44c28abf

Branch: refs/heads/master
Commit: 44c28abf120754c0175c65ffd3d4587a350b3798
Parents: 5548a25
Author: zsxwing
Authored: Wed Sep 23 01:28:02 2015 -0700
Committer: Tathagata Das
Committed: Wed Sep 23 01:28:02 2015 -0700

--
 .../spark/streaming/util/RecurringTimer.scala | 19 +++--
 .../receiver/BlockGeneratorSuite.scala        |  7 +-
 .../streaming/util/RecurringTimerSuite.scala  | 83
 3 files changed, 100 insertions(+), 9 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/44c28abf/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala

diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
index dd32ad5..0148cb5 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
@@ -72,8 +72,10 @@ class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name:

   /**
    * Stop the timer, and return the last time the callback was made.
-   * interruptTimer = true will interrupt the callback
+   * - interruptTimer = true will interrupt the callback
    * if it is in progress (not guaranteed to give correct time in this case).
+   * - interruptTimer = false guarantees that there will be at least one callback after `stop` has
+   *   been called.
    */
   def stop(interruptTimer: Boolean): Long = synchronized {
     if (!stopped) {
@@ -87,18 +89,23 @@ class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name:
     prevTime
   }

+  private def triggerActionForNextInterval(): Unit = {
+    clock.waitTillTime(nextTime)
+    callback(nextTime)
+    prevTime = nextTime
+    nextTime += period
+    logDebug("Callback for " + name + " called at time " + prevTime)
+  }
+
   /**
    * Repeatedly call the callback every interval.
    */
   private def loop() {
     try {
       while (!stopped) {
-        clock.waitTillTime(nextTime)
-        callback(nextTime)
-        prevTime = nextTime
-        nextTime += period
-        logDebug("Callback for " + name + " called at time " + prevTime)
+        triggerActionForNextInterval()
       }
+      triggerActionForNextInterval()
     } catch {
       case e: InterruptedException =>
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/44c28abf/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala

diff --git a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
index a38cc60..2f11b25 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
@@ -184,9 +184,10 @@ class BlockGeneratorSuite extends SparkFunSuite with BeforeAndAfter {
     // Verify that the final data is present in the final generated block and
     // pushed before complete stop
     assert(blockGenerator.isStopped() === false) // generator
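The guarantee can be modeled in isolation. A compact, self-contained sketch of the fixed semantics (SimpleRecurringTimer is hypothetical, not the Spark class; the real implementation drives a manual Clock and tracks prevTime/nextTime):

```scala
// After stop() is observed, the loop runs the callback exactly once more,
// so data buffered for the current interval is flushed rather than dropped.
class SimpleRecurringTimer(periodMs: Long, callback: Long => Unit) {
  @volatile private var stopped = false

  private val thread = new Thread("simple-recurring-timer") {
    override def run(): Unit = {
      try {
        while (!stopped) {
          Thread.sleep(periodMs) // stand-in for clock.waitTillTime(nextTime)
          callback(System.currentTimeMillis())
        }
        callback(System.currentTimeMillis()) // guaranteed final callback
      } catch {
        case _: InterruptedException => // interrupting skips the final callback
      }
    }
  }

  def start(): Unit = thread.start()

  def stop(interruptTimer: Boolean): Unit = {
    stopped = true
    if (interruptTimer) thread.interrupt()
    thread.join()
  }
}
```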
spark git commit: [SPARK-10769] [STREAMING] [TESTS] Fix o.a.s.streaming.CheckpointSuite.maintains rate controller
Repository: spark
Updated Branches: refs/heads/branch-1.5 6a616d0d0 -> 4174b94f0

[SPARK-10769] [STREAMING] [TESTS] Fix o.a.s.streaming.CheckpointSuite.maintains rate controller

Fixed the following failure in https://amplab.cs.berkeley.edu/jenkins/job/NewSparkPullRequestBuilder/1787/testReport/junit/org.apache.spark.streaming/CheckpointSuite/recovery_maintains_rate_controller/

```
sbt.ForkMain$ForkError: The code passed to eventually never returned normally. Attempted 660 times over 10.4439201 seconds. Last failure message: 9223372036854775807 did not equal 200.
	at org.scalatest.concurrent.Eventually$class.tryTryAgain$1(Eventually.scala:420)
	at org.scalatest.concurrent.Eventually$class.eventually(Eventually.scala:438)
	at org.scalatest.concurrent.Eventually$.eventually(Eventually.scala:478)
	at org.scalatest.concurrent.Eventually$class.eventually(Eventually.scala:336)
	at org.scalatest.concurrent.Eventually$.eventually(Eventually.scala:478)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply$mcV$sp(CheckpointSuite.scala:413)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply(CheckpointSuite.scala:396)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply(CheckpointSuite.scala:396)
	at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
	at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
	at org.scalatest.Transformer.apply(Transformer.scala:22)
```

In this test, it calls `advanceTimeWithRealDelay(ssc, 2)` to run two batch jobs. However, there is a race condition: these two jobs can finish before the receiver is registered. Then `UpdateRateLimit` won't be sent to the receiver and `getDefaultBlockGeneratorRateLimit` cannot be updated. Here are the logs related to this issue:

```
15/09/22 19:28:26.154 pool-1-thread-1-ScalaTest-running-CheckpointSuite INFO CheckpointSuite: Manual clock before advancing = 2500
15/09/22 19:28:26.869 JobScheduler INFO JobScheduler: Finished job streaming job 3000 ms.0 from job set of time 3000 ms
15/09/22 19:28:26.869 JobScheduler INFO JobScheduler: Total delay: 1442975303.869 s for time 3000 ms (execution: 0.711 s)
15/09/22 19:28:26.873 JobScheduler INFO JobScheduler: Finished job streaming job 3500 ms.0 from job set of time 3500 ms
15/09/22 19:28:26.873 JobScheduler INFO JobScheduler: Total delay: 1442975303.373 s for time 3500 ms (execution: 0.004 s)
15/09/22 19:28:26.879 sparkDriver-akka.actor.default-dispatcher-3 INFO ReceiverTracker: Registered receiver for stream 0 from localhost:57749
15/09/22 19:28:27.154 pool-1-thread-1-ScalaTest-running-CheckpointSuite INFO CheckpointSuite: Manual clock after advancing = 3500
```

`advanceTimeWithRealDelay(ssc, 2)` triggered the 3000 ms and 3500 ms jobs, but the receiver was registered only after both jobs had finished. So we should make sure the receiver is online before running `advanceTimeWithRealDelay(ssc, 2)`.

Author: zsxwing

Closes #8877 from zsxwing/SPARK-10769.

(cherry picked from commit 50e4634236668a0195390f0080d0ac230d428d05)
Signed-off-by: Tathagata Das

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4174b94f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4174b94f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4174b94f

Branch: refs/heads/branch-1.5
Commit: 4174b94f05282ca51f1219aa6aba3226e205aee0
Parents: 6a616d0
Author: zsxwing
Authored: Wed Sep 23 01:29:30 2015 -0700
Committer: Tathagata Das
Committed: Wed Sep 23 01:30:21 2015 -0700

--
 .../scala/org/apache/spark/streaming/CheckpointSuite.scala | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/4174b94f/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala

diff --git a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
index 1bba7a1..a695653 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
@@ -408,10 +408,14 @@ class CheckpointSuite extends TestSuiteBase {

     ssc = new StreamingContext(checkpointDir)
     ssc.start()

-    val outputNew = advanceTimeWithRealDelay(ssc, 2)
     eventually(timeout(10.seconds)) {
       assert(RateTestReceiver.getActive().nonEmpty)
+    }
+
+    advanceTimeWithRealDelay(ssc, 2)
+
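Spelled out, the reordering the (truncated) hunk performs is a three-step sketch; the helper names are those of the suite itself, so this is not standalone code:

```scala
// 1. Block until the receiver has registered with the tracker.
eventually(timeout(10.seconds)) {
  assert(RateTestReceiver.getActive().nonEmpty)
}

// 2. Only now run the two batch jobs, so UpdateRateLimit can reach the receiver.
advanceTimeWithRealDelay(ssc, 2)

// 3. Poll for the propagated rate limit instead of asserting immediately.
eventually(timeout(10.seconds)) {
  assert(RateTestReceiver.getActive().get.getDefaultBlockGeneratorRateLimit() === 200)
}
```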
spark git commit: [SPARK-10224] [STREAMING] Fix the issue that blockIntervalTimer won't call updateCurrentBuffer when stopping
Repository: spark
Updated Branches: refs/heads/branch-1.5 8a23ef59b -> 6a616d0d0

[SPARK-10224] [STREAMING] Fix the issue that blockIntervalTimer won't call updateCurrentBuffer when stopping

`blockIntervalTimer.stop(interruptTimer = false)` doesn't guarantee calling `updateCurrentBuffer`. So it's possible that `blockIntervalTimer` will exit while `currentBuffer` is not empty, and the data in `currentBuffer` will be lost. To reproduce it, you can add `Thread.sleep(200)` in this line (https://github.com/apache/spark/blob/69c9c177160e32a2fbc9b36ecc52156077fca6fc/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala#L100) and run `StreamingContextSuite`. I cannot write a unit test to reproduce it because I cannot find an approach to force `RecurringTimer` to suspend at this line for a few milliseconds.

There was a failure in Jenkins here: https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/41455/console

This PR updates RecurringTimer to make sure `stop(interruptTimer = false)` will call `callback` at least once after the `stop` method is called.

Author: zsxwing

Closes #8417 from zsxwing/SPARK-10224.

(cherry picked from commit 44c28abf120754c0175c65ffd3d4587a350b3798)
Signed-off-by: Tathagata Das

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6a616d0d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6a616d0d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6a616d0d

Branch: refs/heads/branch-1.5
Commit: 6a616d0d02c3fe5d570249695e9ed747bf087dbf
Parents: 8a23ef5
Author: zsxwing
Authored: Wed Sep 23 01:28:02 2015 -0700
Committer: Tathagata Das
Committed: Wed Sep 23 01:28:16 2015 -0700

--
 .../spark/streaming/util/RecurringTimer.scala | 19 +++--
 .../receiver/BlockGeneratorSuite.scala        |  7 +-
 .../streaming/util/RecurringTimerSuite.scala  | 83
 3 files changed, 100 insertions(+), 9 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/6a616d0d/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala

diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
index dd32ad5..0148cb5 100644
--- a/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
+++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RecurringTimer.scala
@@ -72,8 +72,10 @@ class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name:

   /**
    * Stop the timer, and return the last time the callback was made.
-   * interruptTimer = true will interrupt the callback
+   * - interruptTimer = true will interrupt the callback
    * if it is in progress (not guaranteed to give correct time in this case).
+   * - interruptTimer = false guarantees that there will be at least one callback after `stop` has
+   *   been called.
    */
   def stop(interruptTimer: Boolean): Long = synchronized {
     if (!stopped) {
@@ -87,18 +89,23 @@ class RecurringTimer(clock: Clock, period: Long, callback: (Long) => Unit, name:
     prevTime
   }

+  private def triggerActionForNextInterval(): Unit = {
+    clock.waitTillTime(nextTime)
+    callback(nextTime)
+    prevTime = nextTime
+    nextTime += period
+    logDebug("Callback for " + name + " called at time " + prevTime)
+  }
+
   /**
    * Repeatedly call the callback every interval.
    */
   private def loop() {
     try {
       while (!stopped) {
-        clock.waitTillTime(nextTime)
-        callback(nextTime)
-        prevTime = nextTime
-        nextTime += period
-        logDebug("Callback for " + name + " called at time " + prevTime)
+        triggerActionForNextInterval()
       }
+      triggerActionForNextInterval()
     } catch {
       case e: InterruptedException =>
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/6a616d0d/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala

diff --git a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
index a38cc60..2f11b25 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/receiver/BlockGeneratorSuite.scala
@@ -184,9 +184,10 @@ class BlockGeneratorSuite extends SparkFunSuite with BeforeAndAfter {
     // Verify that the final data is
spark git commit: [SPARK-10769] [STREAMING] [TESTS] Fix o.a.s.streaming.CheckpointSuite.maintains rate controller
Repository: spark
Updated Branches: refs/heads/master 44c28abf1 -> 50e463423

[SPARK-10769] [STREAMING] [TESTS] Fix o.a.s.streaming.CheckpointSuite.maintains rate controller

Fixed the following failure in https://amplab.cs.berkeley.edu/jenkins/job/NewSparkPullRequestBuilder/1787/testReport/junit/org.apache.spark.streaming/CheckpointSuite/recovery_maintains_rate_controller/

```
sbt.ForkMain$ForkError: The code passed to eventually never returned normally. Attempted 660 times over 10.4439201 seconds. Last failure message: 9223372036854775807 did not equal 200.
	at org.scalatest.concurrent.Eventually$class.tryTryAgain$1(Eventually.scala:420)
	at org.scalatest.concurrent.Eventually$class.eventually(Eventually.scala:438)
	at org.scalatest.concurrent.Eventually$.eventually(Eventually.scala:478)
	at org.scalatest.concurrent.Eventually$class.eventually(Eventually.scala:336)
	at org.scalatest.concurrent.Eventually$.eventually(Eventually.scala:478)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply$mcV$sp(CheckpointSuite.scala:413)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply(CheckpointSuite.scala:396)
	at org.apache.spark.streaming.CheckpointSuite$$anonfun$15.apply(CheckpointSuite.scala:396)
	at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
	at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
	at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
	at org.scalatest.Transformer.apply(Transformer.scala:22)
```

In this test, it calls `advanceTimeWithRealDelay(ssc, 2)` to run two batch jobs. However, there is a race condition: these two jobs can finish before the receiver is registered. Then `UpdateRateLimit` won't be sent to the receiver and `getDefaultBlockGeneratorRateLimit` cannot be updated. Here are the logs related to this issue:

```
15/09/22 19:28:26.154 pool-1-thread-1-ScalaTest-running-CheckpointSuite INFO CheckpointSuite: Manual clock before advancing = 2500
15/09/22 19:28:26.869 JobScheduler INFO JobScheduler: Finished job streaming job 3000 ms.0 from job set of time 3000 ms
15/09/22 19:28:26.869 JobScheduler INFO JobScheduler: Total delay: 1442975303.869 s for time 3000 ms (execution: 0.711 s)
15/09/22 19:28:26.873 JobScheduler INFO JobScheduler: Finished job streaming job 3500 ms.0 from job set of time 3500 ms
15/09/22 19:28:26.873 JobScheduler INFO JobScheduler: Total delay: 1442975303.373 s for time 3500 ms (execution: 0.004 s)
15/09/22 19:28:26.879 sparkDriver-akka.actor.default-dispatcher-3 INFO ReceiverTracker: Registered receiver for stream 0 from localhost:57749
15/09/22 19:28:27.154 pool-1-thread-1-ScalaTest-running-CheckpointSuite INFO CheckpointSuite: Manual clock after advancing = 3500
```

`advanceTimeWithRealDelay(ssc, 2)` triggered the 3000 ms and 3500 ms jobs, but the receiver was registered only after both jobs had finished. So we should make sure the receiver is online before running `advanceTimeWithRealDelay(ssc, 2)`.

Author: zsxwing

Closes #8877 from zsxwing/SPARK-10769.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50e46342
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50e46342
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50e46342

Branch: refs/heads/master
Commit: 50e4634236668a0195390f0080d0ac230d428d05
Parents: 44c28ab
Author: zsxwing
Authored: Wed Sep 23 01:29:30 2015 -0700
Committer: Tathagata Das
Committed: Wed Sep 23 01:29:30 2015 -0700

--
 .../scala/org/apache/spark/streaming/CheckpointSuite.scala | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/50e46342/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala

diff --git a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
index 1bba7a1..a695653 100644
--- a/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
+++ b/streaming/src/test/scala/org/apache/spark/streaming/CheckpointSuite.scala
@@ -408,10 +408,14 @@ class CheckpointSuite extends TestSuiteBase {

     ssc = new StreamingContext(checkpointDir)
     ssc.start()

-    val outputNew = advanceTimeWithRealDelay(ssc, 2)
     eventually(timeout(10.seconds)) {
       assert(RateTestReceiver.getActive().nonEmpty)
+    }
+
+    advanceTimeWithRealDelay(ssc, 2)
+
+    eventually(timeout(10.seconds)) {
       assert(RateTestReceiver.getActive().get.getDefaultBlockGeneratorRateLimit() === 200)
     }
spark git commit: [SPARK-9715] [ML] Store numFeatures in all ML PredictionModel types
Repository: spark
Updated Branches: refs/heads/master a18208047 -> 098be27ad

[SPARK-9715] [ML] Store numFeatures in all ML PredictionModel types

All prediction models should store `numFeatures`, indicating the number of features the model was trained on. A default value of -1 is added for backwards compatibility.

Author: sethah

Closes #8675 from sethah/SPARK-9715.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/098be27a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/098be27a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/098be27a

Branch: refs/heads/master
Commit: 098be27ad53c485ee2fc7f5871c47f899020e87b
Parents: a182080
Author: sethah
Authored: Wed Sep 23 15:00:52 2015 -0700
Committer: Joseph K. Bradley
Committed: Wed Sep 23 15:00:52 2015 -0700

--
 .../examples/ml/JavaDeveloperApiExample.java    |  5
 .../spark/examples/ml/DeveloperApiExample.scala |  3 +++
 .../scala/org/apache/spark/ml/Predictor.scala   |  6 -
 .../classification/DecisionTreeClassifier.scala | 13 ++
 .../spark/ml/classification/GBTClassifier.scala | 26 ++--
 .../ml/classification/LogisticRegression.scala  |  2 ++
 .../MultilayerPerceptronClassifier.scala        |  2 ++
 .../spark/ml/classification/NaiveBayes.scala    |  2 ++
 .../classification/RandomForestClassifier.scala |  8 +++---
 .../ml/regression/DecisionTreeRegressor.scala   | 13 ++
 .../spark/ml/regression/GBTRegressor.scala      | 24 +-
 .../spark/ml/regression/LinearRegression.scala  |  2 ++
 .../ml/regression/RandomForestRegressor.scala   |  7 +++---
 .../spark/ml/tree/impl/RandomForest.scala       | 14 ---
 .../DecisionTreeClassifierSuite.scala           |  4 ++-
 .../ml/classification/GBTClassifierSuite.scala  | 11 ++---
 .../LogisticRegressionSuite.scala               |  2 ++
 .../MultilayerPerceptronClassifierSuite.scala   |  4 ++-
 .../ProbabilisticClassifierSuite.scala          |  6 +++--
 .../RandomForestClassifierSuite.scala           |  8 +++---
 .../regression/DecisionTreeRegressorSuite.scala |  2 ++
 .../spark/ml/regression/GBTRegressorSuite.scala |  7 --
 .../ml/regression/LinearRegressionSuite.scala   |  4 ++-
 .../regression/RandomForestRegressorSuite.scala |  2 ++
 .../spark/ml/tree/impl/RandomForestSuite.scala  |  3 ++-
 25 files changed, 130 insertions(+), 50 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/098be27a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java

diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
index a377694..0b4c0d9 100644
--- a/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDeveloperApiExample.java
@@ -220,6 +220,11 @@ class MyJavaLogisticRegressionModel
   public int numClasses() { return 2; }

   /**
+   * Number of features the model was trained on.
+   */
+  public int numFeatures() { return weights_.size(); }
+
+  /**
    * Create a copy of the model.
    * The copy is shallow, except for the embedded paramMap, which gets a deep copy.
    *

http://git-wip-us.apache.org/repos/asf/spark/blob/098be27a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala

diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
index 340c355..3758edc 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/DeveloperApiExample.scala
@@ -172,6 +172,9 @@ private class MyLogisticRegressionModel(
   /** Number of classes the label can take. 2 indicates binary classification. */
   override val numClasses: Int = 2

+  /** Number of features the model was trained on. */
+  override val numFeatures: Int = weights.size
+
   /**
    * Create a copy of the model.
    * The copy is shallow, except for the embedded paramMap, which gets a deep copy.

http://git-wip-us.apache.org/repos/asf/spark/blob/098be27a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala

diff --git a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala
index 19fe039..e0dcd42 100644
---
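The contract the patch establishes, reduced to a sketch with hypothetical stand-in types (the real base class is org.apache.spark.ml.PredictionModel):

```scala
// Base models report -1 ("unknown") so models trained before this change
// keep working; concrete models override with the training-time width.
abstract class PredictionModelSketch {
  def numFeatures: Int = -1
}

class LinearModelSketch(val weights: Array[Double]) extends PredictionModelSketch {
  override val numFeatures: Int = weights.length
}
```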
spark git commit: [SPARK-10403] Allow UnsafeRowSerializer to work with tungsten-sort ShuffleManager
Repository: spark
Updated Branches: refs/heads/master 27bfa9ab3 -> a18208047

[SPARK-10403] Allow UnsafeRowSerializer to work with tungsten-sort ShuffleManager

This patch attempts to fix an issue where Spark SQL's UnsafeRowSerializer was incompatible with the `tungsten-sort` ShuffleManager.

Author: Josh Rosen

Closes #8873 from JoshRosen/SPARK-10403.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a1820804
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a1820804
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a1820804

Branch: refs/heads/master
Commit: a18208047f06a4244703c17023bb20cbe1f59d73
Parents: 27bfa9a
Author: Josh Rosen
Authored: Wed Sep 23 11:31:01 2015 -0700
Committer: Michael Armbrust
Committed: Wed Sep 23 11:31:01 2015 -0700

--
 .../sql/execution/UnsafeRowSerializer.scala  | 22 +--
 .../execution/UnsafeRowSerializerSuite.scala | 23 ++--
 2 files changed, 27 insertions(+), 18 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/a1820804/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
index e060c06..7e98126 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala
@@ -45,16 +45,9 @@ private[sql] class UnsafeRowSerializer(numFields: Int) extends Serializer with S
 }

 private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInstance {
-
-  /**
-   * Marks the end of a stream written with [[serializeStream()]].
-   */
-  private[this] val EOF: Int = -1
-
   /**
    * Serializes a stream of UnsafeRows. Within the stream, each record consists of a record
    * length (stored as a 4-byte integer, written high byte first), followed by the record's bytes.
-   * The end of the stream is denoted by a record with the special length `EOF` (-1).
    */
   override def serializeStream(out: OutputStream): SerializationStream = new SerializationStream {
     private[this] var writeBuffer: Array[Byte] = new Array[Byte](4096)
@@ -92,7 +85,6 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
     override def close(): Unit = {
       writeBuffer = null
-      dOut.writeInt(EOF)
       dOut.close()
     }
   }
@@ -104,12 +96,20 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
     private[this] var rowBuffer: Array[Byte] = new Array[Byte](1024)
     private[this] var row: UnsafeRow = new UnsafeRow()
    private[this] var rowTuple: (Int, UnsafeRow) = (0, row)
+    private[this] val EOF: Int = -1

     override def asKeyValueIterator: Iterator[(Int, UnsafeRow)] = {
       new Iterator[(Int, UnsafeRow)] {
-        private[this] var rowSize: Int = dIn.readInt()
-        if (rowSize == EOF) dIn.close()
+        private[this] def readSize(): Int = try {
+          dIn.readInt()
+        } catch {
+          case e: EOFException =>
+            dIn.close()
+            EOF
+        }
+
+        private[this] var rowSize: Int = readSize()

         override def hasNext: Boolean = rowSize != EOF

         override def next(): (Int, UnsafeRow) = {
@@ -118,7 +118,7 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst
           }
           ByteStreams.readFully(dIn, rowBuffer, 0, rowSize)
           row.pointTo(rowBuffer, Platform.BYTE_ARRAY_OFFSET, numFields, rowSize)
-          rowSize = dIn.readInt() // read the next row's size
+          rowSize = readSize()
           if (rowSize == EOF) { // We are returning the last row in this stream
             dIn.close()
             val _rowTuple = rowTuple

http://git-wip-us.apache.org/repos/asf/spark/blob/a1820804/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala
index 0113d05..f7d48bc 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/UnsafeRowSerializerSuite.scala
@@ -17,9 +17,10 @@ package org.apache.spark.sql.execution
spark git commit: [SPARK-10686] [ML] Add quantilesCol to AFTSurvivalRegression
Repository: spark
Updated Branches: refs/heads/master 098be27ad -> ce2b056d3

[SPARK-10686] [ML] Add quantilesCol to AFTSurvivalRegression

By default `quantilesCol` should be empty. If `quantileProbabilities` is set, we should append quantiles as a new column (of type Vector).

Author: Yanbo Liang

Closes #8836 from yanboliang/spark-10686.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ce2b056d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ce2b056d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ce2b056d

Branch: refs/heads/master
Commit: ce2b056d35c0c75d5c162b93680ee2d84152e911
Parents: 098be27
Author: Yanbo Liang
Authored: Wed Sep 23 15:26:02 2015 -0700
Committer: Xiangrui Meng
Committed: Wed Sep 23 15:26:02 2015 -0700

--
 .../ml/regression/AFTSurvivalRegression.scala   | 51 +++---
 .../regression/AFTSurvivalRegressionSuite.scala | 74 +---
 2 files changed, 91 insertions(+), 34 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/ce2b056d/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala

diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
index 5b25db6..717caac 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala
@@ -41,7 +41,7 @@ import org.apache.spark.storage.StorageLevel
  */
 private[regression] trait AFTSurvivalRegressionParams extends Params
   with HasFeaturesCol with HasLabelCol with HasPredictionCol with HasMaxIter
-  with HasTol with HasFitIntercept {
+  with HasTol with HasFitIntercept with Logging {

   /**
    * Param for censor column name.
@@ -59,21 +59,35 @@ private[regression] trait AFTSurvivalRegressionParams extends Params

   /**
    * Param for quantile probabilities array.
-   * Values of the quantile probabilities array should be in the range [0, 1].
+   * Values of the quantile probabilities array should be in the range [0, 1]
+   * and the array should be non-empty.
    * @group param
    */
   @Since("1.6.0")
   final val quantileProbabilities: DoubleArrayParam = new DoubleArrayParam(this,
     "quantileProbabilities", "quantile probabilities array",
-    (t: Array[Double]) => t.forall(ParamValidators.inRange(0, 1)))
+    (t: Array[Double]) => t.forall(ParamValidators.inRange(0, 1)) && t.length > 0)

   /** @group getParam */
   @Since("1.6.0")
   def getQuantileProbabilities: Array[Double] = $(quantileProbabilities)
+  setDefault(quantileProbabilities -> Array(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99))

-  /** Checks whether the input has quantile probabilities array. */
-  protected[regression] def hasQuantileProbabilities: Boolean = {
-    isDefined(quantileProbabilities) && $(quantileProbabilities).size != 0
+  /**
+   * Param for quantiles column name.
+   * This column will output quantiles of corresponding quantileProbabilities if it is set.
+   * @group param
+   */
+  @Since("1.6.0")
+  final val quantilesCol: Param[String] = new Param(this, "quantilesCol", "quantiles column name")
+
+  /** @group getParam */
+  @Since("1.6.0")
+  def getQuantilesCol: String = $(quantilesCol)
+
+  /** Checks whether the input has quantiles column name. */
+  protected[regression] def hasQuantilesCol: Boolean = {
+    isDefined(quantilesCol) && $(quantilesCol) != ""
   }

   /**
@@ -90,6 +104,9 @@ private[regression] trait AFTSurvivalRegressionParams extends Params
       SchemaUtils.checkColumnType(schema, $(censorCol), DoubleType)
       SchemaUtils.checkColumnType(schema, $(labelCol), DoubleType)
     }
+    if (hasQuantilesCol) {
+      SchemaUtils.appendColumn(schema, $(quantilesCol), new VectorUDT)
+    }
     SchemaUtils.appendColumn(schema, $(predictionCol), DoubleType)
   }
 }
@@ -124,6 +141,14 @@ class AFTSurvivalRegression @Since("1.6.0") (@Since("1.6.0") override val uid: S
   @Since("1.6.0")
   def setPredictionCol(value: String): this.type = set(predictionCol, value)

+  /** @group setParam */
+  @Since("1.6.0")
+  def setQuantileProbabilities(value: Array[Double]): this.type = set(quantileProbabilities, value)
+
+  /** @group setParam */
+  @Since("1.6.0")
+  def setQuantilesCol(value: String): this.type = set(quantilesCol, value)
+
   /**
    * Set if we should fit the intercept
    * Default is true.
@@ -243,10 +268,12 @@ class AFTSurvivalRegressionModel private[ml] (
   @Since("1.6.0")
   def setQuantileProbabilities(value: Array[Double]): this.type =
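Putting the new setters together, a usage sketch (assumes a DataFrame `training` with the usual "features", "label" and "censor" columns; the column name "quantiles" is arbitrary):

```scala
import org.apache.spark.ml.regression.AFTSurvivalRegression
import org.apache.spark.sql.DataFrame

def fitWithQuantiles(training: DataFrame): DataFrame = {
  val aft = new AFTSurvivalRegression()
    .setQuantileProbabilities(Array(0.25, 0.5, 0.75))
    .setQuantilesCol("quantiles") // left unset, no quantiles column is added
  val model = aft.fit(training)
  // transform() appends a Vector column "quantiles" holding the predicted
  // quantiles at the requested probabilities, alongside "prediction".
  model.transform(training)
}
```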
Git Push Summary
Repository: spark
Updated Tags: refs/tags/v1.5.1-rc1 [deleted] 20db8186d
[1/2] spark git commit: Preparing Spark release v1.5.1-rc1
Repository: spark Updated Branches: refs/heads/branch-1.5 cdc4ac003 -> 179f36ed3 Preparing Spark release v1.5.1-rc1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f894dd6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f894dd6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f894dd6 Branch: refs/heads/branch-1.5 Commit: 4f894dd6906311cb57add6757690069a18078783 Parents: cdc4ac0 Author: Patrick WendellAuthored: Wed Sep 23 21:32:10 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 21:32:10 2015 -0700 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 33 files changed, 33 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 7671ba2..03d4973 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 02e920d..6f058ff 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 03d26df..f32ce5d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index eb1910e..f28847e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 0de2f03..e7bd0d2 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 66ab1b2..e5a5503 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/4f894dd6/external/flume/pom.xml -- diff --git a/external/flume/pom.xml 
b/external/flume/pom.xml index c058490..b5e9423 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.2-SNAPSHOT +1.5.1 ../../pom.xml
[2/2] spark git commit: Preparing development version 1.5.2-SNAPSHOT
Preparing development version 1.5.2-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/179f36ed Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/179f36ed Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/179f36ed Branch: refs/heads/branch-1.5 Commit: 179f36ed32c1026b5e5d906780608e259c265b1e Parents: 4f894dd Author: Patrick WendellAuthored: Wed Sep 23 21:32:16 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 21:32:16 2015 -0700 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 33 files changed, 33 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 03d4973..7671ba2 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 6f058ff..02e920d 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index f32ce5d..03d26df 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index f28847e..eb1910e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index e7bd0d2..0de2f03 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index e5a5503..66ab1b2 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/179f36ed/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index b5e9423..c058490 100644 --- 
a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml
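These back-to-back commits are the standard Maven release dance: every module's parent version is pinned to the release number (1.5.1) for the tag, and the branch is immediately moved to the next patch's development version (1.5.2-SNAPSHOT). The version arithmetic is mechanical; a minimal illustrative helper in Scala (not Spark's actual release script):

    // Compute the next development version from a just-tagged release.
    def nextDevVersion(release: String): String = {
      val Array(major, minor, patch) = release.split("\\.").map(_.toInt)
      s"$major.$minor.${patch + 1}-SNAPSHOT"
    }

    assert(nextDevVersion("1.5.1") == "1.5.2-SNAPSHOT")
    assert(nextDevVersion("1.4.2") == "1.4.3-SNAPSHOT")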
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.5.1-rc1 [created] 4f894dd69
spark git commit: [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2)
Repository: spark Updated Branches: refs/heads/master 084e4e126 -> 83f6f54d1 [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2) This patch reverts most of the changes in a previous fix #8827. The real cause of the issue is that in `TungstenAggregate`'s prepare method we only reserve 1 page, but later when we switch to sort-based aggregation we try to acquire 1 page AND a pointer array. The longer-term fix should be to reserve also the pointer array, but for now ***we will simply not track the pointer array***. (Note that elsewhere we already don't track the pointer array, e.g. [here](https://github.com/apache/spark/blob/a18208047f06a4244703c17023bb20cbe1f59d73/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java#L88)) Note: This patch reuses the unit test added in #8827 so it doesn't show up in the diff. Author: Andrew OrCloses # from andrewor14/dont-track-pointer-array. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/83f6f54d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/83f6f54d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/83f6f54d Branch: refs/heads/master Commit: 83f6f54d12a418f5158ee7ee985b54eef8cc1cf0 Parents: 084e4e1 Author: Andrew Or Authored: Wed Sep 23 19:34:31 2015 -0700 Committer: Andrew Or Committed: Wed Sep 23 19:34:31 2015 -0700 -- .../unsafe/sort/UnsafeExternalSorter.java | 51 +--- .../sql/execution/UnsafeKVExternalSorter.java | 9 +--- .../UnsafeFixedWidthAggregationMapSuite.scala | 8 +-- 3 files changed, 16 insertions(+), 52 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/83f6f54d/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java -- diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 14b6aaf..0a311d2 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -159,16 +159,15 @@ public final class UnsafeExternalSorter { /** * Allocates new sort data structures. Called when creating the sorter and after each spill. */ - public void initializeForWriting() throws IOException { + private void initializeForWriting() throws IOException { +// Note: Do not track memory for the pointer array for now because of SPARK-10474. +// In more detail, in TungstenAggregate we only reserve a page, but when we fall back to +// sort-based aggregation we try to acquire a page AND a pointer array, which inevitably +// fails if all other memory is already occupied. It should be safe to not track the array +// because its memory footprint is frequently much smaller than that of a page. This is a +// temporary hack that we should address in 1.6.0. +// TODO: track the pointer array memory! 
this.writeMetrics = new ShuffleWriteMetrics(); -final long pointerArrayMemory = - UnsafeInMemorySorter.getMemoryRequirementsForPointerArray(initialSize); -final long memoryAcquired = shuffleMemoryManager.tryToAcquire(pointerArrayMemory); -if (memoryAcquired != pointerArrayMemory) { - shuffleMemoryManager.release(memoryAcquired); - throw new IOException("Could not acquire " + pointerArrayMemory + " bytes of memory"); -} - this.inMemSorter = new UnsafeInMemorySorter(taskMemoryManager, recordComparator, prefixComparator, initialSize); this.isInMemSorterExternal = false; @@ -187,14 +186,6 @@ public final class UnsafeExternalSorter { * Sort and spill the current records in response to memory pressure. */ public void spill() throws IOException { -spill(true); - } - - /** - * Sort and spill the current records in response to memory pressure. - * @param shouldInitializeForWriting whether to allocate memory for writing after the spill - */ - public void spill(boolean shouldInitializeForWriting) throws IOException { assert(inMemSorter != null); logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", Thread.currentThread().getId(), @@ -225,9 +216,7 @@ public final class UnsafeExternalSorter { // written to disk. This also counts the space needed to store the sorter's pointer array. taskContext.taskMetrics().incMemoryBytesSpilled(spillSize); -if (shouldInitializeForWriting) { - initializeForWriting(); -} +
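The acquisition logic removed above is the all-or-nothing pattern that was failing: ask the shared shuffle memory manager for an exact byte count, hand back any partial grant, and raise if the full amount is unavailable. A minimal Scala sketch of that pattern, with a hypothetical ShuffleMemoryManagerLike trait standing in for Spark's manager:

    import java.io.IOException

    // Hypothetical stand-in for the shared shuffle memory manager;
    // tryToAcquire may grant fewer bytes than were requested.
    trait ShuffleMemoryManagerLike {
      def tryToAcquire(numBytes: Long): Long
      def release(numBytes: Long): Unit
    }

    def acquireExactly(mm: ShuffleMemoryManagerLike, needed: Long): Unit = {
      val granted = mm.tryToAcquire(needed)
      if (granted != needed) {
        // Return the partial grant before failing, otherwise those bytes
        // leak from the shared budget.
        mm.release(granted)
        throw new IOException(s"Could not acquire $needed bytes of memory")
      }
    }

When TungstenAggregate has already reserved its single page, this exact-amount acquisition for the pointer array is the call that fails under memory pressure, which is why the interim fix stops tracking the array rather than reserving it up front.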
spark git commit: [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2)
Repository: spark Updated Branches: refs/heads/branch-1.5 7564c2493 -> 1f47e68f5 [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2) This patch reverts most of the changes in a previous fix #8827. The real cause of the issue is that in `TungstenAggregate`'s prepare method we only reserve 1 page, but later when we switch to sort-based aggregation we try to acquire 1 page AND a pointer array. The longer-term fix should be to reserve also the pointer array, but for now ***we will simply not track the pointer array***. (Note that elsewhere we already don't track the pointer array, e.g. [here](https://github.com/apache/spark/blob/a18208047f06a4244703c17023bb20cbe1f59d73/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java#L88)) Note: This patch reuses the unit test added in #8827 so it doesn't show up in the diff. Author: Andrew OrCloses # from andrewor14/dont-track-pointer-array. (cherry picked from commit 83f6f54d12a418f5158ee7ee985b54eef8cc1cf0) Signed-off-by: Andrew Or Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1f47e68f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1f47e68f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1f47e68f Branch: refs/heads/branch-1.5 Commit: 1f47e68f56398e4f39b3b00650aea6f71e832115 Parents: 7564c24 Author: Andrew Or Authored: Wed Sep 23 19:34:31 2015 -0700 Committer: Andrew Or Committed: Wed Sep 23 19:34:47 2015 -0700 -- .../unsafe/sort/UnsafeExternalSorter.java | 51 +--- .../sql/execution/UnsafeKVExternalSorter.java | 9 +--- .../UnsafeFixedWidthAggregationMapSuite.scala | 8 +-- 3 files changed, 16 insertions(+), 52 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1f47e68f/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java -- diff --git a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java index 14b6aaf..0a311d2 100644 --- a/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java +++ b/core/src/main/java/org/apache/spark/util/collection/unsafe/sort/UnsafeExternalSorter.java @@ -159,16 +159,15 @@ public final class UnsafeExternalSorter { /** * Allocates new sort data structures. Called when creating the sorter and after each spill. */ - public void initializeForWriting() throws IOException { + private void initializeForWriting() throws IOException { +// Note: Do not track memory for the pointer array for now because of SPARK-10474. +// In more detail, in TungstenAggregate we only reserve a page, but when we fall back to +// sort-based aggregation we try to acquire a page AND a pointer array, which inevitably +// fails if all other memory is already occupied. It should be safe to not track the array +// because its memory footprint is frequently much smaller than that of a page. This is a +// temporary hack that we should address in 1.6.0. +// TODO: track the pointer array memory! 
this.writeMetrics = new ShuffleWriteMetrics(); -final long pointerArrayMemory = - UnsafeInMemorySorter.getMemoryRequirementsForPointerArray(initialSize); -final long memoryAcquired = shuffleMemoryManager.tryToAcquire(pointerArrayMemory); -if (memoryAcquired != pointerArrayMemory) { - shuffleMemoryManager.release(memoryAcquired); - throw new IOException("Could not acquire " + pointerArrayMemory + " bytes of memory"); -} - this.inMemSorter = new UnsafeInMemorySorter(taskMemoryManager, recordComparator, prefixComparator, initialSize); this.isInMemSorterExternal = false; @@ -187,14 +186,6 @@ public final class UnsafeExternalSorter { * Sort and spill the current records in response to memory pressure. */ public void spill() throws IOException { -spill(true); - } - - /** - * Sort and spill the current records in response to memory pressure. - * @param shouldInitializeForWriting whether to allocate memory for writing after the spill - */ - public void spill(boolean shouldInitializeForWriting) throws IOException { assert(inMemSorter != null); logger.info("Thread {} spilling sort data of {} to disk ({} {} so far)", Thread.currentThread().getId(), @@ -225,9 +216,7 @@ public final class UnsafeExternalSorter { // written to disk. This also counts the space needed to store the sorter's pointer array.
spark git commit: Update branch-1.5 for 1.5.1 release.
Repository: spark Updated Branches: refs/heads/branch-1.5 1f47e68f5 -> 1000b5d7e Update branch-1.5 for 1.5.1 release. Author: Reynold XinCloses #8890 from rxin/release-1.5.1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1000b5d7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1000b5d7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1000b5d7 Branch: refs/heads/branch-1.5 Commit: 1000b5d7eed22935c07bc2970cc20d2a78241728 Parents: 1f47e68 Author: Reynold Xin Authored: Wed Sep 23 19:46:13 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 19:46:13 2015 -0700 -- CHANGES.txt | 468 +++ R/pkg/DESCRIPTION | 2 +- .../main/scala/org/apache/spark/package.scala | 2 +- dev/create-release/generate-changelist.py | 4 +- docs/_config.yml| 4 +- ec2/spark_ec2.py| 8 +- 6 files changed, 479 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1000b5d7/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 95f80d8..58c5764 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,474 @@ Spark Change Log +Release 1.5.1 + + Bump R version + Reynold Xin + 2015-09-23 19:44:09 -0700 + Commit: 1c60fc1 + + Update branch-1.5 for 1.5.1 release. + Reynold Xin + 2015-09-23 18:14:36 -0700 + Commit: a32934c + + [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2) + Andrew Or + 2015-09-23 19:34:31 -0700 + Commit: 1f47e68, github.com/apache/spark/pull/ + + [SPARK-10731] [SQL] Delegate to Scala's DataFrame.take implementation in Python DataFrame. + Reynold Xin + 2015-09-23 16:43:21 -0700 + Commit: 7564c24, github.com/apache/spark/pull/8876 + + [SPARK-10403] Allow UnsafeRowSerializer to work with tungsten-sort ShuffleManager + Josh Rosen + 2015-09-23 11:31:01 -0700 + Commit: 64cc62c, github.com/apache/spark/pull/8873 + + [SPARK-9710] [TEST] Fix RPackageUtilsSuite when R is not available. + Marcelo Vanzin + 2015-08-10 10:10:40 -0700 + Commit: 6c6cadb, github.com/apache/spark/pull/8008 + + [SPARK-10769] [STREAMING] [TESTS] Fix o.a.s.streaming.CheckpointSuite.maintains rate controller + zsxwing + 2015-09-23 01:29:30 -0700 + Commit: 4174b94, github.com/apache/spark/pull/8877 + + [SPARK-10224] [STREAMING] Fix the issue that blockIntervalTimer won't call updateCurrentBuffer when stopping + zsxwing + 2015-09-23 01:28:02 -0700 + Commit: 6a616d0, github.com/apache/spark/pull/8417 + + [SPARK-10652] [SPARK-10742] [STREAMING] Set meaningful job descriptions for all streaming jobs + Tathagata Das + 2015-09-22 22:44:09 -0700 + Commit: 8a23ef5, github.com/apache/spark/pull/8791 + + [SPARK-10663] Removed unnecessary invocation of DataFrame.toDF method. 
+ Matt Hagen + 2015-09-22 21:14:25 -0700 + Commit: 7f07cc6, github.com/apache/spark/pull/8875 + + [SPARK-10310] [SQL] Fixes script transformation field/line delimiters + Cheng Lian + 2015-09-22 19:41:57 -0700 + Commit: 73d0621, github.com/apache/spark/pull/8860 + + [SPARK-10640] History server fails to parse TaskCommitDenied + Andrew Or + 2015-09-22 16:35:43 -0700 + Commit: 26187ab, github.com/apache/spark/pull/8828 + + Revert "[SPARK-10640] History server fails to parse TaskCommitDenied" + Andrew Or + 2015-09-22 17:10:58 -0700 + Commit: 118ebd4 + + [SPARK-10640] History server fails to parse TaskCommitDenied + Andrew Or + 2015-09-22 16:35:43 -0700 + Commit: 5ffd084, github.com/apache/spark/pull/8828 + + [SPARK-10714] [SPARK-8632] [SPARK-10685] [SQL] Refactor Python UDF handling + Reynold Xin + 2015-09-22 14:11:46 -0700 + Commit: 3339916, github.com/apache/spark/pull/8835 + + [SPARK-10737] [SQL] When using UnsafeRows, SortMergeJoin may return wrong results + Yin Huai + 2015-09-22 13:31:35 -0700 + Commit: 6b1e5c2, github.com/apache/spark/pull/8854 + + [SPARK-10672] [SQL] Do not fail when we cannot save the metadata of a data source table in a hive compatible way + Yin Huai + 2015-09-22 13:29:39 -0700 + Commit: d83dcc9, github.com/apache/spark/pull/8824 + + [SPARK-10740] [SQL] handle nondeterministic expressions correctly for set
[2/2] spark git commit: Preparing development version 1.4.3-SNAPSHOT
Preparing development version 1.4.3-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4a74a28 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4a74a28 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4a74a28 Branch: refs/heads/branch-1.4 Commit: d4a74a28fee2e8c7e0b1cdff04c18dbe0dd7fdff Parents: 0b22a3c Author: Patrick WendellAuthored: Wed Sep 23 19:50:34 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 19:50:34 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml| 2 +- yarn/pom.xml | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 261a5f4..f1be8df 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index e4c0c71..45429e6 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 74d5e3d..5f753ac 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 86a0327..b080518 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index a7845f5..dd761ff 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 8357103..a16e636 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 +1.4.3-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/d4a74a28/external/kafka-assembly/pom.xml -- diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml index 3c97b20..64c650e 100644 --- a/external/kafka-assembly/pom.xml +++ b/external/kafka-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2 
+1.4.3-SNAPSHOT ../../pom.xml
[1/2] spark git commit: Preparing Spark release v1.4.2-rc1
Repository: spark Updated Branches: refs/heads/branch-1.4 51d9eadbe -> d4a74a28f Preparing Spark release v1.4.2-rc1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b22a3c7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b22a3c7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b22a3c7 Branch: refs/heads/branch-1.4 Commit: 0b22a3c7a3a40ff63a2e740ecab152141271b30d Parents: 51d9ead Author: Patrick WendellAuthored: Wed Sep 23 19:50:27 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 19:50:27 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml| 2 +- yarn/pom.xml | 2 +- 30 files changed, 30 insertions(+), 30 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 228db59..261a5f4 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index ce791a6..e4c0c71 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 176ea9b..74d5e3d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 877c2fb..86a0327 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index ad431fa..a7845f5 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 9789435..8357103 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b22a3c7/external/kafka-assembly/pom.xml -- diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml index 18b1d86..3c97b20 100644 --- a/external/kafka-assembly/pom.xml +++ b/external/kafka-assembly/pom.xml @@ 
-21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.4.2-SNAPSHOT +1.4.2 ../../pom.xml
spark git commit: Bump R version to 1.4.2.
Repository: spark Updated Branches: refs/heads/branch-1.4 8887abb06 -> 51d9eadbe Bump R version to 1.4.2. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/51d9eadb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/51d9eadb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/51d9eadb Branch: refs/heads/branch-1.4 Commit: 51d9eadbe7740c586718a5215941920365c79c23 Parents: 8887abb Author: Reynold Xin Authored: Wed Sep 23 19:48:28 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 19:48:28 2015 -0700 -- R/pkg/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/51d9eadb/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 7379f54..8ac6183 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,7 +1,7 @@ Package: SparkR Type: Package Title: R frontend for Spark -Version: 1.4.1 +Version: 1.4.2 Date: 2013-09-09 Author: The Apache Software Foundation Maintainer: Shivaram Venkataraman
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.4.2-rc1 [created] 0b22a3c7a
spark git commit: [SPARK-10692] [STREAMING] Expose failureReasons in BatchInfo for streaming UI to clear failed batches
Repository: spark Updated Branches: refs/heads/branch-1.5 1000b5d7e -> 4c48593bf [SPARK-10692] [STREAMING] Expose failureReasons in BatchInfo for streaming UI to clear failed batches Slightly modified version of #8818, all credit goes to zsxwing Author: zsxwingAuthor: Tathagata Das Closes #8892 from tdas/SPARK-10692. (cherry picked from commit 758c9d25e92417f8c06328c3af7ea2ef0212c79f) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4c48593b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4c48593b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4c48593b Branch: refs/heads/branch-1.5 Commit: 4c48593bf5d44218b42bc8be9573184dd95e6ff2 Parents: 1000b5d Author: zsxwing Authored: Wed Sep 23 19:52:02 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 19:52:10 2015 -0700 -- .../spark/streaming/scheduler/BatchInfo.scala | 10 +++ .../streaming/scheduler/JobScheduler.scala | 26 +++ .../spark/streaming/scheduler/JobSet.scala | 19 - .../streaming/StreamingListenerSuite.scala | 76 4 files changed, 115 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4c48593b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala index 9922b6b..3c86956 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala @@ -39,6 +39,8 @@ case class BatchInfo( processingEndTime: Option[Long] ) { + private var _failureReasons: Map[Int, String] = Map.empty + @deprecated("Use streamIdToInputInfo instead", "1.5.0") def streamIdToNumRecords: Map[Int, Long] = streamIdToInputInfo.mapValues(_.numRecords) @@ -67,4 +69,12 @@ case class BatchInfo( * The number of recorders received by the receivers in this batch. 
*/ def numRecords: Long = streamIdToInputInfo.values.map(_.numRecords).sum + + /** Set the failure reasons corresponding to every output ops in the batch */ + private[streaming] def setFailureReason(reasons: Map[Int, String]): Unit = { +_failureReasons = reasons + } + + /** Failure reasons corresponding to every output ops in the batch */ + private[streaming] def failureReasons = _failureReasons } http://git-wip-us.apache.org/repos/asf/spark/blob/4c48593b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala index fb51b0b..b5546db 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala @@ -166,22 +166,22 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { } private def handleJobCompletion(job: Job) { +val jobSet = jobSets.get(job.time) +jobSet.handleJobCompletion(job) +logInfo("Finished job " + job.id + " from job set of time " + jobSet.time) +if (jobSet.hasCompleted) { + jobSets.remove(jobSet.time) + jobGenerator.onBatchCompletion(jobSet.time) + logInfo("Total delay: %.3f s for time %s (execution: %.3f s)".format( +jobSet.totalDelay / 1000.0, jobSet.time.toString, +jobSet.processingDelay / 1000.0 + )) + listenerBus.post(StreamingListenerBatchCompleted(jobSet.toBatchInfo)) +} job.result match { - case Success(_) => -val jobSet = jobSets.get(job.time) -jobSet.handleJobCompletion(job) -logInfo("Finished job " + job.id + " from job set of time " + jobSet.time) -if (jobSet.hasCompleted) { - jobSets.remove(jobSet.time) - jobGenerator.onBatchCompletion(jobSet.time) - logInfo("Total delay: %.3f s for time %s (execution: %.3f s)".format( -jobSet.totalDelay / 1000.0, jobSet.time.toString, -jobSet.processingDelay / 1000.0 - )) - listenerBus.post(StreamingListenerBatchCompleted(jobSet.toBatchInfo)) -} case Failure(e) => reportError("Error running job " + job, e) + case _ => } }
spark git commit: [SPARK-10692] [STREAMING] Expose failureReasons in BatchInfo for streaming UI to clear failed batches
Repository: spark Updated Branches: refs/heads/master 83f6f54d1 -> 758c9d25e [SPARK-10692] [STREAMING] Expose failureReasons in BatchInfo for streaming UI to clear failed batches Slightly modified version of #8818, all credit goes to zsxwing Author: zsxwingAuthor: Tathagata Das Closes #8892 from tdas/SPARK-10692. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/758c9d25 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/758c9d25 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/758c9d25 Branch: refs/heads/master Commit: 758c9d25e92417f8c06328c3af7ea2ef0212c79f Parents: 83f6f54 Author: zsxwing Authored: Wed Sep 23 19:52:02 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 19:52:02 2015 -0700 -- .../spark/streaming/scheduler/BatchInfo.scala | 10 +++ .../streaming/scheduler/JobScheduler.scala | 26 +++ .../spark/streaming/scheduler/JobSet.scala | 19 - .../streaming/StreamingListenerSuite.scala | 76 4 files changed, 115 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/758c9d25/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala index 9922b6b..3c86956 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/BatchInfo.scala @@ -39,6 +39,8 @@ case class BatchInfo( processingEndTime: Option[Long] ) { + private var _failureReasons: Map[Int, String] = Map.empty + @deprecated("Use streamIdToInputInfo instead", "1.5.0") def streamIdToNumRecords: Map[Int, Long] = streamIdToInputInfo.mapValues(_.numRecords) @@ -67,4 +69,12 @@ case class BatchInfo( * The number of recorders received by the receivers in this batch. 
*/ def numRecords: Long = streamIdToInputInfo.values.map(_.numRecords).sum + + /** Set the failure reasons corresponding to every output ops in the batch */ + private[streaming] def setFailureReason(reasons: Map[Int, String]): Unit = { +_failureReasons = reasons + } + + /** Failure reasons corresponding to every output ops in the batch */ + private[streaming] def failureReasons = _failureReasons } http://git-wip-us.apache.org/repos/asf/spark/blob/758c9d25/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala index 32d995d..66afbf1 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobScheduler.scala @@ -166,22 +166,22 @@ class JobScheduler(val ssc: StreamingContext) extends Logging { } private def handleJobCompletion(job: Job) { +val jobSet = jobSets.get(job.time) +jobSet.handleJobCompletion(job) +logInfo("Finished job " + job.id + " from job set of time " + jobSet.time) +if (jobSet.hasCompleted) { + jobSets.remove(jobSet.time) + jobGenerator.onBatchCompletion(jobSet.time) + logInfo("Total delay: %.3f s for time %s (execution: %.3f s)".format( +jobSet.totalDelay / 1000.0, jobSet.time.toString, +jobSet.processingDelay / 1000.0 + )) + listenerBus.post(StreamingListenerBatchCompleted(jobSet.toBatchInfo)) +} job.result match { - case Success(_) => -val jobSet = jobSets.get(job.time) -jobSet.handleJobCompletion(job) -logInfo("Finished job " + job.id + " from job set of time " + jobSet.time) -if (jobSet.hasCompleted) { - jobSets.remove(jobSet.time) - jobGenerator.onBatchCompletion(jobSet.time) - logInfo("Total delay: %.3f s for time %s (execution: %.3f s)".format( -jobSet.totalDelay / 1000.0, jobSet.time.toString, -jobSet.processingDelay / 1000.0 - )) - listenerBus.post(StreamingListenerBatchCompleted(jobSet.toBatchInfo)) -} case Failure(e) => reportError("Error running job " + job, e) + case _ => } } http://git-wip-us.apache.org/repos/asf/spark/blob/758c9d25/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobSet.scala
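The JobScheduler change in both commits is a pure control-flow refactor: the batch-completion bookkeeping (removing the job set, notifying the job generator, posting StreamingListenerBatchCompleted) now runs whether the job succeeded or failed, and the pattern match is left to do nothing but error reporting. A stripped-down Scala sketch of the new shape, with hypothetical Job and callback types:

    import scala.util.{Failure, Try}

    final case class Job(id: Int, result: Try[Unit])

    def handleJobCompletion(
        job: Job,
        completeBatch: () => Unit,   // remove job set, post the listener event
        reportError: Throwable => Unit): Unit = {
      completeBatch()                // runs on success AND on failure
      job.result match {
        case Failure(e) => reportError(e)
        case _          =>           // success needs no extra handling
      }
    }

Because the listener event now fires for failed jobs too, the streaming UI finally sees those batches complete and can clear them, with the per-output-op reasons carried in BatchInfo's failureReasons.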
spark git commit: Update release notes.
Repository: spark Updated Branches: refs/heads/branch-1.5 4c48593bf -> c8a3d6630 Update release notes. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8a3d663 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8a3d663 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8a3d663 Branch: refs/heads/branch-1.5 Commit: c8a3d66308e7b1fb1bddbec0e00f5d6336393951 Parents: 4c48593 Author: Reynold Xin Authored: Wed Sep 23 19:53:56 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 19:53:56 2015 -0700 -- CHANGES.txt | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c8a3d663/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 58c5764..449afa6 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -3,15 +3,15 @@ Spark Change Log Release 1.5.1 - Bump R version - Reynold Xin - 2015-09-23 19:44:09 -0700 - Commit: 1c60fc1 + [SPARK-10692] [STREAMING] Expose failureReasons in BatchInfo for streaming UI to clear failed batches + zsxwing , Tathagata Das + 2015-09-23 19:52:02 -0700 + Commit: 4c48593, github.com/apache/spark/pull/8892 Update branch-1.5 for 1.5.1 release. Reynold Xin - 2015-09-23 18:14:36 -0700 - Commit: a32934c + 2015-09-23 19:46:13 -0700 + Commit: 1000b5d, github.com/apache/spark/pull/8890 [SPARK-10474] [SQL] Aggregation fails to allocate memory for pointer array (round 2) Andrew Or
[1/2] spark git commit: Preparing Spark release v1.5.1-rc1
Repository: spark Updated Branches: refs/heads/branch-1.5 c8a3d6630 -> cdc4ac003 Preparing Spark release v1.5.1-rc1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20db8186 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20db8186 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20db8186 Branch: refs/heads/branch-1.5 Commit: 20db8186dcd40b8d986a78ce2c9d594ae9f2e476 Parents: c8a3d66 Author: Patrick WendellAuthored: Wed Sep 23 19:55:19 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 19:55:19 2015 -0700 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 33 files changed, 33 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 7b41ebb..03d4973 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 16bf17c..6f058ff 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index beb547f..f32ce5d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 3926b79..f28847e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 5eda12d..e7bd0d2 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 33f2cd7..e5a5503 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20db8186/external/flume/pom.xml -- diff --git a/external/flume/pom.xml 
b/external/flume/pom.xml index 670c783..b5e9423 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1-SNAPSHOT +1.5.1 ../../pom.xml
[2/2] spark git commit: Preparing development version 1.5.2-SNAPSHOT
Preparing development version 1.5.2-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cdc4ac00 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cdc4ac00 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cdc4ac00 Branch: refs/heads/branch-1.5 Commit: cdc4ac0035c9786e0d90710f7c08cf37496da525 Parents: 20db818 Author: Patrick WendellAuthored: Wed Sep 23 19:55:27 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 19:55:27 2015 -0700 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 33 files changed, 33 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 03d4973..7671ba2 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 6f058ff..02e920d 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index f32ce5d..03d26df 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index f28847e..eb1910e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index e7bd0d2..0de2f03 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index e5a5503..66ab1b2 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cdc4ac00/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index b5e9423..c058490 100644 --- 
a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.5.1 +1.5.2-SNAPSHOT ../../pom.xml
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.5.1-rc1 [created] 20db8186d
spark git commit: [SPARK-10699] [ML] Support checkpointInterval can be disabled
Repository: spark Updated Branches: refs/heads/master ce2b056d3 -> 067afb4e9 [SPARK-10699] [ML] Support checkpointInterval can be disabled Currently use can set ```checkpointInterval``` to specify how often should the cache be check-pointed. But we also need the function that users can disable it. This PR supports that users can disable checkpoint if user setting ```checkpointInterval = -1```. We also add documents for GBT ```cacheNodeIds``` to make users can understand more clearly about checkpoint. Author: Yanbo LiangCloses #8820 from yanboliang/spark-10699. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/067afb4e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/067afb4e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/067afb4e Branch: refs/heads/master Commit: 067afb4e9bb227f159bcbc2aafafce9693303ea9 Parents: ce2b056 Author: Yanbo Liang Authored: Wed Sep 23 16:41:42 2015 -0700 Committer: Joseph K. Bradley Committed: Wed Sep 23 16:41:42 2015 -0700 -- .../spark/ml/classification/DecisionTreeClassifier.scala | 1 - .../org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala | 6 +++--- .../scala/org/apache/spark/ml/param/shared/sharedParams.scala | 4 ++-- .../main/scala/org/apache/spark/ml/recommendation/ALS.scala| 2 +- .../main/scala/org/apache/spark/ml/tree/impl/NodeIdCache.scala | 2 +- mllib/src/main/scala/org/apache/spark/ml/tree/treeParams.scala | 4 ++-- 6 files changed, 9 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/067afb4e/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index a6f6d46..b0157f7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -19,7 +19,6 @@ package org.apache.spark.ml.classification import org.apache.spark.annotation.Experimental import org.apache.spark.ml.param.ParamMap -import org.apache.spark.ml.param.shared.HasCheckpointInterval import org.apache.spark.ml.tree.{DecisionTreeModel, DecisionTreeParams, Node, TreeClassifierParams} import org.apache.spark.ml.tree.impl.RandomForest import org.apache.spark.ml.util.{Identifiable, MetadataUtils} http://git-wip-us.apache.org/repos/asf/spark/blob/067afb4e/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 8049d51..8cb6b54 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala @@ -56,9 +56,9 @@ private[shared] object SharedParamsCodeGen { ParamDesc[String]("inputCol", "input column name"), ParamDesc[Array[String]]("inputCols", "input column names"), ParamDesc[String]("outputCol", "output column name", Some("uid + \"__output\"")), - ParamDesc[Int]("checkpointInterval", "checkpoint interval (>= 1). E.g. 
10 means that " + -"the cache will get checkpointed every 10 iterations.", -isValid = "ParamValidators.gtEq(1)"), + ParamDesc[Int]("checkpointInterval", "set checkpoint interval (>= 1) or " + +"disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed " + +"every 10 iterations", isValid = "(interval: Int) => interval == -1 || interval >= 1"), ParamDesc[Boolean]("fitIntercept", "whether to fit an intercept term", Some("true")), ParamDesc[String]("handleInvalid", "how to handle invalid entries. Options are skip (which " + "will filter out rows with bad values), or error (which will throw an errror). More " + http://git-wip-us.apache.org/repos/asf/spark/blob/067afb4e/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala index aff47fc..e362521 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/sharedParams.scala +++
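The new validator shown in the diff, (interval: Int) => interval == -1 || interval >= 1, admits exactly two regimes: -1 disables checkpointing entirely, and any value >= 1 checkpoints every that many iterations; 0 and other negative values are rejected. A self-contained Scala sketch of how the rule behaves (shouldCheckpoint is an illustrative helper, not Spark's code):

    val checkpointIntervalIsValid: Int => Boolean =
      (interval: Int) => interval == -1 || interval >= 1

    // Illustrative helper: does iteration `i` trigger a checkpoint?
    def shouldCheckpoint(interval: Int, i: Int): Boolean =
      interval != -1 && i % interval == 0

    assert(checkpointIntervalIsValid(-1))   // disabled, but a valid setting
    assert(checkpointIntervalIsValid(10))   // checkpoint every 10 iterations
    assert(!checkpointIntervalIsValid(0))   // zero is rejected
    assert(!shouldCheckpoint(-1, 100))      // never fires when disabled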
spark git commit: [SPARK-10731] [SQL] Delegate to Scala's DataFrame.take implementation in Python DataFrame.
Repository: spark Updated Branches: refs/heads/master 067afb4e9 -> 995221774 [SPARK-10731] [SQL] Delegate to Scala's DataFrame.take implementation in Python DataFrame. Python DataFrame.head/take now requires scanning all the partitions. This pull request changes them to delegate the actual implementation to Scala DataFrame (by calling DataFrame.take). This is more of a hack for fixing this issue in 1.5.1. A more proper fix is to change executeCollect and executeTake to return InternalRow rather than Row, and thus eliminate the extra round-trip conversion. Author: Reynold XinCloses #8876 from rxin/SPARK-10731. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/99522177 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/99522177 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/99522177 Branch: refs/heads/master Commit: 9952217749118ae78fe794ca11e1c4a87a4ae8ba Parents: 067afb4 Author: Reynold Xin Authored: Wed Sep 23 16:43:21 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 16:43:21 2015 -0700 -- .../org/apache/spark/api/python/PythonRDD.scala | 2 +- python/pyspark/sql/dataframe.py | 5 +- .../org/apache/spark/sql/execution/python.scala | 417 +++ .../apache/spark/sql/execution/pythonUDFs.scala | 405 -- .../apache/spark/sql/test/ExamplePointUDT.scala | 16 +- 5 files changed, 429 insertions(+), 416 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/99522177/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 19be093..8464b57 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -633,7 +633,7 @@ private[spark] object PythonRDD extends Logging { * * The thread will terminate after all the data are sent or any exceptions happen. */ - private def serveIterator[T](items: Iterator[T], threadName: String): Int = { + def serveIterator[T](items: Iterator[T], threadName: String): Int = { val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost")) // Close the socket if no connection in 3 seconds serverSocket.setSoTimeout(3000) http://git-wip-us.apache.org/repos/asf/spark/blob/99522177/python/pyspark/sql/dataframe.py -- diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 80f8d8a..b09422a 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -300,7 +300,10 @@ class DataFrame(object): >>> df.take(2) [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')] """ -return self.limit(num).collect() +with SCCallSiteSync(self._sc) as css: +port = self._sc._jvm.org.apache.spark.sql.execution.EvaluatePython.takeAndServe( +self._jdf, num) +return list(_load_from_socket(port, BatchedSerializer(PickleSerializer( @ignore_unicode_prefix @since(1.3) http://git-wip-us.apache.org/repos/asf/spark/blob/99522177/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala new file mode 100644 index 000..d6aaf42 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala @@ -0,0 +1,417 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. 
See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +*http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.spark.sql.execution + +import java.io.OutputStream +import java.util.{List => JList, Map => JMap} + +import
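The one-line PythonRDD change above (serveIterator going from private to public) is what lets SQL's EvaluatePython.takeAndServe reuse it: rows are taken on the JVM side and streamed to the Python process over a short-lived localhost socket identified only by its port. A hedged Scala sketch of that serve-over-a-socket shape, with simplified serialization and hypothetical names rather than Spark's implementation:

    import java.io.{BufferedOutputStream, DataOutputStream}
    import java.net.{InetAddress, ServerSocket}

    // Bind an ephemeral localhost-only port, stream the items to the first
    // client that connects, then shut down. The caller hands the returned
    // port to the peer process, which connects and drains the stream.
    def serveInts(items: Iterator[Int]): Int = {
      val server = new ServerSocket(0, 1, InetAddress.getByName("localhost"))
      server.setSoTimeout(3000)   // give up if nobody connects within 3 s
      new Thread("serve-ints") {
        setDaemon(true)
        override def run(): Unit = {
          try {
            val sock = server.accept()
            val out = new DataOutputStream(new BufferedOutputStream(sock.getOutputStream))
            try {
              items.foreach(out.writeInt)
              out.flush()
            } finally {
              sock.close()
            }
          } finally {
            server.close()
          }
        }
      }.start()
      server.getLocalPort   // e.g. returned to Python, which connects back
    }

This is also why Python's take no longer scans every partition: the limiting happens in Scala's DataFrame.take, and the Python side just deserializes whatever arrives over the socket.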
spark git commit: [SPARK-10731] [SQL] Delegate to Scala's DataFrame.take implementation in Python DataFrame.
Repository: spark Updated Branches: refs/heads/branch-1.5 64cc62cb5 -> 7564c2493 [SPARK-10731] [SQL] Delegate to Scala's DataFrame.take implementation in Python DataFrame. Python DataFrame.head/take now requires scanning all the partitions. This pull request changes them to delegate the actual implementation to Scala DataFrame (by calling DataFrame.take). This is more of a hack for fixing this issue in 1.5.1. A more proper fix is to change executeCollect and executeTake to return InternalRow rather than Row, and thus eliminate the extra round-trip conversion. Author: Reynold XinCloses #8876 from rxin/SPARK-10731. (cherry picked from commit 9952217749118ae78fe794ca11e1c4a87a4ae8ba) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7564c249 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7564c249 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7564c249 Branch: refs/heads/branch-1.5 Commit: 7564c249333da2b68c8f9c519ad84f81aec0002d Parents: 64cc62c Author: Reynold Xin Authored: Wed Sep 23 16:43:21 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 16:43:34 2015 -0700 -- .../org/apache/spark/api/python/PythonRDD.scala | 2 +- python/pyspark/sql/dataframe.py | 5 +- .../org/apache/spark/sql/execution/python.scala | 412 +++ .../apache/spark/sql/execution/pythonUDFs.scala | 400 -- .../apache/spark/sql/test/ExamplePointUDT.scala | 16 +- 5 files changed, 424 insertions(+), 411 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7564c249/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index 8a48202..af86ef5 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -633,7 +633,7 @@ private[spark] object PythonRDD extends Logging { * * The thread will terminate after all the data are sent or any exceptions happen. 
*/ - private def serveIterator[T](items: Iterator[T], threadName: String): Int = { + def serveIterator[T](items: Iterator[T], threadName: String): Int = { val serverSocket = new ServerSocket(0, 1, InetAddress.getByName("localhost")) // Close the socket if no connection in 3 seconds serverSocket.setSoTimeout(3000) http://git-wip-us.apache.org/repos/asf/spark/blob/7564c249/python/pyspark/sql/dataframe.py -- diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 025811f..ee2eed2 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -300,7 +300,10 @@ class DataFrame(object): >>> df.take(2) [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')] """ -return self.limit(num).collect() +with SCCallSiteSync(self._sc) as css: +port = self._sc._jvm.org.apache.spark.sql.execution.EvaluatePython.takeAndServe( +self._jdf, num) +return list(_load_from_socket(port, BatchedSerializer(PickleSerializer( @ignore_unicode_prefix @since(1.3) http://git-wip-us.apache.org/repos/asf/spark/blob/7564c249/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala new file mode 100644 index 000..c967074 --- /dev/null +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/python.scala @@ -0,0 +1,412 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +*http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package
spark git commit: Update branch-1.3 for 1.3.2 release.
Repository: spark Updated Branches: refs/heads/branch-1.3 e54525f4a -> 392875ad3 Update branch-1.3 for 1.3.2 release. Author: Reynold Xin. Closes #8894 from rxin/branch-1.3. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/392875ad Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/392875ad Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/392875ad Branch: refs/heads/branch-1.3 Commit: 392875ad3a3c3f5acd98edaf3b53045ac3e7 Parents: e54525f Author: Reynold Xin Authored: Wed Sep 23 18:46:32 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 18:46:32 2015 -0700 -- CHANGES.txt | 643 +++ .../main/scala/org/apache/spark/package.scala | 2 +- dev/create-release/generate-changelist.py | 4 +- docs/_config.yml| 4 +- ec2/spark_ec2.py| 1 + 5 files changed, 649 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/392875ad/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 7da0244..97116be 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,6 +1,649 @@ Spark Change Log +Release 1.3.2 + + [SPARK-10381] Fix mixup of taskAttemptNumber & attemptId in OutputCommitCoordinator (branch-1.3 backport) + Josh Rosen + 2015-09-22 13:37:25 -0700 + Commit: e54525f, github.com/apache/spark/pull/8790 + + [SPARK-10657] Remove SCP-based Jenkins log archiving + Josh Rosen + 2015-09-17 11:40:24 -0700 + Commit: 64730a3, github.com/apache/spark/pull/8793 + + [SPARK-10642] [PYSPARK] Fix crash when calling rdd.lookup() on tuple keys + Liang-Chi Hsieh + 2015-09-17 10:02:15 -0700 + Commit: 7494034, github.com/apache/spark/pull/8796 + + [SPARK-10556] Remove explicit Scala version for sbt project build files + Ahir Reddy + 2015-09-11 13:06:14 +0100 + Commit: 8c8d7ab, github.com/apache/spark/pull/8709 + + [SPARK-6931] [PYSPARK] Cast Python time float values to int before serialization + Bryan Cutler + 2015-09-10 11:20:02 -0700 + Commit: d0d7ada, github.com/apache/spark/pull/8594 + + [MINOR] [MLLIB] [ML] [DOC] fixed typo: label for negative result should be 0.0 (original: 1.0) + Sean Paradiso + 2015-09-09 22:09:33 -0700 + Commit: 9fcd831, github.com/apache/spark/pull/8680 + + [SPARK-10353] [MLLIB] (1.3 backport) BLAS gemm not scaling when beta = 0.0 for some subset of matrix multiplications + Sean Owen + 2015-09-02 13:33:24 -0700 + Commit: 29836e2, github.com/apache/spark/pull/8572 + + [SPARK-10354] [MLLIB] fix some apparent memory issues in k-means|| initialization + Xiangrui Meng + 2015-08-30 23:20:03 -0700 + Commit: a58c1af, github.com/apache/spark/pull/8526 + + [SPARK-8400] [ML] Added check in ml.ALS for positive block size parameter setting + Bryan Cutler + 2015-08-25 12:36:49 +0100 + Commit: e8b0564, github.com/apache/spark/pull/8363 + + [SPARK-10169] [SQL] [BRANCH-1.3] Partial aggregation's plan is wrong when a grouping expression is used as an argument of the aggregate function + Wenchen Fan , Yin Huai + 2015-08-24 13:00:49 -0700 + Commit: 3d2eaf0, github.com/apache/spark/pull/8380 + + [SPARK-9801] [STREAMING] Check if file exists before deleting temporary files.
+ Hao Zhu + 2015-08-10 17:17:22 -0700 + Commit: a98603f, github.com/apache/spark/pull/8082 + + [SPARK-9633] [BUILD] SBT download locations outdated; need an update + Sean Owen + 2015-08-06 23:43:52 +0100 + Commit: b104501, github.com/apache/spark/pull/7956 + + [SPARK-9607] [SPARK-9608] fix zinc-port handling in build/mvn + Ryan Williams + 2015-08-05 11:10:47 +0100 + Commit: 384793d, github.com/apache/spark/pull/7944 + + [SPARK-3190] [GRAPHX] Fix VertexRDD.count() overflow regression + Ankur Dave + 2015-08-03 23:07:32 -0700 + Commit: cd5d1be, github.com/apache/spark/pull/7923 + + [SPARK-7563] (backport for 1.3) OutputCommitCoordinator.stop() should only run on the driver + Sean Owen + 2015-08-03 13:59:00 +0100 + Commit: 265ec35, github.com/apache/spark/pull/7865 + + [SPARK-9254] [BUILD] [HOTFIX] sbt-launch-lib.bash should support HTTP/HTTPS redirection + Cheng Lian + 2015-07-22 09:32:42 -0700 + Commit: cc5f711, github.com/apache/spark/pull/7597 + + [SPARK-9507] [BUILD]
spark git commit: Update branch-1.4 for 1.4.2 release.
Repository: spark Updated Branches: refs/heads/branch-1.4 df9e39470 -> 8887abb06 Update branch-1.4 for 1.4.2 release. Author: Reynold Xin. Closes #8891 from rxin/release-1.4.2. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8887abb0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8887abb0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8887abb0 Branch: refs/heads/branch-1.4 Commit: 8887abb06a01c8a264998457c5dfc8cf713675dd Parents: df9e394 Author: Reynold Xin Authored: Wed Sep 23 18:53:55 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 18:53:55 2015 -0700 -- core/src/main/scala/org/apache/spark/package.scala | 2 +- dev/create-release/generate-changelist.py | 4 ++-- docs/_config.yml | 4 ++-- ec2/spark_ec2.py | 2 ++ 4 files changed, 7 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8887abb0/core/src/main/scala/org/apache/spark/package.scala -- diff --git a/core/src/main/scala/org/apache/spark/package.scala b/core/src/main/scala/org/apache/spark/package.scala index 8f01174..c8d8e7c 100644 --- a/core/src/main/scala/org/apache/spark/package.scala +++ b/core/src/main/scala/org/apache/spark/package.scala @@ -43,5 +43,5 @@ package org.apache package object spark { // For package docs only - val SPARK_VERSION = "1.4.1" + val SPARK_VERSION = "1.4.2" } http://git-wip-us.apache.org/repos/asf/spark/blob/8887abb0/dev/create-release/generate-changelist.py -- diff --git a/dev/create-release/generate-changelist.py b/dev/create-release/generate-changelist.py index 148ed72..64b27cb 100755 --- a/dev/create-release/generate-changelist.py +++ b/dev/create-release/generate-changelist.py @@ -31,8 +31,8 @@ import time import traceback SPARK_HOME = os.environ["SPARK_HOME"] -NEW_RELEASE_VERSION = "1.4.1" -PREV_RELEASE_GIT_TAG = "v1.4.0" +NEW_RELEASE_VERSION = "1.4.2" +PREV_RELEASE_GIT_TAG = "v1.4.1" CHANGELIST = "CHANGES.txt" OLD_CHANGELIST = "%s.old" % (CHANGELIST) http://git-wip-us.apache.org/repos/asf/spark/blob/8887abb0/docs/_config.yml -- diff --git a/docs/_config.yml b/docs/_config.yml index 83f56e8..d1e0d0a 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,8 +14,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 1.4.1 -SPARK_VERSION_SHORT: 1.4.1 +SPARK_VERSION: 1.4.2 +SPARK_VERSION_SHORT: 1.4.2 SCALA_BINARY_VERSION: "2.10" SCALA_VERSION: "2.10.4" MESOS_VERSION: 0.21.0 http://git-wip-us.apache.org/repos/asf/spark/blob/8887abb0/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 3880c2d..b186287 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -72,6 +72,7 @@ VALID_SPARK_VERSIONS = set([ "1.3.1", "1.4.0", "1.4.1", +"1.4.2" ]) SPARK_TACHYON_MAP = { @@ -86,6 +87,7 @@ SPARK_TACHYON_MAP = { "1.3.1": "0.5.0", "1.4.0": "0.6.4", "1.4.1": "0.6.4", +"1.4.2": "0.6.4" } DEFAULT_SPARK_VERSION = SPARK_EC2_VERSION
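The spark_ec2.py hunks show the step that is easy to forget when cutting a release: the new version string has to be added both to VALID_SPARK_VERSIONS and to SPARK_TACHYON_MAP, since launch-time validation checks the first and the Tachyon resolution reads the second. A toy model of the lookup those two tables feed, written in Scala purely for illustration (the real helper is the Python script in the diff):

object Ec2VersionTablesSketch {
  // Condensed versions of the two tables the diff extends.
  private val validSparkVersions = Set("1.3.1", "1.4.0", "1.4.1", "1.4.2")
  private val sparkTachyonMap = Map(
    "1.3.1" -> "0.5.0",
    "1.4.0" -> "0.6.4",
    "1.4.1" -> "0.6.4",
    "1.4.2" -> "0.6.4")

  // Fail fast when a version is missing from either table, which is why a
  // release bump has to touch both entries together.
  def tachyonVersionFor(sparkVersion: String): String = {
    require(validSparkVersions.contains(sparkVersion),
      s"Spark $sparkVersion is not a launchable release")
    sparkTachyonMap(sparkVersion)
  }
}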
[2/2] spark git commit: [SPARK-6028][Core] A new RPC implementation based on the network module
[SPARK-6028][Core] A new RPC implementation based on the network module Design doc: https://docs.google.com/document/d/1CF5G6rGVQMKSyV_QKo4D2M-x6rxz5x1Ew7aK3Uq6u8c/edit?usp=sharing Author: zsxwing. Closes #6457 from zsxwing/new-rpc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/084e4e12 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/084e4e12 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/084e4e12 Branch: refs/heads/master Commit: 084e4e126211d74a79e8dbd2d0e604dd3c650822 Parents: 9952217 Author: zsxwing Authored: Wed Sep 23 18:59:49 2015 -0700 Committer: Reynold Xin Committed: Wed Sep 23 18:59:49 2015 -0700 -- .../org/apache/spark/MapOutputTracker.scala | 2 +- .../main/scala/org/apache/spark/SparkEnv.scala | 20 +- .../org/apache/spark/deploy/worker/Worker.scala | 2 +- .../spark/deploy/worker/WorkerWatcher.scala | 13 +- .../org/apache/spark/rpc/RpcCallContext.scala | 2 +- .../org/apache/spark/rpc/RpcEndpoint.scala | 51 +- .../rpc/RpcEndpointNotFoundException.scala | 22 + .../scala/org/apache/spark/rpc/RpcEnv.scala | 7 +- .../org/apache/spark/rpc/akka/AkkaRpcEnv.scala | 6 +- .../org/apache/spark/rpc/netty/Dispatcher.scala | 218 .../org/apache/spark/rpc/netty/IDVerifier.scala | 39 ++ .../org/apache/spark/rpc/netty/Inbox.scala | 220 .../spark/rpc/netty/NettyRpcAddress.scala | 56 +++ .../spark/rpc/netty/NettyRpcCallContext.scala | 87 .../apache/spark/rpc/netty/NettyRpcEnv.scala| 504 +++ .../storage/BlockManagerSlaveEndpoint.scala | 6 +- .../org/apache/spark/util/ThreadUtils.scala | 6 +- .../apache/spark/MapOutputTrackerSuite.scala| 10 +- .../org/apache/spark/SSLSampleConfigs.scala | 2 + .../deploy/worker/WorkerWatcherSuite.scala | 6 +- .../org/apache/spark/rpc/RpcEnvSuite.scala | 78 ++- .../org/apache/spark/rpc/TestRpcEndpoint.scala | 123 + .../org/apache/spark/rpc/netty/InboxSuite.scala | 148 ++ .../spark/rpc/netty/NettyRpcAddressSuite.scala | 29 ++ .../spark/rpc/netty/NettyRpcEnvSuite.scala | 38 ++ .../spark/rpc/netty/NettyRpcHandlerSuite.scala | 67 +++ .../spark/network/client/TransportClient.java | 4 + .../apache/spark/network/server/RpcHandler.java | 2 + .../network/server/TransportRequestHandler.java | 1 + .../streaming/scheduler/ReceiverTracker.scala | 2 +- .../spark/deploy/yarn/ApplicationMaster.scala | 5 +- 31 files changed, 1708 insertions(+), 68 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/084e4e12/core/src/main/scala/org/apache/spark/MapOutputTracker.scala -- diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala index 94eb8da..e380c5b 100644 --- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala +++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala @@ -45,7 +45,7 @@ private[spark] class MapOutputTrackerMasterEndpoint( override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = { case GetMapOutputStatuses(shuffleId: Int) => - val hostPort = context.sender.address.hostPort + val hostPort = context.senderAddress.hostPort logInfo("Asked to send map output locations for shuffle " + shuffleId + " to " + hostPort) val mapOutputStatuses = tracker.getSerializedMapOutputStatuses(shuffleId) val serializedSize = mapOutputStatuses.size http://git-wip-us.apache.org/repos/asf/spark/blob/084e4e12/core/src/main/scala/org/apache/spark/SparkEnv.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala
index c6fef7f..cfde27f 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -20,11 +20,10 @@ package org.apache.spark import java.io.File import java.net.Socket -import akka.actor.ActorSystem - import scala.collection.mutable import scala.util.Properties +import akka.actor.ActorSystem import com.google.common.collect.MapMaker import org.apache.spark.annotation.DeveloperApi @@ -41,7 +40,7 @@ import org.apache.spark.serializer.Serializer import org.apache.spark.shuffle.{ShuffleMemoryManager, ShuffleManager} import org.apache.spark.storage._ import org.apache.spark.unsafe.memory.{ExecutorMemoryManager, MemoryAllocator} -import org.apache.spark.util.{RpcUtils, Utils} +import org.apache.spark.util.{AkkaUtils, RpcUtils, Utils} /** * :: DeveloperApi :: @@
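Among the files this commit adds, Dispatcher.scala and Inbox.scala carry the core of the new design: each registered endpoint owns an inbox that queues its pending messages, and a dispatcher routes incoming messages to the right inbox by endpoint name instead of going through Akka actors. A hand-rolled miniature of that layering follows; the names are invented here, and the real classes add thread pools, stop semantics, and error handling.

import java.util.concurrent.LinkedBlockingQueue
import scala.collection.concurrent.TrieMap

trait MiniEndpoint {
  def receive(message: Any): Unit
}

// One inbox per endpoint: messages accumulate here until drained.
final class MiniInbox(endpoint: MiniEndpoint) {
  private val messages = new LinkedBlockingQueue[Any]()
  def post(message: Any): Unit = messages.put(message)
  def drain(): Unit = {
    var m = messages.poll()
    while (m != null) {
      endpoint.receive(m)
      m = messages.poll()
    }
  }
}

// The dispatcher routes by endpoint name; broadcast models how connection
// events reach every registered endpoint at once.
final class MiniDispatcher {
  private val inboxes = TrieMap.empty[String, MiniInbox]

  def register(name: String, endpoint: MiniEndpoint): Unit =
    inboxes.put(name, new MiniInbox(endpoint))

  def post(name: String, message: Any): Unit =
    inboxes.get(name).foreach { inbox => inbox.post(message); inbox.drain() }

  def broadcast(message: Any): Unit =
    inboxes.values.foreach { inbox => inbox.post(message); inbox.drain() }
}

In the actual implementation the inboxes are drained by a dedicated thread pool rather than inline on the posting thread, so a slow endpoint cannot stall the network layer.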
[2/2] spark git commit: Preparing development version 1.3.3-SNAPSHOT
Preparing development version 1.3.3-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9f4b926d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9f4b926d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9f4b926d Branch: refs/heads/branch-1.3 Commit: 9f4b926d4748203ba58a55568e7e397e8e431651 Parents: 5a13975 Author: Patrick Wendell Authored: Wed Sep 23 18:59:20 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 18:59:20 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- 28 files changed, 28 insertions(+), 28 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 1703d8a..5f03f2f 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 29e2fc1..5a2066e 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 491e98c..206f2d5 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 669b19c..2da1ce5 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index dd4c1d7..0623b5b 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index e4c31c1..b9ede7b 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/external/kafka-assembly/pom.xml -- diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml index 4716a78..2ca540a 100644 --- a/external/kafka-assembly/pom.xml +++ b/external/kafka-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2 +1.3.3-SNAPSHOT ../../pom.xml
http://git-wip-us.apache.org/repos/asf/spark/blob/9f4b926d/external/kafka/pom.xml -- diff
[1/2] spark git commit: [SPARK-6028][Core] A new RPC implementation based on the network module
Repository: spark Updated Branches: refs/heads/master 995221774 -> 084e4e126 http://git-wip-us.apache.org/repos/asf/spark/blob/084e4e12/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala new file mode 100644 index 000..06ca035 --- /dev/null +++ b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcHandlerSuite.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rpc.netty + +import java.net.InetSocketAddress + +import io.netty.channel.Channel +import org.mockito.Mockito._ +import org.mockito.Matchers._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.network.client.{TransportResponseHandler, TransportClient} +import org.apache.spark.rpc._ + +class NettyRpcHandlerSuite extends SparkFunSuite { + + val env = mock(classOf[NettyRpcEnv]) + when(env.deserialize(any(classOf[Array[Byte]]))(any())). 
+thenReturn(RequestMessage(RpcAddress("localhost", 12345), null, null, false)) + + test("receive") { +val dispatcher = mock(classOf[Dispatcher]) +val nettyRpcHandler = new NettyRpcHandler(dispatcher, env) + +val channel = mock(classOf[Channel]) +val client = new TransportClient(channel, mock(classOf[TransportResponseHandler])) +when(channel.remoteAddress()).thenReturn(new InetSocketAddress("localhost", 4)) +nettyRpcHandler.receive(client, null, null) + +when(channel.remoteAddress()).thenReturn(new InetSocketAddress("localhost", 40001)) +nettyRpcHandler.receive(client, null, null) + +verify(dispatcher, times(1)).broadcastMessage(Associated(RpcAddress("localhost", 12345))) + } + + test("connectionTerminated") { +val dispatcher = mock(classOf[Dispatcher]) +val nettyRpcHandler = new NettyRpcHandler(dispatcher, env) + +val channel = mock(classOf[Channel]) +val client = new TransportClient(channel, mock(classOf[TransportResponseHandler])) +when(channel.remoteAddress()).thenReturn(new InetSocketAddress("localhost", 4)) +nettyRpcHandler.receive(client, null, null) + +when(channel.remoteAddress()).thenReturn(new InetSocketAddress("localhost", 4)) +nettyRpcHandler.connectionTerminated(client) + +verify(dispatcher, times(1)).broadcastMessage(Associated(RpcAddress("localhost", 12345))) +verify(dispatcher, times(1)).broadcastMessage(Disassociated(RpcAddress("localhost", 12345))) + } + +} http://git-wip-us.apache.org/repos/asf/spark/blob/084e4e12/network/common/src/main/java/org/apache/spark/network/client/TransportClient.java -- diff --git a/network/common/src/main/java/org/apache/spark/network/client/TransportClient.java b/network/common/src/main/java/org/apache/spark/network/client/TransportClient.java index df84128..fbb8bb6 100644 --- a/network/common/src/main/java/org/apache/spark/network/client/TransportClient.java +++ b/network/common/src/main/java/org/apache/spark/network/client/TransportClient.java @@ -78,6 +78,10 @@ public class TransportClient implements Closeable { this.handler = Preconditions.checkNotNull(handler); } + public Channel getChannel() { +return channel; + } + public boolean isActive() { return channel.isOpen() || channel.isActive(); } http://git-wip-us.apache.org/repos/asf/spark/blob/084e4e12/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java -- diff --git a/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java b/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java index 2ba92a4..dbb7f95 100644 --- a/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java +++ b/network/common/src/main/java/org/apache/spark/network/server/RpcHandler.java @@ -52,4 +52,6 @@ public abstract class RpcHandler { * No further requests will come from this client. */ public
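Read together, the two tests above pin down the handler's observable contract: Associated is broadcast exactly once per advertised sender address, however many channels deliver messages for it, and Disassociated follows when the connection is torn down. A standalone approximation of that bookkeeping, with invented names rather than the real NettyRpcHandler:

import scala.collection.mutable

final case class MiniRpcAddress(host: String, port: Int)

// Tracks which sender addresses have already been announced, so Associated
// fires once per address, matching verify(dispatcher, times(1)) above.
final class AssociationTracker(broadcast: Any => Unit) {
  private val seen = mutable.Set.empty[MiniRpcAddress]

  def onMessage(sender: MiniRpcAddress): Unit = synchronized {
    if (seen.add(sender)) broadcast(("Associated", sender))
  }

  def onConnectionTerminated(sender: MiniRpcAddress): Unit = synchronized {
    if (seen.remove(sender)) broadcast(("Disassociated", sender))
  }
}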
[1/2] spark git commit: Preparing Spark release v1.3.2-rc1
Repository: spark Updated Branches: refs/heads/branch-1.3 392875ad3 -> 9f4b926d4 Preparing Spark release v1.3.2-rc1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5a139750 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5a139750 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5a139750 Branch: refs/heads/branch-1.3 Commit: 5a139750bea6cf4b56c432cbcb02584094997695 Parents: 392875a Author: Patrick Wendell Authored: Wed Sep 23 18:59:15 2015 -0700 Committer: Patrick Wendell Committed: Wed Sep 23 18:59:15 2015 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml| 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml| 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- 28 files changed, 28 insertions(+), 28 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 0952cd2..1703d8a 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index ea3a71a..29e2fc1 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 7d67942..491e98c 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index e1a3ecc..669b19c 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index f46a2a0..dd4c1d7 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 56e85e3..e4c31c1 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/external/kafka-assembly/pom.xml -- diff --git a/external/kafka-assembly/pom.xml b/external/kafka-assembly/pom.xml index 23ca5d2..4716a78 100644 --- a/external/kafka-assembly/pom.xml +++ b/external/kafka-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark
spark-parent_2.10 -1.3.2-SNAPSHOT +1.3.2 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/5a139750/external/kafka/pom.xml
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.3.2-rc1 [created] 5a139750b
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.5.1-rc1 [deleted] 4f894dd69