git commit: [SPARK-3139] Made ContextCleaner to not block on shuffles
Repository: spark Updated Branches: refs/heads/master 9d65f2712 -> 3e2864e40 [SPARK-3139] Made ContextCleaner to not block on shuffles As a workaround for SPARK-3015, the ContextCleaner was made "blocking", that is, it cleaned items one-by-one. But shuffles can take a long time to be deleted. Given that the RC for 1.1 is imminent, this PR makes a narrow change in the context cleaner - not wait for shuffle cleanups to complete. Also it changes the error messages on failure to delete to be milder warnings, as exceptions in the delete code path for one item does not really stop the actual functioning of the system. Author: Tathagata Das Closes #2143 from tdas/cleaner-shuffle-fix and squashes the following commits: 9c84202 [Tathagata Das] Restoring default blocking behavior in ContextCleanerSuite, and added docs to identify that spark.cleaner.referenceTracking.blocking does not control shuffle. 2181329 [Tathagata Das] Mark shuffle cleanup as non-blocking. e337cc2 [Tathagata Das] Changed semantics based on PR comments. 387b578 [Tathagata Das] Made ContextCleaner to not block on shuffles Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e2864e4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e2864e4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e2864e4 Branch: refs/heads/master Commit: 3e2864e40472b32e6a7eec5ba3bc83562d2a1a62 Parents: 9d65f27 Author: Tathagata Das Authored: Wed Aug 27 00:13:38 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 00:13:38 2014 -0700 -- .../scala/org/apache/spark/ContextCleaner.scala | 18 -- .../apache/spark/storage/BlockManagerMaster.scala | 12 +++- .../org/apache/spark/ContextCleanerSuite.scala| 3 +++ 3 files changed, 26 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3e2864e4/core/src/main/scala/org/apache/spark/ContextCleaner.scala -- diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index 3848734..ede1e23 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -65,7 +65,8 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { private val cleaningThread = new Thread() { override def run() { keepCleaning() }} /** - * Whether the cleaning thread will block on cleanup tasks. + * Whether the cleaning thread will block on cleanup tasks (other than shuffle, which + * is controlled by the `spark.cleaner.referenceTracking.blocking.shuffle` parameter). * * Due to SPARK-3015, this is set to true by default. This is intended to be only a temporary * workaround for the issue, which is ultimately caused by the way the BlockManager actors @@ -76,6 +77,19 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { private val blockOnCleanupTasks = sc.conf.getBoolean( "spark.cleaner.referenceTracking.blocking", true) + /** + * Whether the cleaning thread will block on shuffle cleanup tasks. + * + * When context cleaner is configured to block on every delete request, it can throw timeout + * exceptions on cleanup of shuffle blocks, as reported in SPARK-3139. To avoid that, this + * parameter by default disables blocking on shuffle cleanups. Note that this does not affect + * the cleanup of RDDs and broadcasts. 
This is intended to be a temporary workaround, + * until the real Akka issue (referred to in the comment above `blockOnCleanupTasks`) is + * resolved. + */ + private val blockOnShuffleCleanupTasks = sc.conf.getBoolean( +"spark.cleaner.referenceTracking.blocking.shuffle", false) + @volatile private var stopped = false /** Attach a listener object to get information of when objects are cleaned. */ @@ -128,7 +142,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { case CleanRDD(rddId) => doCleanupRDD(rddId, blocking = blockOnCleanupTasks) case CleanShuffle(shuffleId) => - doCleanupShuffle(shuffleId, blocking = blockOnCleanupTasks) + doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks) case CleanBroadcast(broadcastId) => doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks) } http://git-wip-us.apache.org/repos/asf/spark/blob/3e2864e4/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala b/core/src/main/sc
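A minimal sketch of how the new setting is meant to be used from an application, assuming a standard SparkConf-based setup. The default after this patch (non-blocking shuffle cleanup) needs no configuration at all; the `spark.cleaner.referenceTracking.blocking.shuffle` key below is the one introduced by the change, and the other values are illustrative.

    import org.apache.spark.{SparkConf, SparkContext}

    // Only needed to restore the old behavior and make the cleaner wait for
    // shuffle deletions too; by default shuffle cleanup is now non-blocking.
    val conf = new SparkConf()
      .setAppName("CleanerExample")
      .setMaster("local[2]")
      .set("spark.cleaner.referenceTracking.blocking", "true")          // RDDs and broadcasts (default: true)
      .set("spark.cleaner.referenceTracking.blocking.shuffle", "true")  // shuffles (default: false after this patch)

    val sc = new SparkContext(conf)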
git commit: [SPARK-3139] Made ContextCleaner to not block on shuffles
Repository: spark Updated Branches: refs/heads/branch-1.1 6f82a4b13 -> 5cf1e4401 [SPARK-3139] Made ContextCleaner to not block on shuffles As a workaround for SPARK-3015, the ContextCleaner was made "blocking", that is, it cleaned items one-by-one. But shuffles can take a long time to be deleted. Given that the RC for 1.1 is imminent, this PR makes a narrow change in the context cleaner - not wait for shuffle cleanups to complete. Also it changes the error messages on failure to delete to be milder warnings, as exceptions in the delete code path for one item does not really stop the actual functioning of the system. Author: Tathagata Das Closes #2143 from tdas/cleaner-shuffle-fix and squashes the following commits: 9c84202 [Tathagata Das] Restoring default blocking behavior in ContextCleanerSuite, and added docs to identify that spark.cleaner.referenceTracking.blocking does not control shuffle. 2181329 [Tathagata Das] Mark shuffle cleanup as non-blocking. e337cc2 [Tathagata Das] Changed semantics based on PR comments. 387b578 [Tathagata Das] Made ContextCleaner to not block on shuffles (cherry picked from commit 3e2864e40472b32e6a7eec5ba3bc83562d2a1a62) Signed-off-by: Patrick Wendell Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5cf1e440 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5cf1e440 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5cf1e440 Branch: refs/heads/branch-1.1 Commit: 5cf1e440137006eedd6846ac8fa57ccf9fd1958d Parents: 6f82a4b Author: Tathagata Das Authored: Wed Aug 27 00:13:38 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 00:17:37 2014 -0700 -- .../scala/org/apache/spark/ContextCleaner.scala | 18 -- .../apache/spark/storage/BlockManagerMaster.scala | 12 +++- .../org/apache/spark/ContextCleanerSuite.scala| 3 +++ 3 files changed, 26 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5cf1e440/core/src/main/scala/org/apache/spark/ContextCleaner.scala -- diff --git a/core/src/main/scala/org/apache/spark/ContextCleaner.scala b/core/src/main/scala/org/apache/spark/ContextCleaner.scala index 3848734..ede1e23 100644 --- a/core/src/main/scala/org/apache/spark/ContextCleaner.scala +++ b/core/src/main/scala/org/apache/spark/ContextCleaner.scala @@ -65,7 +65,8 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { private val cleaningThread = new Thread() { override def run() { keepCleaning() }} /** - * Whether the cleaning thread will block on cleanup tasks. + * Whether the cleaning thread will block on cleanup tasks (other than shuffle, which + * is controlled by the `spark.cleaner.referenceTracking.blocking.shuffle` parameter). * * Due to SPARK-3015, this is set to true by default. This is intended to be only a temporary * workaround for the issue, which is ultimately caused by the way the BlockManager actors @@ -76,6 +77,19 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { private val blockOnCleanupTasks = sc.conf.getBoolean( "spark.cleaner.referenceTracking.blocking", true) + /** + * Whether the cleaning thread will block on shuffle cleanup tasks. + * + * When context cleaner is configured to block on every delete request, it can throw timeout + * exceptions on cleanup of shuffle blocks, as reported in SPARK-3139. To avoid that, this + * parameter by default disables blocking on shuffle cleanups. Note that this does not affect + * the cleanup of RDDs and broadcasts. 
This is intended to be a temporary workaround, + * until the real Akka issue (referred to in the comment above `blockOnCleanupTasks`) is + * resolved. + */ + private val blockOnShuffleCleanupTasks = sc.conf.getBoolean( +"spark.cleaner.referenceTracking.blocking.shuffle", false) + @volatile private var stopped = false /** Attach a listener object to get information of when objects are cleaned. */ @@ -128,7 +142,7 @@ private[spark] class ContextCleaner(sc: SparkContext) extends Logging { case CleanRDD(rddId) => doCleanupRDD(rddId, blocking = blockOnCleanupTasks) case CleanShuffle(shuffleId) => - doCleanupShuffle(shuffleId, blocking = blockOnCleanupTasks) + doCleanupShuffle(shuffleId, blocking = blockOnShuffleCleanupTasks) case CleanBroadcast(broadcastId) => doCleanupBroadcast(broadcastId, blocking = blockOnCleanupTasks) } http://git-wip-us.apache.org/repos/asf/spark/blob/5cf1e440/core/src/main/scala/org/apache/spark/storage/BlockManagerMaster.scala
git commit: [SPARK-3237][SQL] Fix parquet filters with UDFs
Repository: spark Updated Branches: refs/heads/master 3e2864e40 -> e1139dd60 [SPARK-3237][SQL] Fix parquet filters with UDFs Author: Michael Armbrust Closes #2153 from marmbrus/parquetFilters and squashes the following commits: 712731a [Michael Armbrust] Use closure serializer for sending filters. 1e83f80 [Michael Armbrust] Clean udf functions. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e1139dd6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e1139dd6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e1139dd6 Branch: refs/heads/master Commit: e1139dd60e0692e8adb1337c1f605165ce4b8895 Parents: 3e2864e Author: Michael Armbrust Authored: Wed Aug 27 00:59:23 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 00:59:23 2014 -0700 -- .../org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala | 4 .../scala/org/apache/spark/sql/parquet/ParquetFilters.scala | 8 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e1139dd6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala index 63ac2a6..0b3c1df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala @@ -18,10 +18,14 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.types.DataType +import org.apache.spark.util.ClosureCleaner case class ScalaUdf(function: AnyRef, dataType: DataType, children: Seq[Expression]) extends Expression { + // Clean function when not called with default no-arg constructor. 
+ if (function != null) { ClosureCleaner.clean(function) } + type EvaluatedType = Any def nullable = true http://git-wip-us.apache.org/repos/asf/spark/blob/e1139dd6/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala index 2298a9b..fe28e0d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.parquet +import java.nio.ByteBuffer + import org.apache.hadoop.conf.Configuration import parquet.filter._ @@ -25,6 +27,7 @@ import parquet.column.ColumnReader import com.google.common.io.BaseEncoding +import org.apache.spark.SparkEnv import org.apache.spark.sql.catalyst.types._ import org.apache.spark.sql.catalyst.expressions.{Predicate => CatalystPredicate} import org.apache.spark.sql.catalyst.expressions._ @@ -237,7 +240,8 @@ object ParquetFilters { */ def serializeFilterExpressions(filters: Seq[Expression], conf: Configuration): Unit = { if (filters.length > 0) { - val serialized: Array[Byte] = SparkSqlSerializer.serialize(filters) + val serialized: Array[Byte] = +SparkEnv.get.closureSerializer.newInstance().serialize(filters).array() val encoded: String = BaseEncoding.base64().encode(serialized) conf.set(PARQUET_FILTER_DATA, encoded) } @@ -252,7 +256,7 @@ object ParquetFilters { val data = conf.get(PARQUET_FILTER_DATA) if (data != null) { val decoded: Array[Byte] = BaseEncoding.base64().decode(data) - SparkSqlSerializer.deserialize(decoded) + SparkEnv.get.closureSerializer.newInstance().deserialize(ByteBuffer.wrap(decoded)) } else { Seq() } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
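For context, the core of the change above is how filter expressions travel from the driver to the tasks: they are serialized, base64-encoded into the Hadoop Configuration under PARQUET_FILTER_DATA, then decoded and deserialized on the other side. The self-contained sketch below reproduces that round trip, using plain Java serialization and java.util.Base64 as stand-ins for Spark's closure serializer and Guava's BaseEncoding, and a made-up GreaterThan placeholder instead of a real Catalyst expression.

    import java.io.{ByteArrayInputStream, ByteArrayOutputStream, ObjectInputStream, ObjectOutputStream}
    import java.util.Base64

    // Placeholder for a filter expression; real Catalyst expressions are likewise serializable.
    case class GreaterThan(column: String, value: Int)

    object FilterRoundTrip {
      def main(args: Array[String]): Unit = {
        val filters = Seq(GreaterThan("age", 21), GreaterThan("score", 90))

        // Driver side: serialize the filters and encode them into a plain string,
        // which is what gets stored in the Hadoop Configuration.
        val buffer = new ByteArrayOutputStream()
        val out = new ObjectOutputStream(buffer)
        out.writeObject(filters)
        out.close()
        val encoded = Base64.getEncoder.encodeToString(buffer.toByteArray)

        // Task side: decode the string and deserialize the original filter list.
        val in = new ObjectInputStream(new ByteArrayInputStream(Base64.getDecoder.decode(encoded)))
        val recovered = in.readObject().asInstanceOf[Seq[GreaterThan]]
        println(recovered)  // List(GreaterThan(age,21), GreaterThan(score,90))
      }
    }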
git commit: [SPARK-3237][SQL] Fix parquet filters with UDFs
Repository: spark Updated Branches: refs/heads/branch-1.1 5cf1e4401 -> ca01de1b9 [SPARK-3237][SQL] Fix parquet filters with UDFs Author: Michael Armbrust Closes #2153 from marmbrus/parquetFilters and squashes the following commits: 712731a [Michael Armbrust] Use closure serializer for sending filters. 1e83f80 [Michael Armbrust] Clean udf functions. (cherry picked from commit e1139dd60e0692e8adb1337c1f605165ce4b8895) Signed-off-by: Michael Armbrust Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca01de1b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca01de1b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca01de1b Branch: refs/heads/branch-1.1 Commit: ca01de1b98ae17d9f85dbd07e3546c985061c8a5 Parents: 5cf1e44 Author: Michael Armbrust Authored: Wed Aug 27 00:59:23 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 00:59:54 2014 -0700 -- .../org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala | 4 .../scala/org/apache/spark/sql/parquet/ParquetFilters.scala | 8 ++-- 2 files changed, 10 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ca01de1b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala index 63ac2a6..0b3c1df 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala @@ -18,10 +18,14 @@ package org.apache.spark.sql.catalyst.expressions import org.apache.spark.sql.catalyst.types.DataType +import org.apache.spark.util.ClosureCleaner case class ScalaUdf(function: AnyRef, dataType: DataType, children: Seq[Expression]) extends Expression { + // Clean function when not called with default no-arg constructor. 
+ if (function != null) { ClosureCleaner.clean(function) } + type EvaluatedType = Any def nullable = true http://git-wip-us.apache.org/repos/asf/spark/blob/ca01de1b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala index 2298a9b..fe28e0d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetFilters.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.parquet +import java.nio.ByteBuffer + import org.apache.hadoop.conf.Configuration import parquet.filter._ @@ -25,6 +27,7 @@ import parquet.column.ColumnReader import com.google.common.io.BaseEncoding +import org.apache.spark.SparkEnv import org.apache.spark.sql.catalyst.types._ import org.apache.spark.sql.catalyst.expressions.{Predicate => CatalystPredicate} import org.apache.spark.sql.catalyst.expressions._ @@ -237,7 +240,8 @@ object ParquetFilters { */ def serializeFilterExpressions(filters: Seq[Expression], conf: Configuration): Unit = { if (filters.length > 0) { - val serialized: Array[Byte] = SparkSqlSerializer.serialize(filters) + val serialized: Array[Byte] = +SparkEnv.get.closureSerializer.newInstance().serialize(filters).array() val encoded: String = BaseEncoding.base64().encode(serialized) conf.set(PARQUET_FILTER_DATA, encoded) } @@ -252,7 +256,7 @@ object ParquetFilters { val data = conf.get(PARQUET_FILTER_DATA) if (data != null) { val decoded: Array[Byte] = BaseEncoding.base64().decode(data) - SparkSqlSerializer.deserialize(decoded) + SparkEnv.get.closureSerializer.newInstance().deserialize(ByteBuffer.wrap(decoded)) } else { Seq() } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-2830][MLLIB] doc update for 1.1
Repository: spark Updated Branches: refs/heads/master e1139dd60 -> 43dfc84f8 [SPARK-2830][MLLIB] doc update for 1.1 1. renamed mllib-basics to mllib-data-types 1. renamed mllib-stats to mllib-statistics 1. moved random data generation to the bottom of mllib-stats 1. updated toc accordingly atalwalkar Author: Xiangrui Meng Closes #2151 from mengxr/mllib-doc-1.1 and squashes the following commits: 0bd79f3 [Xiangrui Meng] add mllib-data-types b64a5d7 [Xiangrui Meng] update the content list of basis statistics in mllib-guide f625cc2 [Xiangrui Meng] move mllib-basics to mllib-data-types 4d69250 [Xiangrui Meng] move random data generation to the bottom of statistics e64f3ce [Xiangrui Meng] move mllib-stats.md to mllib-statistics.md Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/43dfc84f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/43dfc84f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/43dfc84f Branch: refs/heads/master Commit: 43dfc84f883822ea27b6e312d4353bf301c2e7ef Parents: e1139dd Author: Xiangrui Meng Authored: Wed Aug 27 01:19:48 2014 -0700 Committer: Xiangrui Meng Committed: Wed Aug 27 01:19:48 2014 -0700 -- docs/mllib-basics.md | 468 docs/mllib-data-types.md | 468 docs/mllib-dimensionality-reduction.md | 4 +- docs/mllib-guide.md| 9 +- docs/mllib-statistics.md | 457 +++ docs/mllib-stats.md| 457 --- 6 files changed, 932 insertions(+), 931 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/43dfc84f/docs/mllib-basics.md -- diff --git a/docs/mllib-basics.md b/docs/mllib-basics.md deleted file mode 100644 index 8752df4..000 --- a/docs/mllib-basics.md +++ /dev/null @@ -1,468 +0,0 @@ -layout: global -title: Basics - MLlib -displayTitle: MLlib - Basics - -* Table of contents -{:toc} - -MLlib supports local vectors and matrices stored on a single machine, -as well as distributed matrices backed by one or more RDDs. -Local vectors and local matrices are simple data models -that serve as public interfaces. The underlying linear algebra operations are provided by -[Breeze](http://www.scalanlp.org/) and [jblas](http://jblas.org/). -A training example used in supervised learning is called a "labeled point" in MLlib. - -## Local vector - -A local vector has integer-typed and 0-based indices and double-typed values, stored on a single -machine. MLlib supports two types of local vectors: dense and sparse. A dense vector is backed by -a double array representing its entry values, while a sparse vector is backed by two parallel -arrays: indices and values. For example, a vector `(1.0, 0.0, 3.0)` can be represented in dense -format as `[1.0, 0.0, 3.0]` or in sparse format as `(3, [0, 2], [1.0, 3.0])`, where `3` is the size -of the vector. - - - - -The base class of local vectors is -[`Vector`](api/scala/index.html#org.apache.spark.mllib.linalg.Vector), and we provide two -implementations: [`DenseVector`](api/scala/index.html#org.apache.spark.mllib.linalg.DenseVector) and -[`SparseVector`](api/scala/index.html#org.apache.spark.mllib.linalg.SparseVector). We recommend -using the factory methods implemented in -[`Vectors`](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) to create local vectors. - -{% highlight scala %} -import org.apache.spark.mllib.linalg.{Vector, Vectors} - -// Create a dense vector (1.0, 0.0, 3.0). -val dv: Vector = Vectors.dense(1.0, 0.0, 3.0) -// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to nonzero entries. 
-val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)) -// Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries. -val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0))) -{% endhighlight %} - -***Note:*** -Scala imports `scala.collection.immutable.Vector` by default, so you have to import -`org.apache.spark.mllib.linalg.Vector` explicitly to use MLlib's `Vector`. - - - - - -The base class of local vectors is -[`Vector`](api/java/org/apache/spark/mllib/linalg/Vector.html), and we provide two -implementations: [`DenseVector`](api/java/org/apache/spark/mllib/linalg/DenseVector.html) and -[`SparseVector`](api/java/org/apache/spark/mllib/linalg/SparseVector.html). We recommend -using the factory methods implemented in -[`Vectors`](api/java/org/apache/spark/mllib/linalg/Vector.html) to create local vectors. - -{% highlight java %} -import org.apache.spark.mllib.linalg.Vector; -import org.apache.spark.mllib.linalg.Vectors; - -// Create a dense vector (1.0
git commit: [SPARK-2830][MLLIB] doc update for 1.1
Repository: spark Updated Branches: refs/heads/branch-1.1 ca01de1b9 -> 74012475b [SPARK-2830][MLLIB] doc update for 1.1 1. renamed mllib-basics to mllib-data-types 1. renamed mllib-stats to mllib-statistics 1. moved random data generation to the bottom of mllib-stats 1. updated toc accordingly atalwalkar Author: Xiangrui Meng Closes #2151 from mengxr/mllib-doc-1.1 and squashes the following commits: 0bd79f3 [Xiangrui Meng] add mllib-data-types b64a5d7 [Xiangrui Meng] update the content list of basis statistics in mllib-guide f625cc2 [Xiangrui Meng] move mllib-basics to mllib-data-types 4d69250 [Xiangrui Meng] move random data generation to the bottom of statistics e64f3ce [Xiangrui Meng] move mllib-stats.md to mllib-statistics.md (cherry picked from commit 43dfc84f883822ea27b6e312d4353bf301c2e7ef) Signed-off-by: Xiangrui Meng Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/74012475 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/74012475 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/74012475 Branch: refs/heads/branch-1.1 Commit: 74012475bf19ceb5caca2eaa90b7c8e3fdfaaf8f Parents: ca01de1 Author: Xiangrui Meng Authored: Wed Aug 27 01:19:48 2014 -0700 Committer: Xiangrui Meng Committed: Wed Aug 27 01:20:07 2014 -0700 -- docs/mllib-basics.md | 468 docs/mllib-data-types.md | 468 docs/mllib-dimensionality-reduction.md | 4 +- docs/mllib-guide.md| 9 +- docs/mllib-statistics.md | 457 +++ docs/mllib-stats.md| 457 --- 6 files changed, 932 insertions(+), 931 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/74012475/docs/mllib-basics.md -- diff --git a/docs/mllib-basics.md b/docs/mllib-basics.md deleted file mode 100644 index 8752df4..000 --- a/docs/mllib-basics.md +++ /dev/null @@ -1,468 +0,0 @@ -layout: global -title: Basics - MLlib -displayTitle: MLlib - Basics - -* Table of contents -{:toc} - -MLlib supports local vectors and matrices stored on a single machine, -as well as distributed matrices backed by one or more RDDs. -Local vectors and local matrices are simple data models -that serve as public interfaces. The underlying linear algebra operations are provided by -[Breeze](http://www.scalanlp.org/) and [jblas](http://jblas.org/). -A training example used in supervised learning is called a "labeled point" in MLlib. - -## Local vector - -A local vector has integer-typed and 0-based indices and double-typed values, stored on a single -machine. MLlib supports two types of local vectors: dense and sparse. A dense vector is backed by -a double array representing its entry values, while a sparse vector is backed by two parallel -arrays: indices and values. For example, a vector `(1.0, 0.0, 3.0)` can be represented in dense -format as `[1.0, 0.0, 3.0]` or in sparse format as `(3, [0, 2], [1.0, 3.0])`, where `3` is the size -of the vector. - - - - -The base class of local vectors is -[`Vector`](api/scala/index.html#org.apache.spark.mllib.linalg.Vector), and we provide two -implementations: [`DenseVector`](api/scala/index.html#org.apache.spark.mllib.linalg.DenseVector) and -[`SparseVector`](api/scala/index.html#org.apache.spark.mllib.linalg.SparseVector). We recommend -using the factory methods implemented in -[`Vectors`](api/scala/index.html#org.apache.spark.mllib.linalg.Vector) to create local vectors. - -{% highlight scala %} -import org.apache.spark.mllib.linalg.{Vector, Vectors} - -// Create a dense vector (1.0, 0.0, 3.0). 
-val dv: Vector = Vectors.dense(1.0, 0.0, 3.0) -// Create a sparse vector (1.0, 0.0, 3.0) by specifying its indices and values corresponding to nonzero entries. -val sv1: Vector = Vectors.sparse(3, Array(0, 2), Array(1.0, 3.0)) -// Create a sparse vector (1.0, 0.0, 3.0) by specifying its nonzero entries. -val sv2: Vector = Vectors.sparse(3, Seq((0, 1.0), (2, 3.0))) -{% endhighlight %} - -***Note:*** -Scala imports `scala.collection.immutable.Vector` by default, so you have to import -`org.apache.spark.mllib.linalg.Vector` explicitly to use MLlib's `Vector`. - - - - - -The base class of local vectors is -[`Vector`](api/java/org/apache/spark/mllib/linalg/Vector.html), and we provide two -implementations: [`DenseVector`](api/java/org/apache/spark/mllib/linalg/DenseVector.html) and -[`SparseVector`](api/java/org/apache/spark/mllib/linalg/SparseVector.html). We recommend -using the factory methods implemented in -[`Vectors`](api/java/org/apache/spark/mllib/linalg/Vector.html) to create local vectors. - -{% highlight java %} -import org.apach
git commit: [SPARK-3227] [mllib] Added migration guide for v1.0 to v1.1
Repository: spark Updated Branches: refs/heads/branch-1.1 74012475b -> 7286d5707 [SPARK-3227] [mllib] Added migration guide for v1.0 to v1.1 The only updates are in DecisionTree. CC: mengxr Author: Joseph K. Bradley Closes #2146 from jkbradley/mllib-migration and squashes the following commits: 5a1f487 [Joseph K. Bradley] small edit to doc 411d6d9 [Joseph K. Bradley] Added migration guide for v1.0 to v1.1. The only updates are in DecisionTree. (cherry picked from commit 171a41cb034f4ea80f6a3c91a6872970de16a14a) Signed-off-by: Xiangrui Meng Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7286d570 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7286d570 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7286d570 Branch: refs/heads/branch-1.1 Commit: 7286d5707af69d0acfc010f0458626c30f6aef0a Parents: 7401247 Author: Joseph K. Bradley Authored: Wed Aug 27 01:45:59 2014 -0700 Committer: Xiangrui Meng Committed: Wed Aug 27 01:46:24 2014 -0700 -- docs/mllib-guide.md | 28 +++- 1 file changed, 27 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7286d570/docs/mllib-guide.md -- diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md index d3a510b..94fc98c 100644 --- a/docs/mllib-guide.md +++ b/docs/mllib-guide.md @@ -60,6 +60,32 @@ To use MLlib in Python, you will need [NumPy](http://www.numpy.org) version 1.4 # Migration Guide +## From 1.0 to 1.1 + +The only API changes in MLlib v1.1 are in +[`DecisionTree`](api/scala/index.html#org.apache.spark.mllib.tree.DecisionTree), +which continues to be an experimental API in MLlib 1.1: + +1. *(Breaking change)* The meaning of tree depth has been changed by 1 in order to match +the implementations of trees in +[scikit-learn](http://scikit-learn.org/stable/modules/classes.html#module-sklearn.tree) +and in [rpart](http://cran.r-project.org/web/packages/rpart/index.html). +In MLlib v1.0, a depth-1 tree had 1 leaf node, and a depth-2 tree had 1 root node and 2 leaf nodes. +In MLlib v1.1, a depth-0 tree has 1 leaf node, and a depth-1 tree has 1 root node and 2 leaf nodes. +This depth is specified by the `maxDepth` parameter in +[`Strategy`](api/scala/index.html#org.apache.spark.mllib.tree.configuration.Strategy) +or via [`DecisionTree`](api/scala/index.html#org.apache.spark.mllib.tree.DecisionTree) +static `trainClassifier` and `trainRegressor` methods. + +2. *(Non-breaking change)* We recommend using the newly added `trainClassifier` and `trainRegressor` +methods to build a [`DecisionTree`](api/scala/index.html#org.apache.spark.mllib.tree.DecisionTree), +rather than using the old parameter class `Strategy`. These new training methods explicitly +separate classification and regression, and they replace specialized parameter types with +simple `String` types. + +Examples of the new, recommended `trainClassifier` and `trainRegressor` are given in the +[Decision Trees Guide](mllib-decision-tree.html#examples). + ## From 0.9 to 1.0 In MLlib v1.0, we support both dense and sparse input in a unified way, which introduces a few @@ -85,7 +111,7 @@ val vector: Vector = Vectors.dense(array) // a dense vector [`Vectors`](api/scala/index.html#org.apache.spark.mllib.linalg.Vectors$) provides factory methods to create sparse vectors. -*Note*. Scala imports `scala.collection.immutable.Vector` by default, so you have to import `org.apache.spark.mllib.linalg.Vector` explicitly to use MLlib's `Vector`. 
+*Note*: Scala imports `scala.collection.immutable.Vector` by default, so you have to import `org.apache.spark.mllib.linalg.Vector` explicitly to use MLlib's `Vector`.
git commit: [SPARK-3227] [mllib] Added migration guide for v1.0 to v1.1
Repository: spark Updated Branches: refs/heads/master 43dfc84f8 -> 171a41cb0 [SPARK-3227] [mllib] Added migration guide for v1.0 to v1.1 The only updates are in DecisionTree. CC: mengxr Author: Joseph K. Bradley Closes #2146 from jkbradley/mllib-migration and squashes the following commits: 5a1f487 [Joseph K. Bradley] small edit to doc 411d6d9 [Joseph K. Bradley] Added migration guide for v1.0 to v1.1. The only updates are in DecisionTree. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/171a41cb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/171a41cb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/171a41cb Branch: refs/heads/master Commit: 171a41cb034f4ea80f6a3c91a6872970de16a14a Parents: 43dfc84 Author: Joseph K. Bradley Authored: Wed Aug 27 01:45:59 2014 -0700 Committer: Xiangrui Meng Committed: Wed Aug 27 01:45:59 2014 -0700 -- docs/mllib-guide.md | 28 +++- 1 file changed, 27 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/171a41cb/docs/mllib-guide.md -- diff --git a/docs/mllib-guide.md b/docs/mllib-guide.md index d3a510b..94fc98c 100644 --- a/docs/mllib-guide.md +++ b/docs/mllib-guide.md @@ -60,6 +60,32 @@ To use MLlib in Python, you will need [NumPy](http://www.numpy.org) version 1.4 # Migration Guide +## From 1.0 to 1.1 + +The only API changes in MLlib v1.1 are in +[`DecisionTree`](api/scala/index.html#org.apache.spark.mllib.tree.DecisionTree), +which continues to be an experimental API in MLlib 1.1: + +1. *(Breaking change)* The meaning of tree depth has been changed by 1 in order to match +the implementations of trees in +[scikit-learn](http://scikit-learn.org/stable/modules/classes.html#module-sklearn.tree) +and in [rpart](http://cran.r-project.org/web/packages/rpart/index.html). +In MLlib v1.0, a depth-1 tree had 1 leaf node, and a depth-2 tree had 1 root node and 2 leaf nodes. +In MLlib v1.1, a depth-0 tree has 1 leaf node, and a depth-1 tree has 1 root node and 2 leaf nodes. +This depth is specified by the `maxDepth` parameter in +[`Strategy`](api/scala/index.html#org.apache.spark.mllib.tree.configuration.Strategy) +or via [`DecisionTree`](api/scala/index.html#org.apache.spark.mllib.tree.DecisionTree) +static `trainClassifier` and `trainRegressor` methods. + +2. *(Non-breaking change)* We recommend using the newly added `trainClassifier` and `trainRegressor` +methods to build a [`DecisionTree`](api/scala/index.html#org.apache.spark.mllib.tree.DecisionTree), +rather than using the old parameter class `Strategy`. These new training methods explicitly +separate classification and regression, and they replace specialized parameter types with +simple `String` types. + +Examples of the new, recommended `trainClassifier` and `trainRegressor` are given in the +[Decision Trees Guide](mllib-decision-tree.html#examples). + ## From 0.9 to 1.0 In MLlib v1.0, we support both dense and sparse input in a unified way, which introduces a few @@ -85,7 +111,7 @@ val vector: Vector = Vectors.dense(array) // a dense vector [`Vectors`](api/scala/index.html#org.apache.spark.mllib.linalg.Vectors$) provides factory methods to create sparse vectors. -*Note*. Scala imports `scala.collection.immutable.Vector` by default, so you have to import `org.apache.spark.mllib.linalg.Vector` explicitly to use MLlib's `Vector`. +*Note*: Scala imports `scala.collection.immutable.Vector` by default, so you have to import `org.apache.spark.mllib.linalg.Vector` explicitly to use MLlib's `Vector`. 
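To make the recommendation in the migration guide concrete, here is a hedged sketch of the new-style call, assuming an existing SparkContext `sc`, the sample LIBSVM file that ships in the Spark source tree, and the MLlib 1.1 `trainClassifier(input, numClasses, categoricalFeaturesInfo, impurity, maxDepth, maxBins)` overload described in the Decision Trees Guide; the parameter values are illustrative only.

    import org.apache.spark.mllib.tree.DecisionTree
    import org.apache.spark.mllib.util.MLUtils

    val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

    // Classification and regression now have separate entry points, and impurity
    // is a plain String rather than a field on a Strategy parameter object.
    val numClasses = 2
    val categoricalFeaturesInfo = Map[Int, Int]()  // empty: all features treated as continuous
    val impurity = "gini"
    val maxDepth = 4   // v1.1 semantics: a depth-0 tree is a single leaf node
    val maxBins = 32

    val model = DecisionTree.trainClassifier(
      data, numClasses, categoricalFeaturesInfo, impurity, maxDepth, maxBins)

    println(model.predict(data.first().features))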
git commit: [SPARK-3154][STREAMING] Make FlumePollingInputDStream shutdown cleaner.
Repository: spark Updated Branches: refs/heads/branch-1.1 7286d5707 -> 1d468df33 [SPARK-3154][STREAMING] Make FlumePollingInputDStream shutdown cleaner. Currently lot of errors get thrown from Avro IPC layer when the dstream or sink is shutdown. This PR cleans it up. Some refactoring is done in the receiver code to put all of the RPC code into a single Try and just recover from that. The sink code has also been cleaned up. Author: Hari Shreedharan Closes #2065 from harishreedharan/clean-flume-shutdown and squashes the following commits: f93a07c [Hari Shreedharan] Formatting fixes. d7427cc [Hari Shreedharan] More fixes! a0a8852 [Hari Shreedharan] Fix race condition, hopefully! Minor other changes. 4c9ed02 [Hari Shreedharan] Remove unneeded list in Callback handler. Other misc changes. 8fee36f [Hari Shreedharan] Scala-library is required, else maven build fails. Also catch InterruptedException in TxnProcessor. 445e700 [Hari Shreedharan] Merge remote-tracking branch 'asf/master' into clean-flume-shutdown 87232e0 [Hari Shreedharan] Refactor Flume Input Stream. Clean up code, better error handling. 9001d26 [Hari Shreedharan] Change log level to debug in TransactionProcessor#shutdown method e7b8d82 [Hari Shreedharan] Incorporate review feedback 598efa7 [Hari Shreedharan] Clean up some exception handling code e1027c6 [Hari Shreedharan] Merge remote-tracking branch 'asf/master' into clean-flume-shutdown ed608c8 [Hari Shreedharan] [SPARK-3154][STREAMING] Make FlumePollingInputDStream shutdown cleaner. (cherry picked from commit 6f671d04fa98f97fd48c5e749b9f47dd4a8b4f44) Signed-off-by: Tathagata Das Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1d468df3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1d468df3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1d468df3 Branch: refs/heads/branch-1.1 Commit: 1d468df33c7b8680af12fcdb66ed91f48c80cae3 Parents: 7286d57 Author: Hari Shreedharan Authored: Wed Aug 27 02:39:02 2014 -0700 Committer: Tathagata Das Committed: Wed Aug 27 02:39:21 2014 -0700 -- external/flume-sink/pom.xml | 4 + .../flume/sink/SparkAvroCallbackHandler.scala | 56 +-- .../flume/sink/TransactionProcessor.scala | 18 +- .../streaming/flume/FlumeBatchFetcher.scala | 167 +++ .../flume/FlumePollingInputDStream.scala| 77 ++--- 5 files changed, 236 insertions(+), 86 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1d468df3/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index a297459..17d0fe2 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -71,6 +71,10 @@ test + org.scala-lang + scala-library + +
git commit: [SPARK-3154][STREAMING] Make FlumePollingInputDStream shutdown cleaner.
Repository: spark Updated Branches: refs/heads/master 171a41cb0 -> 6f671d04f [SPARK-3154][STREAMING] Make FlumePollingInputDStream shutdown cleaner. Currently lot of errors get thrown from Avro IPC layer when the dstream or sink is shutdown. This PR cleans it up. Some refactoring is done in the receiver code to put all of the RPC code into a single Try and just recover from that. The sink code has also been cleaned up. Author: Hari Shreedharan Closes #2065 from harishreedharan/clean-flume-shutdown and squashes the following commits: f93a07c [Hari Shreedharan] Formatting fixes. d7427cc [Hari Shreedharan] More fixes! a0a8852 [Hari Shreedharan] Fix race condition, hopefully! Minor other changes. 4c9ed02 [Hari Shreedharan] Remove unneeded list in Callback handler. Other misc changes. 8fee36f [Hari Shreedharan] Scala-library is required, else maven build fails. Also catch InterruptedException in TxnProcessor. 445e700 [Hari Shreedharan] Merge remote-tracking branch 'asf/master' into clean-flume-shutdown 87232e0 [Hari Shreedharan] Refactor Flume Input Stream. Clean up code, better error handling. 9001d26 [Hari Shreedharan] Change log level to debug in TransactionProcessor#shutdown method e7b8d82 [Hari Shreedharan] Incorporate review feedback 598efa7 [Hari Shreedharan] Clean up some exception handling code e1027c6 [Hari Shreedharan] Merge remote-tracking branch 'asf/master' into clean-flume-shutdown ed608c8 [Hari Shreedharan] [SPARK-3154][STREAMING] Make FlumePollingInputDStream shutdown cleaner. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6f671d04 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6f671d04 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6f671d04 Branch: refs/heads/master Commit: 6f671d04fa98f97fd48c5e749b9f47dd4a8b4f44 Parents: 171a41c Author: Hari Shreedharan Authored: Wed Aug 27 02:39:02 2014 -0700 Committer: Tathagata Das Committed: Wed Aug 27 02:39:02 2014 -0700 -- external/flume-sink/pom.xml | 4 + .../flume/sink/SparkAvroCallbackHandler.scala | 56 +-- .../flume/sink/TransactionProcessor.scala | 18 +- .../streaming/flume/FlumeBatchFetcher.scala | 167 +++ .../flume/FlumePollingInputDStream.scala| 77 ++--- 5 files changed, 236 insertions(+), 86 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6f671d04/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index c1e8e65..b345276 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -71,6 +71,10 @@ test + org.scala-lang + scala-library + +
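The key structural idea in the receiver refactoring, "put all of the RPC code into a single Try and just recover from that", looks roughly like the self-contained sketch below; `getEventBatch` and `ackBatch` are invented stand-ins for the Avro IPC calls, not the real sink API.

    import scala.util.{Failure, Success, Try}

    // Invented stand-ins for the per-batch transceiver calls.
    def getEventBatch(): Seq[String] = Seq("event-1", "event-2")
    def ackBatch(events: Seq[String]): Unit = println(s"acked ${events.size} events")

    // The whole fetch-and-ack conversation lives in one Try, so any failure,
    // including the connection being shut down underneath us, surfaces in a
    // single place instead of leaking exceptions from several call sites.
    def fetchOnce(): Option[Seq[String]] = Try {
      val events = getEventBatch()
      ackBatch(events)
      events
    } match {
      case Success(events) => Some(events)
      case Failure(e) =>
        println(s"Batch fetch failed, giving up on this batch: ${e.getMessage}")
        None
    }

    fetchOnce()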
[1/2] [SPARK-2933] [yarn] Refactor and cleanup Yarn AM code.
Repository: spark Updated Branches: refs/heads/master 6f671d04f -> b92d823ad http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala index 3474112..d162b4c 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala +++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientClusterScheduler.scala @@ -19,22 +19,21 @@ package org.apache.spark.scheduler.cluster import org.apache.spark._ import org.apache.hadoop.conf.Configuration -import org.apache.spark.deploy.yarn.YarnAllocationHandler +import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.util.Utils /** - * - * This scheduler launches executors through Yarn - by calling into Client to launch ExecutorLauncher as AM. + * This scheduler launches executors through Yarn - by calling into Client to launch the Spark AM. */ -private[spark] class YarnClientClusterScheduler(sc: SparkContext, conf: Configuration) extends TaskSchedulerImpl(sc) { +private[spark] class YarnClientClusterScheduler(sc: SparkContext, conf: Configuration) + extends TaskSchedulerImpl(sc) { def this(sc: SparkContext) = this(sc, new Configuration()) // By default, rack is unknown override def getRackForHost(hostPort: String): Option[String] = { val host = Utils.parseHostPort(hostPort)._1 -val retval = YarnAllocationHandler.lookupRack(conf, host) -if (retval != null) Some(retval) else None +Option(YarnSparkHadoopUtil.lookupRack(conf, host)) } } http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 833e249..a5f537d 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -19,7 +19,7 @@ package org.apache.spark.scheduler.cluster import org.apache.hadoop.yarn.api.records.{ApplicationId, YarnApplicationState} import org.apache.spark.{SparkException, Logging, SparkContext} -import org.apache.spark.deploy.yarn.{Client, ClientArguments, ExecutorLauncher, YarnSparkHadoopUtil} +import org.apache.spark.deploy.yarn.{Client, ClientArguments, YarnSparkHadoopUtil} import org.apache.spark.scheduler.TaskSchedulerImpl import scala.collection.mutable.ArrayBuffer @@ -60,10 +60,7 @@ private[spark] class YarnClientSchedulerBackend( val argsArrayBuf = new ArrayBuffer[String]() argsArrayBuf += ( - "--class", "notused", - "--jar", null, // The primary jar will be added dynamically in SparkContext. 
- "--args", hostport, - "--am-class", classOf[ExecutorLauncher].getName + "--args", hostport ) // process any optional arguments, given either as environment variables http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala -- diff --git a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala index 9aeca4a..69f4022 100644 --- a/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala +++ b/yarn/common/src/main/scala/org/apache/spark/scheduler/cluster/YarnClusterScheduler.scala @@ -18,16 +18,17 @@ package org.apache.spark.scheduler.cluster import org.apache.spark._ -import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnAllocationHandler} +import org.apache.spark.deploy.yarn.{ApplicationMaster, YarnSparkHadoopUtil} import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.util.Utils import org.apache.hadoop.conf.Configuration /** - * - * This is a simple extension to ClusterScheduler - to ensure that appropriate initialization of ApplicationMaster, etc is done + * This is a simple extension to ClusterScheduler - to ensure that appropriate initialization of + * ApplicationMaster, etc is done */ -private[spark] class YarnClusterScheduler(sc: SparkContext, conf: Configuration) extends TaskSchedulerImpl(sc) { +private
[2/2] git commit: [SPARK-2933] [yarn] Refactor and cleanup Yarn AM code.
[SPARK-2933] [yarn] Refactor and cleanup Yarn AM code. This change modifies the Yarn module so that all the logic related to running the ApplicationMaster is localized. Instead of, previously, 4 different classes with mostly identical code, now we have: - A single, shared ApplicationMaster class, which can operate both in client and cluster mode, and substitutes the old ApplicationMaster (for cluster mode) and ExecutorLauncher (for client mode). The benefit here is that all different execution modes for all supported yarn versions use the same shared code for monitoring executor allocation, setting up configuration, and monitoring the process's lifecycle. - A new YarnRMClient interface, which defines basic RM functionality needed by the ApplicationMaster. This interface has concrete implementations for each supported Yarn version. - A new YarnAllocator interface, which just abstracts the existing interface of the YarnAllocationHandler class. This is to avoid having to touch the allocator code too much in this change, although it might benefit from a similar effort in the future. The end result is much easier to understand code, with much less duplication, making it much easier to fix bugs, add features, and test everything knowing that all supported versions will behave the same. Author: Marcelo Vanzin Closes #2020 from vanzin/SPARK-2933 and squashes the following commits: 3bbf3e7 [Marcelo Vanzin] Merge branch 'master' into SPARK-2933 ff389ed [Marcelo Vanzin] Do not interrupt reporter thread from within itself. 3a8ed37 [Marcelo Vanzin] Remote stale comment. 0f5142c [Marcelo Vanzin] Review feedback. 41f8c8a [Marcelo Vanzin] Fix app status reporting. c0794be [Marcelo Vanzin] Correctly clean up staging directory. 92770cc [Marcelo Vanzin] Merge branch 'master' into SPARK-2933 ecaf332 [Marcelo Vanzin] Small fix to shutdown code. f02d3f8 [Marcelo Vanzin] Merge branch 'master' into SPARK-2933 f581122 [Marcelo Vanzin] Review feedback. 557fdeb [Marcelo Vanzin] Cleanup a couple more constants. be6068d [Marcelo Vanzin] Restore shutdown hook to clean up staging dir. 5150993 [Marcelo Vanzin] Some more cleanup. b6289ab [Marcelo Vanzin] Move cluster/client code to separate methods. ecb23cd [Marcelo Vanzin] More trivial cleanup. 34f1e63 [Marcelo Vanzin] Fix some questionable error handling. 5657c7d [Marcelo Vanzin] Finish app if SparkContext initialization times out. 0e4be3d [Marcelo Vanzin] Keep "ExecutorLauncher" as the main class for client-mode AM. 91beabb [Marcelo Vanzin] Fix UI filter registration. 8c72239 [Marcelo Vanzin] Trivial cleanups. 99a52d5 [Marcelo Vanzin] Changes to the yarn-alpha project to use common AM code. 848ca6d [Marcelo Vanzin] [SPARK-2933] [yarn] Refactor and cleanup Yarn AM code. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b92d823a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b92d823a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b92d823a Branch: refs/heads/master Commit: b92d823ad13f6fcc325eeb99563bea543871c6aa Parents: 6f671d0 Author: Marcelo Vanzin Authored: Wed Aug 27 11:02:04 2014 -0500 Committer: Thomas Graves Committed: Wed Aug 27 11:02:04 2014 -0500 -- .../spark/deploy/yarn/ApplicationMaster.scala | 453 --- .../spark/deploy/yarn/ExecutorLauncher.scala| 315 - .../deploy/yarn/YarnAllocationHandler.scala | 192 ++-- .../spark/deploy/yarn/YarnRMClientImpl.scala| 103 + .../spark/deploy/yarn/ApplicationMaster.scala | 430 ++ .../yarn/ApplicationMasterArguments.scala | 26 +- .../spark/deploy/yarn/ClientArguments.scala | 9 +- .../apache/spark/deploy/yarn/ClientBase.scala | 54 ++- .../spark/deploy/yarn/YarnAllocator.scala | 34 ++ .../apache/spark/deploy/yarn/YarnRMClient.scala | 67 +++ .../spark/deploy/yarn/YarnSparkHadoopUtil.scala | 51 +++ .../cluster/YarnClientClusterScheduler.scala| 11 +- .../cluster/YarnClientSchedulerBackend.scala| 7 +- .../cluster/YarnClusterScheduler.scala | 17 +- .../spark/deploy/yarn/ApplicationMaster.scala | 413 - .../spark/deploy/yarn/ExecutorLauncher.scala| 276 --- .../deploy/yarn/YarnAllocationHandler.scala | 196 ++-- .../spark/deploy/yarn/YarnRMClientImpl.scala| 76 18 files changed, 892 insertions(+), 1838 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b92d823a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala -- diff --git a/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/alpha/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala deleted file mode 100644 index
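A toy illustration of the pattern described above: one shared application-master loop driving a small per-Yarn-version client trait. The trait and class names here are hypothetical and deliberately simplified; they are not the actual signatures of YarnRMClient or ApplicationMaster.

    // Hypothetical shapes only, to show the shared-AM-over-versioned-client pattern.
    trait RMClient {
      def register(attemptId: String): Unit
      def requestExecutors(count: Int): Unit
      def unregister(finalStatus: String): Unit
    }

    // One implementation per supported Yarn API; alpha and stable would differ only here.
    class StableRMClient extends RMClient {
      def register(attemptId: String): Unit = println(s"registering $attemptId via stable API")
      def requestExecutors(count: Int): Unit = println(s"requesting $count executors")
      def unregister(finalStatus: String): Unit = println(s"unregistering with status $finalStatus")
    }

    // A single shared master loop, usable in both client and cluster mode, in place
    // of several near-identical ApplicationMaster/ExecutorLauncher classes.
    class SharedApplicationMaster(client: RMClient, isClusterMode: Boolean) {
      def run(): Unit = {
        client.register("appattempt_0001")
        client.requestExecutors(if (isClusterMode) 4 else 2)
        client.unregister("SUCCEEDED")
      }
    }

    new SharedApplicationMaster(new StableRMClient, isClusterMode = true).run()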
git commit: [SPARK-3170][CORE][BUG]:RDD info loss in "StorageTab" and "ExecutorTab"
Repository: spark Updated Branches: refs/heads/master b92d823ad -> d8298c46b [SPARK-3170][CORE][BUG]:RDD info loss in "StorageTab" and "ExecutorTab" compeleted stage only need to remove its own partitions that are no longer cached. However, "StorageTab" may lost some rdds which are cached actually. Not only in "StorageTab", "ExectutorTab" may also lose some rdd info which have been overwritten by last rdd in a same task. 1. "StorageTab": when multiple stages run simultaneously, completed stage will remove rdd info which belong to other stages that are still running. 2. "ExectutorTab": taskcontext may lose some "updatedBlocks" info of rdds in a dependency chain. Like the following example: val r1 = sc.paralize(..).cache() val r2 = r1.map(...).cache() val n = r2.count() When count the r2, r1 and r2 will be cached finally. So in CacheManager.getOrCompute, the taskcontext should contain "updatedBlocks" of r1 and r2. Currently, the "updatedBlocks" only contain the info of r2. Author: uncleGen Closes #2131 from uncleGen/master_ui_fix and squashes the following commits: a6a8a0b [uncleGen] fix some coding style 3a1bc15 [uncleGen] fix some error in unit test 56ea488 [uncleGen] there's some line too long c82ba82 [uncleGen] Bug Fix: RDD info loss in "StorageTab" and "ExecutorTab" Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d8298c46 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d8298c46 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d8298c46 Branch: refs/heads/master Commit: d8298c46b7bf566d1cd2f7ea9b1b2b2722dcfb17 Parents: b92d823 Author: uncleGen Authored: Wed Aug 27 10:32:13 2014 -0700 Committer: Andrew Or Committed: Wed Aug 27 10:33:01 2014 -0700 -- .../scala/org/apache/spark/CacheManager.scala | 4 ++- .../apache/spark/ui/storage/StorageTab.scala| 7 +++-- .../org/apache/spark/CacheManagerSuite.scala| 19 ++ .../spark/ui/storage/StorageTabSuite.scala | 27 4 files changed, 54 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d8298c46/core/src/main/scala/org/apache/spark/CacheManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/CacheManager.scala b/core/src/main/scala/org/apache/spark/CacheManager.scala index 5ddda4d..f8584b9 100644 --- a/core/src/main/scala/org/apache/spark/CacheManager.scala +++ b/core/src/main/scala/org/apache/spark/CacheManager.scala @@ -68,7 +68,9 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging { // Otherwise, cache the values and keep track of any updates in block statuses val updatedBlocks = new ArrayBuffer[(BlockId, BlockStatus)] val cachedValues = putInBlockManager(key, computedValues, storageLevel, updatedBlocks) - context.taskMetrics.updatedBlocks = Some(updatedBlocks) + val metrics = context.taskMetrics + val lastUpdatedBlocks = metrics.updatedBlocks.getOrElse(Seq[(BlockId, BlockStatus)]()) + metrics.updatedBlocks = Some(lastUpdatedBlocks ++ updatedBlocks.toSeq) new InterruptibleIterator(context, cachedValues) } finally { http://git-wip-us.apache.org/repos/asf/spark/blob/d8298c46/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala index 67f72a9..76097f1 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala @@ -70,8 +70,11 @@ class 
StorageListener(storageStatusListener: StorageStatusListener) extends Spar } override def onStageCompleted(stageCompleted: SparkListenerStageCompleted) = synchronized { -// Remove all partitions that are no longer cached -_rddInfoMap.retain { case (_, info) => info.numCachedPartitions > 0 } +// Remove all partitions that are no longer cached in current completed stage +val completedRddIds = stageCompleted.stageInfo.rddInfos.map(r => r.id).toSet +_rddInfoMap.retain { case (id, info) => + !completedRddIds.contains(id) || info.numCachedPartitions > 0 +} } override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD) = synchronized { http://git-wip-us.apache.org/repos/asf/spark/blob/d8298c46/core/src/test/scala/org/apache/spark/CacheManagerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/CacheManagerSuite.scala b/core/src/test/scala/o
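The dependency-chain scenario from the description, spelled out as runnable code (with `parallelize` spelled correctly) and assuming an existing SparkContext `sc`: a single set of tasks materializes and caches blocks for both RDDs, which is why the task metrics must accumulate updated blocks rather than keep only the last RDD's.

    // assuming an existing SparkContext `sc`
    val r1 = sc.parallelize(1 to 1000).cache()
    val r2 = r1.map(_ * 2).cache()
    val n = r2.count()   // each task caches blocks of r1 *and* r2; before this fix,
                         // taskMetrics.updatedBlocks kept only the r2 entries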
git commit: [SPARK-3170][CORE][BUG]:RDD info loss in "StorageTab" and "ExecutorTab"
Repository: spark Updated Branches: refs/heads/branch-1.1 1d468df33 -> 8f8e2a4ee [SPARK-3170][CORE][BUG]:RDD info loss in "StorageTab" and "ExecutorTab" compeleted stage only need to remove its own partitions that are no longer cached. However, "StorageTab" may lost some rdds which are cached actually. Not only in "StorageTab", "ExectutorTab" may also lose some rdd info which have been overwritten by last rdd in a same task. 1. "StorageTab": when multiple stages run simultaneously, completed stage will remove rdd info which belong to other stages that are still running. 2. "ExectutorTab": taskcontext may lose some "updatedBlocks" info of rdds in a dependency chain. Like the following example: val r1 = sc.paralize(..).cache() val r2 = r1.map(...).cache() val n = r2.count() When count the r2, r1 and r2 will be cached finally. So in CacheManager.getOrCompute, the taskcontext should contain "updatedBlocks" of r1 and r2. Currently, the "updatedBlocks" only contain the info of r2. Author: uncleGen Closes #2131 from uncleGen/master_ui_fix and squashes the following commits: a6a8a0b [uncleGen] fix some coding style 3a1bc15 [uncleGen] fix some error in unit test 56ea488 [uncleGen] there's some line too long c82ba82 [uncleGen] Bug Fix: RDD info loss in "StorageTab" and "ExecutorTab" (cherry picked from commit d8298c46b7bf566d1cd2f7ea9b1b2b2722dcfb17) Signed-off-by: Andrew Or Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8f8e2a4e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8f8e2a4e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8f8e2a4e Branch: refs/heads/branch-1.1 Commit: 8f8e2a4ee7419a96196727704695f5114da5b84e Parents: 1d468df Author: uncleGen Authored: Wed Aug 27 10:32:13 2014 -0700 Committer: Andrew Or Committed: Wed Aug 27 10:33:13 2014 -0700 -- .../scala/org/apache/spark/CacheManager.scala | 4 ++- .../apache/spark/ui/storage/StorageTab.scala| 7 +++-- .../org/apache/spark/CacheManagerSuite.scala| 19 ++ .../spark/ui/storage/StorageTabSuite.scala | 27 4 files changed, 54 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8f8e2a4e/core/src/main/scala/org/apache/spark/CacheManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/CacheManager.scala b/core/src/main/scala/org/apache/spark/CacheManager.scala index 5ddda4d..f8584b9 100644 --- a/core/src/main/scala/org/apache/spark/CacheManager.scala +++ b/core/src/main/scala/org/apache/spark/CacheManager.scala @@ -68,7 +68,9 @@ private[spark] class CacheManager(blockManager: BlockManager) extends Logging { // Otherwise, cache the values and keep track of any updates in block statuses val updatedBlocks = new ArrayBuffer[(BlockId, BlockStatus)] val cachedValues = putInBlockManager(key, computedValues, storageLevel, updatedBlocks) - context.taskMetrics.updatedBlocks = Some(updatedBlocks) + val metrics = context.taskMetrics + val lastUpdatedBlocks = metrics.updatedBlocks.getOrElse(Seq[(BlockId, BlockStatus)]()) + metrics.updatedBlocks = Some(lastUpdatedBlocks ++ updatedBlocks.toSeq) new InterruptibleIterator(context, cachedValues) } finally { http://git-wip-us.apache.org/repos/asf/spark/blob/8f8e2a4e/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala index 67f72a9..76097f1 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala +++ 
b/core/src/main/scala/org/apache/spark/ui/storage/StorageTab.scala @@ -70,8 +70,11 @@ class StorageListener(storageStatusListener: StorageStatusListener) extends Spar } override def onStageCompleted(stageCompleted: SparkListenerStageCompleted) = synchronized { -// Remove all partitions that are no longer cached -_rddInfoMap.retain { case (_, info) => info.numCachedPartitions > 0 } +// Remove all partitions that are no longer cached in current completed stage +val completedRddIds = stageCompleted.stageInfo.rddInfos.map(r => r.id).toSet +_rddInfoMap.retain { case (id, info) => + !completedRddIds.contains(id) || info.numCachedPartitions > 0 +} } override def onUnpersistRDD(unpersistRDD: SparkListenerUnpersistRDD) = synchronized { http://git-wip-us.apache.org/repos/asf/spark/blob/8f8e2a4e/core/src/test/scala/org/apache/spark/CacheManagerSuite.scala -
git commit: [SPARK-3239] [PySpark] randomize the dirs for each process
Repository: spark Updated Branches: refs/heads/branch-1.1 8f8e2a4ee -> 092121e47 [SPARK-3239] [PySpark] randomize the dirs for each process This can avoid the IO contention during spilling, when you have multiple disks. Author: Davies Liu Closes #2152 from davies/randomize and squashes the following commits: a4863c4 [Davies Liu] randomize the dirs for each process Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/092121e4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/092121e4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/092121e4 Branch: refs/heads/branch-1.1 Commit: 092121e477bcd2e474440dbdfdfa69cbd15c4803 Parents: 8f8e2a4 Author: Davies Liu Authored: Wed Aug 27 10:40:35 2014 -0700 Committer: Matei Zaharia Committed: Wed Aug 27 10:40:35 2014 -0700 -- python/pyspark/shuffle.py | 4 1 file changed, 4 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/092121e4/python/pyspark/shuffle.py -- diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py index 1ebe7df..2750f11 100644 --- a/python/pyspark/shuffle.py +++ b/python/pyspark/shuffle.py @@ -21,6 +21,7 @@ import platform import shutil import warnings import gc +import random from pyspark.serializers import BatchedSerializer, PickleSerializer @@ -216,6 +217,9 @@ class ExternalMerger(Merger): """ Get all the directories """ path = os.environ.get("SPARK_LOCAL_DIRS", "/tmp") dirs = path.split(",") +if len(dirs) > 1: +rnd = random.Random(os.getpid() + id(dirs)) +random.shuffle(dirs, rnd.random) return [os.path.join(d, "python", str(os.getpid()), str(id(self))) for d in dirs] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
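The patch is Python, but the idea carries over directly: each worker process shuffles the list of spill directories using its own seed, so concurrent processes spread their spill I/O across disks instead of all starting with the first one. A rough sketch of the same policy in Scala (the environment variable name comes from the diff above; the per-process seed choice here is only an example):

```scala
import java.lang.management.ManagementFactory
import scala.util.Random

// Return the local spill directories in a per-process pseudo-random order.
def localDirs(): Seq[String] = {
  val dirs = sys.env.getOrElse("SPARK_LOCAL_DIRS", "/tmp").split(",").toSeq
  if (dirs.length > 1) {
    // The runtime name is "pid@host", so it differs per process and each worker
    // ends up with its own ordering of the disks.
    val seed = ManagementFactory.getRuntimeMXBean.getName.hashCode
    new Random(seed).shuffle(dirs)
  } else {
    dirs
  }
}
```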
git commit: [SPARK-2608][Core] Fixed command line option passing issue over Mesos via SPARK_EXECUTOR_OPTS
Repository: spark Updated Branches: refs/heads/branch-1.1 092121e47 -> 935bffe3b [SPARK-2608][Core] Fixed command line option passing issue over Mesos via SPARK_EXECUTOR_OPTS This is another try after #2145 to fix [SPARK-2608](https://issues.apache.org/jira/browse/SPARK-2608). ### Basic Idea The basic idea is to pass `extraJavaOpts` and `extraLibraryPath` together via environment variable `SPARK_EXECUTOR_OPTS`. This variable is recognized by `spark-class` and not used anywhere else. In this way, we still launch Mesos executors with `spark-class`/`spark-executor`, but avoids the executor side Spark home issue. ### Known Issue Quoted string with spaces is not allowed in either `extraJavaOpts` or `extraLibraryPath` when using Spark over Mesos. The reason is that Mesos passes the whole command line as a single string argument to `sh -c` to start the executor, and this makes shell string escaping non-trivial to handle. This should be fixed in a later release. ### Background Classes in package `org.apache.spark.deploy` shouldn't be used as they assume Spark is deployed in standalone mode, and give wrong executor side Spark home directory. Please refer to comments in #2145 for more details. Author: Cheng Lian Closes #2161 from liancheng/mesos-fix-with-env-var and squashes the following commits: ba59190 [Cheng Lian] Added fine grained Mesos executor support 1174076 [Cheng Lian] Draft fix for CoarseMesosSchedulerBackend Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/935bffe3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/935bffe3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/935bffe3 Branch: refs/heads/branch-1.1 Commit: 935bffe3bf6c91a42288bff8c1ec69fecb41a769 Parents: 092121e Author: Cheng Lian Authored: Wed Aug 27 12:39:21 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 12:39:21 2014 -0700 -- .../cluster/mesos/CoarseMesosSchedulerBackend.scala | 14 ++ .../cluster/mesos/MesosSchedulerBackend.scala | 14 ++ 2 files changed, 24 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/935bffe3/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala index f017250..8c7cb07 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala @@ -122,6 +122,12 @@ private[spark] class CoarseMesosSchedulerBackend( val extraLibraryPath = conf.getOption(libraryPathOption).map(p => s"-Djava.library.path=$p") val extraOpts = Seq(extraJavaOpts, extraLibraryPath).flatten.mkString(" ") +environment.addVariables( + Environment.Variable.newBuilder() +.setName("SPARK_EXECUTOR_OPTS") +.setValue(extraOpts) +.build()) + sc.executorEnvs.foreach { case (key, value) => environment.addVariables(Environment.Variable.newBuilder() .setName(key) @@ -140,16 +146,16 @@ private[spark] class CoarseMesosSchedulerBackend( if (uri == null) { val runScript = new File(sparkHome, "./bin/spark-class").getCanonicalPath command.setValue( -"\"%s\" org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %s %d".format( - runScript, extraOpts, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores)) +"\"%s\" 
org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %d".format( + runScript, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores)) } else { // Grab everything to the first '.'. We'll use that and '*' to // glob the directory "correctly". val basename = uri.split('/').last.split('.').head command.setValue( ("cd %s*; " + - "./bin/spark-class org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %s %d") - .format(basename, extraOpts, driverUrl, offer.getSlaveId.getValue, + "./bin/spark-class org.apache.spark.executor.CoarseGrainedExecutorBackend %s %s %s %d") + .format(basename, driverUrl, offer.getSlaveId.getValue, offer.getHostname, numCores)) command.addUris(CommandInfo.URI.newBuilder().setValue(uri)) } http://git-wip-us.apache.org/repos/asf/spark/blob/935bffe3/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosSchedulerBackend.scala --
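The key step is folding the two optional executor settings into one string that travels in SPARK_EXECUTOR_OPTS and is picked up by spark-class. A minimal sketch of that folding, assuming the usual spark.executor.* keys (the real backends read them through fields defined earlier in the file):

```scala
import org.apache.spark.SparkConf

// Combine the optional Java options and library path into one opts string,
// mirroring what the Mesos backends now export as SPARK_EXECUTOR_OPTS.
// The key names are assumed to be the standard spark.executor.* ones.
def executorOpts(conf: SparkConf): String = {
  val extraJavaOpts    = conf.getOption("spark.executor.extraJavaOptions")
  val extraLibraryPath = conf.getOption("spark.executor.extraLibraryPath")
                             .map(p => s"-Djava.library.path=$p")
  Seq(extraJavaOpts, extraLibraryPath).flatten.mkString(" ")
}
```

As noted above, values containing quoted spaces still do not survive the single-string `sh -c` invocation Mesos uses to start the executor.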
git commit: SPARK-3259 - User data should be given to the master
Repository: spark Updated Branches: refs/heads/branch-1.1 935bffe3b -> 0c94a5b2a SPARK-3259 - User data should be given to the master Author: Allan Douglas R. de Oliveira Closes #2162 from douglaz/user_data_master and squashes the following commits: 10d15f6 [Allan Douglas R. de Oliveira] Give user data also to the master (cherry picked from commit 5ac4093c9fa29a11e38f884eebb3f5db087de76f) Signed-off-by: Patrick Wendell Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0c94a5b2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0c94a5b2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0c94a5b2 Branch: refs/heads/branch-1.1 Commit: 0c94a5b2a6c41d061f130e30a2c1ad8e84fcf2b6 Parents: 935bffe Author: Allan Douglas R. de Oliveira Authored: Wed Aug 27 12:43:22 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 12:43:29 2014 -0700 -- ec2/spark_ec2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0c94a5b2/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index a979891..58261e2 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -444,7 +444,8 @@ def launch_cluster(conn, opts, cluster_name): placement=opts.zone, min_count=1, max_count=1, - block_device_map=block_map) + block_device_map=block_map, + user_data=user_data_content) master_nodes = master_res.instances print "Launched master in %s, regid = %s" % (zone, master_res.id) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: SPARK-3259 - User data should be given to the master
Repository: spark Updated Branches: refs/heads/master d8298c46b -> 5ac4093c9 SPARK-3259 - User data should be given to the master Author: Allan Douglas R. de Oliveira Closes #2162 from douglaz/user_data_master and squashes the following commits: 10d15f6 [Allan Douglas R. de Oliveira] Give user data also to the master Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5ac4093c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5ac4093c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5ac4093c Branch: refs/heads/master Commit: 5ac4093c9fa29a11e38f884eebb3f5db087de76f Parents: d8298c4 Author: Allan Douglas R. de Oliveira Authored: Wed Aug 27 12:43:22 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 12:43:22 2014 -0700 -- ec2/spark_ec2.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5ac4093c/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 77a246f..ddd72a0 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -458,7 +458,8 @@ def launch_cluster(conn, opts, cluster_name): placement=opts.zone, min_count=1, max_count=1, - block_device_map=block_map) + block_device_map=block_map, + user_data=user_data_content) master_nodes = master_res.instances print "Launched master in %s, regid = %s" % (zone, master_res.id) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-3118][SQL]add "SHOW TBLPROPERTIES tblname; " and "SHOW COLUMNS (FROM|IN) table_name [(FROM|IN) db_name]" support
Repository: spark Updated Branches: refs/heads/master 5ac4093c9 -> 3b5eb7083 [SPARK-3118][SQL]add "SHOW TBLPROPERTIES tblname;" and "SHOW COLUMNS (FROM|IN) table_name [(FROM|IN) db_name]" support JIRA issue: [SPARK-3118] https://issues.apache.org/jira/browse/SPARK-3118 eg: > SHOW TBLPROPERTIES test; SHOW TBLPROPERTIES test; numPartitions 0 numFiles1 transient_lastDdlTime 1407923642 numRows 0 totalSize 82 rawDataSize 0 eg: > SHOW COLUMNS in test; SHOW COLUMNS in test; OK Time taken: 0.304 seconds id stid bo Author: u0jing Closes #2034 from u0jing/spark-3118 and squashes the following commits: b231d87 [u0jing] add golden answer files 35f4885 [u0jing] add 'show columns' and 'show tblproperties' support Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3b5eb708 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3b5eb708 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3b5eb708 Branch: refs/heads/master Commit: 3b5eb7083d3e1955de288e4fd365dca6221f32fb Parents: 5ac4093 Author: u0jing Authored: Wed Aug 27 12:47:14 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 12:47:14 2014 -0700 -- .../spark/sql/hive/execution/HiveCompatibilitySuite.scala | 2 ++ sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 2 ++ .../golden/show_columns-0-d84a430d0ab7a63a0a73361f8d188a4b | 0 .../golden/show_columns-1-ac73cff018bf94944244117a2eb76f7f | 0 .../golden/show_columns-10-695a68c82308540eba1d0a04e032cf39| 0 .../golden/show_columns-11-691b4e6664e6d435233ea4e8c3b585d5| 0 .../golden/show_columns-12-afc350d459a8f794cc3ca93092163a0c| 1 + .../golden/show_columns-13-e86d559aeb84a4cc017a103182c22bfb| 0 .../golden/show_columns-14-7c1d63fa270b4d94b69ad49c3e2378a6| 1 + .../golden/show_columns-15-2c404655e35f7bd7ce54500c832f9d8e| 1 + .../golden/show_columns-2-b74990316ec4245fd8a7011e684b39da | 3 +++ .../golden/show_columns-3-6e40309b0ca10f353a68395ccd64d566 | 0 .../golden/show_columns-4-a62fc229d241303bffb29b34ad125f8c | 0 .../golden/show_columns-5-691b4e6664e6d435233ea4e8c3b585d5 | 0 .../golden/show_columns-6-37c88438bd364343a50f64cf39bfcaf6 | 0 .../golden/show_columns-7-afc350d459a8f794cc3ca93092163a0c | 1 + .../golden/show_columns-8-9b0b96593ca513c6932f3ed8df68808a | 1 + .../golden/show_columns-9-6c0fa8be1c19d4d216dfe7427df1275f | 0 .../show_tblproperties-0-ca75bef7d151a44b6a89dd92333eb12a | 0 .../show_tblproperties-1-be4adb893c7f946ebd76a648ce3cc1ae | 1 + .../show_tblproperties-2-7c7993eea8e41cf095afae07772cc16e | 0 .../show_tblproperties-3-2b4b8c43ef08bdb418405917d475ac1d | 0 .../show_tblproperties-4-6c63215ea599f6533666c4d70606b139 | 6 ++ .../show_tblproperties-5-be4adb893c7f946ebd76a648ce3cc1ae | 1 + .../show_tblproperties-6-9dd8d67460f558955d96a107ca996ad | 0 25 files changed, 20 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3b5eb708/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala -- diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 6624387..7a69e3c 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -644,9 +644,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { 
"show_create_table_db_table", "show_create_table_does_not_exist", "show_create_table_index", +"show_columns", "show_describe_func_quotes", "show_functions", "show_partitions", +"show_tblproperties", "skewjoinopt13", "skewjoinopt18", "skewjoinopt9", http://git-wip-us.apache.org/repos/asf/spark/blob/3b5eb708/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 581332e..fa3adfd 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -54,6 +54,7 @@ private[hive] object HiveQl { "TOK_DESCFUNCTION", "TOK_DESCDATABASE", "TOK_SHOW_CREATETABLE", +"TOK_SHOWCOLUMNS", "TOK_SHOW_TABLESTATUS", "TOK_SHOWDATABASES", "TOK_SHOWFUNCTIONS", @@ -61,6 +62,7 @@
git commit: [SPARK-3118][SQL]add "SHOW TBLPROPERTIES tblname; " and "SHOW COLUMNS (FROM|IN) table_name [(FROM|IN) db_name]" support
Repository: spark Updated Branches: refs/heads/branch-1.1 0c94a5b2a -> 19cda0788 [SPARK-3118][SQL]add "SHOW TBLPROPERTIES tblname;" and "SHOW COLUMNS (FROM|IN) table_name [(FROM|IN) db_name]" support JIRA issue: [SPARK-3118] https://issues.apache.org/jira/browse/SPARK-3118 eg: > SHOW TBLPROPERTIES test; SHOW TBLPROPERTIES test; numPartitions 0 numFiles1 transient_lastDdlTime 1407923642 numRows 0 totalSize 82 rawDataSize 0 eg: > SHOW COLUMNS in test; SHOW COLUMNS in test; OK Time taken: 0.304 seconds id stid bo Author: u0jing Closes #2034 from u0jing/spark-3118 and squashes the following commits: b231d87 [u0jing] add golden answer files 35f4885 [u0jing] add 'show columns' and 'show tblproperties' support (cherry picked from commit 3b5eb7083d3e1955de288e4fd365dca6221f32fb) Signed-off-by: Michael Armbrust Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/19cda078 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/19cda078 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/19cda078 Branch: refs/heads/branch-1.1 Commit: 19cda07884a7fb3a1985d77ad6e91560aa828729 Parents: 0c94a5b Author: u0jing Authored: Wed Aug 27 12:47:14 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 12:47:30 2014 -0700 -- .../spark/sql/hive/execution/HiveCompatibilitySuite.scala | 2 ++ sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala | 2 ++ .../golden/show_columns-0-d84a430d0ab7a63a0a73361f8d188a4b | 0 .../golden/show_columns-1-ac73cff018bf94944244117a2eb76f7f | 0 .../golden/show_columns-10-695a68c82308540eba1d0a04e032cf39| 0 .../golden/show_columns-11-691b4e6664e6d435233ea4e8c3b585d5| 0 .../golden/show_columns-12-afc350d459a8f794cc3ca93092163a0c| 1 + .../golden/show_columns-13-e86d559aeb84a4cc017a103182c22bfb| 0 .../golden/show_columns-14-7c1d63fa270b4d94b69ad49c3e2378a6| 1 + .../golden/show_columns-15-2c404655e35f7bd7ce54500c832f9d8e| 1 + .../golden/show_columns-2-b74990316ec4245fd8a7011e684b39da | 3 +++ .../golden/show_columns-3-6e40309b0ca10f353a68395ccd64d566 | 0 .../golden/show_columns-4-a62fc229d241303bffb29b34ad125f8c | 0 .../golden/show_columns-5-691b4e6664e6d435233ea4e8c3b585d5 | 0 .../golden/show_columns-6-37c88438bd364343a50f64cf39bfcaf6 | 0 .../golden/show_columns-7-afc350d459a8f794cc3ca93092163a0c | 1 + .../golden/show_columns-8-9b0b96593ca513c6932f3ed8df68808a | 1 + .../golden/show_columns-9-6c0fa8be1c19d4d216dfe7427df1275f | 0 .../show_tblproperties-0-ca75bef7d151a44b6a89dd92333eb12a | 0 .../show_tblproperties-1-be4adb893c7f946ebd76a648ce3cc1ae | 1 + .../show_tblproperties-2-7c7993eea8e41cf095afae07772cc16e | 0 .../show_tblproperties-3-2b4b8c43ef08bdb418405917d475ac1d | 0 .../show_tblproperties-4-6c63215ea599f6533666c4d70606b139 | 6 ++ .../show_tblproperties-5-be4adb893c7f946ebd76a648ce3cc1ae | 1 + .../show_tblproperties-6-9dd8d67460f558955d96a107ca996ad | 0 25 files changed, 20 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/19cda078/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala -- diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 6624387..7a69e3c 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ 
b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -644,9 +644,11 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "show_create_table_db_table", "show_create_table_does_not_exist", "show_create_table_index", +"show_columns", "show_describe_func_quotes", "show_functions", "show_partitions", +"show_tblproperties", "skewjoinopt13", "skewjoinopt18", "skewjoinopt9", http://git-wip-us.apache.org/repos/asf/spark/blob/19cda078/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 581332e..fa3adfd 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -54,6 +54,7 @@ private[hive] object HiveQl { "TOK_DESCFUNCTION", "TOK_DESCDATABASE", "TOK_SHOW_CREATETABLE", +"TOK_S
git commit: [SPARK-3197] [SQL] Reduce the Expression tree object creations for aggregation function (min/max)
Repository: spark Updated Branches: refs/heads/master 3b5eb7083 -> 4238c17dc [SPARK-3197] [SQL] Reduce the Expression tree object creations for aggregation function (min/max) Aggregation function min/max in catalyst will create expression tree for each single row, however, the expression tree creation is quite expensive in a multithreading env currently. Hence we got a very bad performance for the min/max. Here is the benchmark that I've done in my local. Master | Previous Result (ms) | Current Result (ms) | - | - local | 3645 | 3416 local[6] | 3602 | 1002 The Benchmark source code. ``` case class Record(key: Int, value: Int) object TestHive2 extends HiveContext(new SparkContext("local[6]", "TestSQLContext", new SparkConf())) object DataPrepare extends App { import TestHive2._ val rdd = sparkContext.parallelize((1 to 1000).map(i => Record(i % 3000, i)), 12) runSqlHive("SHOW TABLES") runSqlHive("DROP TABLE if exists a") runSqlHive("DROP TABLE if exists result") rdd.registerAsTable("records") runSqlHive("""CREATE TABLE a (key INT, value INT) | ROW FORMAT SERDE | 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' | STORED AS RCFILE """.stripMargin) runSqlHive("""CREATE TABLE result (key INT, value INT) | ROW FORMAT SERDE | 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' | STORED AS RCFILE """.stripMargin) hql(s"""from records | insert into table a | select key, value """.stripMargin) } object PerformanceTest extends App { import TestHive2._ hql("SHOW TABLES") hql("set spark.sql.shuffle.partitions=12") val cmd = "select min(value), max(value) from a group by key" val results = ("Result1", benchmark(cmd)) :: ("Result2", benchmark(cmd)) :: ("Result3", benchmark(cmd)) :: Nil results.foreach { case (prompt, result) => { println(s"$prompt: took ${result._1} ms (${result._2} records)") } } def benchmark(cmd: String) = { val begin = System.currentTimeMillis() val count = hql(cmd).count val end = System.currentTimeMillis() ((end - begin), count) } } ``` Author: Cheng Hao Closes #2113 from chenghao-intel/aggregation_expression_optimization and squashes the following commits: db40395 [Cheng Hao] remove the transient and add val for the expression property d56167d [Cheng Hao] Reduce the Expressions creation Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4238c17d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4238c17d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4238c17d Branch: refs/heads/master Commit: 4238c17dc9e1f2f93cc9e6c768f92bd27bf1df66 Parents: 3b5eb70 Author: Cheng Hao Authored: Wed Aug 27 12:50:47 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 12:50:47 2014 -0700 -- .../sql/catalyst/expressions/aggregates.scala | 30 +++- .../sql/catalyst/expressions/literals.scala | 5 ++-- 2 files changed, 18 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4238c17d/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala index dbc0c29..15560a2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -105,17 +105,18 @@ case class Min(child: Expression) extends PartialAggregate with 
trees.UnaryNode[ case class MinFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction { def this() = this(null, null) // Required for serialization. - var currentMin: Any = _ + val currentMin: MutableLiteral = MutableLiteral(null, expr.dataType) + val cmp = GreaterThan(currentMin, expr) override def update(input: Row): Unit = { -if (currentMin == null) { - currentMin = expr.eval(input) -} else if(GreaterThan(Literal(currentMin, expr.dataType), expr).eval(input) == true) { - currentMin = expr.eval(input) +if (currentMin.value == null) { + currentMin.value = expr.eval(input) +} else if(cmp.eval(input) == true) { + currentMin.value = expr.eval(input) } } - override def eval(input: Row): Any = currentMin + override def eval(input: Row): Any = currentMin.value } case class
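The essence of the optimization is easier to see outside Catalyst: rather than building a fresh GreaterThan(Literal(currentMin), expr) tree for every input row, a single MutableLiteral and a single comparison are built up front and only the literal's value changes per row. A toy model of that before/after (these classes merely stand in for Catalyst's Expression hierarchy):

```scala
// Toy stand-ins for Catalyst expressions; not the real classes.
trait Expr { def eval(row: Int): Any }
case object InputRef extends Expr { def eval(row: Int): Any = row }
case class MutableLit(var value: Any) extends Expr { def eval(row: Int): Any = value }
case class GreaterThan(left: Expr, right: Expr) extends Expr {
  def eval(row: Int): Any = left.eval(row).asInstanceOf[Int] > right.eval(row).asInstanceOf[Int]
}

object MinSketch extends App {
  // Built once, before any rows are seen (the patched behaviour).
  val currentMin = MutableLit(null)
  val cmp = GreaterThan(currentMin, InputRef)

  def update(row: Int): Unit = {
    if (currentMin.value == null || cmp.eval(row) == true) {
      currentMin.value = row
    }
  }

  Seq(5, 3, 8, 1).foreach(update)
  println(currentMin.value) // 1
}
```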
git commit: [SPARK-3197] [SQL] Reduce the Expression tree object creations for aggregation function (min/max)
Repository: spark Updated Branches: refs/heads/branch-1.1 19cda0788 -> 4c7f082c6 [SPARK-3197] [SQL] Reduce the Expression tree object creations for aggregation function (min/max) Aggregation function min/max in catalyst will create expression tree for each single row, however, the expression tree creation is quite expensive in a multithreading env currently. Hence we got a very bad performance for the min/max. Here is the benchmark that I've done in my local. Master | Previous Result (ms) | Current Result (ms) | - | - local | 3645 | 3416 local[6] | 3602 | 1002 The Benchmark source code. ``` case class Record(key: Int, value: Int) object TestHive2 extends HiveContext(new SparkContext("local[6]", "TestSQLContext", new SparkConf())) object DataPrepare extends App { import TestHive2._ val rdd = sparkContext.parallelize((1 to 1000).map(i => Record(i % 3000, i)), 12) runSqlHive("SHOW TABLES") runSqlHive("DROP TABLE if exists a") runSqlHive("DROP TABLE if exists result") rdd.registerAsTable("records") runSqlHive("""CREATE TABLE a (key INT, value INT) | ROW FORMAT SERDE | 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' | STORED AS RCFILE """.stripMargin) runSqlHive("""CREATE TABLE result (key INT, value INT) | ROW FORMAT SERDE | 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe' | STORED AS RCFILE """.stripMargin) hql(s"""from records | insert into table a | select key, value """.stripMargin) } object PerformanceTest extends App { import TestHive2._ hql("SHOW TABLES") hql("set spark.sql.shuffle.partitions=12") val cmd = "select min(value), max(value) from a group by key" val results = ("Result1", benchmark(cmd)) :: ("Result2", benchmark(cmd)) :: ("Result3", benchmark(cmd)) :: Nil results.foreach { case (prompt, result) => { println(s"$prompt: took ${result._1} ms (${result._2} records)") } } def benchmark(cmd: String) = { val begin = System.currentTimeMillis() val count = hql(cmd).count val end = System.currentTimeMillis() ((end - begin), count) } } ``` Author: Cheng Hao Closes #2113 from chenghao-intel/aggregation_expression_optimization and squashes the following commits: db40395 [Cheng Hao] remove the transient and add val for the expression property d56167d [Cheng Hao] Reduce the Expressions creation (cherry picked from commit 4238c17dc9e1f2f93cc9e6c768f92bd27bf1df66) Signed-off-by: Michael Armbrust Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4c7f082c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4c7f082c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4c7f082c Branch: refs/heads/branch-1.1 Commit: 4c7f082c6856bd8e95d56c60541558773a1bc4c9 Parents: 19cda07 Author: Cheng Hao Authored: Wed Aug 27 12:50:47 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 12:50:59 2014 -0700 -- .../sql/catalyst/expressions/aggregates.scala | 30 +++- .../sql/catalyst/expressions/literals.scala | 5 ++-- 2 files changed, 18 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4c7f082c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala index dbc0c29..15560a2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregates.scala @@ -105,17 +105,18 @@ case class Min(child: Expression) extends PartialAggregate with trees.UnaryNode[ case class MinFunction(expr: Expression, base: AggregateExpression) extends AggregateFunction { def this() = this(null, null) // Required for serialization. - var currentMin: Any = _ + val currentMin: MutableLiteral = MutableLiteral(null, expr.dataType) + val cmp = GreaterThan(currentMin, expr) override def update(input: Row): Unit = { -if (currentMin == null) { - currentMin = expr.eval(input) -} else if(GreaterThan(Literal(currentMin, expr.dataType), expr).eval(input) == true) { - currentMin = expr.eval(input) +if (currentMin.value == null) { + currentMin.value = expr.eval(input) +} else if(cmp.eval(input) == true) { + currentMin.value = expr.eval(input) } } - override de
git commit: [SPARK-3256] Added support for :cp that was broken in Scala 2.10.x for REPL
Repository: spark Updated Branches: refs/heads/master 4238c17dc -> 191d7cf2a [SPARK-3256] Added support for :cp that was broken in Scala 2.10.x for REPL As seen with [SI-6502](https://issues.scala-lang.org/browse/SI-6502) of Scala, the _:cp_ command was broken in Scala 2.10.x. As the Spark shell is a friendly wrapper on top of the Scala REPL, it is also affected by this problem. My solution was to alter the internal classpath and invalidate any new entries. I also had to add the ability to add new entries to the parent classloader of the interpreter (SparkIMain's global). The advantage of this versus wiping the interpreter and replaying all of the commands is that you don't have to worry about rerunning heavy Spark-related commands (going to the cluster) or potentially reloading data that might have changed. Instead, you get to work from where you left off. Until this is fixed upstream for 2.10.x, I had to use reflection to alter the internal compiler classpath. The solution now looks like this: ![screen shot 2014-08-13 at 3 46 02 pm](https://cloud.githubusercontent.com/assets/2481802/3912625/f02b1440-232c-11e4-9bf6-bafb3e352d14.png) Author: Chip Senkbeil Closes #1929 from rcsenkbeil/FixReplClasspathSupport and squashes the following commits: f420cbf [Chip Senkbeil] Added SparkContext.addJar calls to support executing code on remote clusters a826795 [Chip Senkbeil] Updated AddUrlsToClasspath to use 'new Run' suggestion over hackish compiler error 2ff1d86 [Chip Senkbeil] Added compilation failure on symbols hack to get Scala classes to load correctly a220639 [Chip Senkbeil] Added support for :cp that was broken in Scala 2.10.x for REPL Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/191d7cf2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/191d7cf2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/191d7cf2 Branch: refs/heads/master Commit: 191d7cf2a655d032f160b9fa181730364681d0e7 Parents: 4238c17 Author: Chip Senkbeil Authored: Wed Aug 27 13:01:11 2014 -0700 Committer: Matei Zaharia Committed: Wed Aug 27 13:01:11 2014 -0700 -- .../org/apache/spark/repl/SparkILoop.scala | 19 +++--- .../org/apache/spark/repl/SparkIMain.scala | 65 ++-- 2 files changed, 73 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/191d7cf2/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala -- diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala index 65788f4..53df599 100644 --- a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala +++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -8,7 +8,11 @@ package org.apache.spark.repl +import java.net.URL + +import scala.reflect.io.AbstractFile import scala.tools.nsc._ +import scala.tools.nsc.backend.JavaPlatform import scala.tools.nsc.interpreter._ import scala.tools.nsc.interpreter.{ Results => IR } @@ -22,11 +26,10 @@ import scala.tools.util.{ Javap } import scala.annotation.tailrec import scala.collection.mutable.ListBuffer import scala.concurrent.ops -import scala.tools.nsc.util.{ ClassPath, Exceptional, stringFromWriter, stringFromStream } +import scala.tools.nsc.util._ import scala.tools.nsc.interpreter._ -import scala.tools.nsc.io.{ File, Directory } +import scala.tools.nsc.io.{File, Directory} import scala.reflect.NameTransformer._ -import scala.tools.nsc.util.ScalaClassLoader import scala.tools.nsc.util.ScalaClassLoader._ import 
scala.tools.util._ import scala.language.{implicitConversions, existentials} @@ -711,22 +714,24 @@ class SparkILoop(in0: Option[BufferedReader], protected val out: JPrintWriter, added = true addedClasspath = ClassPath.join(addedClasspath, f.path) totalClasspath = ClassPath.join(settings.classpath.value, addedClasspath) +intp.addUrlsToClassPath(f.toURI.toURL) +sparkContext.addJar(f.toURI.toURL.getPath) } } -if (added) replay() } def addClasspath(arg: String): Unit = { val f = File(arg).normalize if (f.exists) { addedClasspath = ClassPath.join(addedClasspath, f.path) - val totalClasspath = ClassPath.join(settings.classpath.value, addedClasspath) - echo("Added '%s'. Your new classpath is:\n\"%s\"".format(f.path, totalClasspath)) - replay() + intp.addUrlsToClassPath(f.toURI.toURL) + sparkContext.addJar(f.toURI.toURL.getPath) + echo("Added '%s'. Your new classpath is:\n\"%s\"".format(f.path, intp.global.classPath.asClasspathString)) } else echo("The path '" + f + "' doesn't seem to exist.") } + def powerCmd(): Result = {
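In use, the restored command looks roughly like the annotated session below (an illustrative transcript with a made-up jar path, not output copied from the patch): the jar is added to the interpreter's compiler classpath and shipped to executors via SparkContext.addJar, without replaying the session.

```scala
scala> :cp /tmp/my-udfs.jar
// the interpreter echoes the updated classpath and ships the jar to executors
// (intp.addUrlsToClassPath plus sparkContext.addJar, per the diff above)

scala> import com.example.udfs.Normalize   // classes from the new jar now resolve
```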
git commit: [SPARK-3138][SQL] sqlContext.parquetFile should be able to take a single file as parameter
Repository: spark Updated Branches: refs/heads/master 191d7cf2a -> 48f42781d [SPARK-3138][SQL] sqlContext.parquetFile should be able to take a single file as parameter ```if (!fs.getFileStatus(path).isDir) throw Exception``` make no sense after this commit #1370 be careful if someone is working on SPARK-2551, make sure the new change passes test case ```test("Read a parquet file instead of a directory")``` Author: chutium Closes #2044 from chutium/parquet-singlefile and squashes the following commits: 4ae477f [chutium] [SPARK-3138][SQL] sqlContext.parquetFile should be able to take a single file as parameter Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/48f42781 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/48f42781 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/48f42781 Branch: refs/heads/master Commit: 48f42781dedecd38ddcb2dcf67dead92bb4318f5 Parents: 191d7cf Author: chutium Authored: Wed Aug 27 13:13:04 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 13:13:04 2014 -0700 -- .../apache/spark/sql/parquet/ParquetTypes.scala | 7 ++--- .../spark/sql/parquet/ParquetQuerySuite.scala | 27 +--- 2 files changed, 26 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/48f42781/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala index 1a52377..2941b97 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala @@ -394,17 +394,14 @@ private[parquet] object ParquetTypesConverter extends Logging { throw new IllegalArgumentException(s"Incorrectly formatted Parquet metadata path $origPath") } val path = origPath.makeQualified(fs) -if (!fs.getFileStatus(path).isDir) { - throw new IllegalArgumentException( -s"Expected $path for be a directory with Parquet files/metadata") -} -ParquetRelation.enableLogForwarding() val children = fs.listStatus(path).filterNot { status => val name = status.getPath.getName (name(0) == '.' || name(0) == '_') && name != ParquetFileWriter.PARQUET_METADATA_FILE } +ParquetRelation.enableLogForwarding() + // NOTE (lian): Parquet "_metadata" file can be very slow if the file consists of lots of row // groups. Since Parquet schema is replicated among all row groups, we only need to touch a // single row group to read schema related metadata. 
Notice that we are making assumptions that http://git-wip-us.apache.org/repos/asf/spark/blob/48f42781/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala index 4219cc0..42923b6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala @@ -35,7 +35,6 @@ import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext._ import org.apache.spark.util.Utils - case class TestRDDEntry(key: Int, value: String) case class NullReflectData( @@ -420,8 +419,30 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA val rdd_copy = sql("SELECT * FROM tmpx").collect() val rdd_orig = rdd.collect() for(i <- 0 to 99) { - assert(rdd_copy(i).apply(0) === rdd_orig(i).key, s"key error in line $i") - assert(rdd_copy(i).apply(1) === rdd_orig(i).value, s"value in line $i") + assert(rdd_copy(i).apply(0) === rdd_orig(i).key, s"key error in line $i") + assert(rdd_copy(i).apply(1) === rdd_orig(i).value, s"value error in line $i") +} +Utils.deleteRecursively(file) + } + + test("Read a parquet file instead of a directory") { +val file = getTempFilePath("parquet") +val path = file.toString +val fsPath = new Path(path) +val fs: FileSystem = fsPath.getFileSystem(TestSQLContext.sparkContext.hadoopConfiguration) +val rdd = TestSQLContext.sparkContext.parallelize((1 to 100)) + .map(i => TestRDDEntry(i, s"val_$i")) +rdd.coalesce(1).saveAsParquetFile(path) + +val children = fs.listStatus(fsPath).filter(_.getPath.getName.endsWith(".parquet")) +assert(children.length > 0) +val readFile = parquetFile(path + "/" + childr
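A short before/after usage sketch (assuming an existing SparkContext named sc, for example in spark-shell; the paths are examples only):

```scala
import org.apache.spark.sql.SQLContext

val sqlContext = new SQLContext(sc)

// Already worked: point at a directory containing part files plus metadata.
val byDir = sqlContext.parquetFile("hdfs:///warehouse/events")

// Works after this change: point at one specific Parquet file.
val byFile = sqlContext.parquetFile("hdfs:///warehouse/events/part-r-00001.parquet")
byFile.printSchema()
println(byFile.count())
```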
git commit: [SPARK-3138][SQL] sqlContext.parquetFile should be able to take a single file as parameter
Repository: spark Updated Branches: refs/heads/branch-1.1 4c7f082c6 -> 90f8f3eed [SPARK-3138][SQL] sqlContext.parquetFile should be able to take a single file as parameter ```if (!fs.getFileStatus(path).isDir) throw Exception``` make no sense after this commit #1370 be careful if someone is working on SPARK-2551, make sure the new change passes test case ```test("Read a parquet file instead of a directory")``` Author: chutium Closes #2044 from chutium/parquet-singlefile and squashes the following commits: 4ae477f [chutium] [SPARK-3138][SQL] sqlContext.parquetFile should be able to take a single file as parameter (cherry picked from commit 48f42781dedecd38ddcb2dcf67dead92bb4318f5) Signed-off-by: Michael Armbrust Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/90f8f3ee Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/90f8f3ee Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/90f8f3ee Branch: refs/heads/branch-1.1 Commit: 90f8f3eed026e9c4f1a4b1952e284558c0e3fd23 Parents: 4c7f082 Author: chutium Authored: Wed Aug 27 13:13:04 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 13:13:12 2014 -0700 -- .../apache/spark/sql/parquet/ParquetTypes.scala | 7 ++--- .../spark/sql/parquet/ParquetQuerySuite.scala | 27 +--- 2 files changed, 26 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/90f8f3ee/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala index 1a52377..2941b97 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/parquet/ParquetTypes.scala @@ -394,17 +394,14 @@ private[parquet] object ParquetTypesConverter extends Logging { throw new IllegalArgumentException(s"Incorrectly formatted Parquet metadata path $origPath") } val path = origPath.makeQualified(fs) -if (!fs.getFileStatus(path).isDir) { - throw new IllegalArgumentException( -s"Expected $path for be a directory with Parquet files/metadata") -} -ParquetRelation.enableLogForwarding() val children = fs.listStatus(path).filterNot { status => val name = status.getPath.getName (name(0) == '.' || name(0) == '_') && name != ParquetFileWriter.PARQUET_METADATA_FILE } +ParquetRelation.enableLogForwarding() + // NOTE (lian): Parquet "_metadata" file can be very slow if the file consists of lots of row // groups. Since Parquet schema is replicated among all row groups, we only need to touch a // single row group to read schema related metadata. 
Notice that we are making assumptions that http://git-wip-us.apache.org/repos/asf/spark/blob/90f8f3ee/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala index 4219cc0..42923b6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetQuerySuite.scala @@ -35,7 +35,6 @@ import org.apache.spark.sql.test.TestSQLContext import org.apache.spark.sql.test.TestSQLContext._ import org.apache.spark.util.Utils - case class TestRDDEntry(key: Int, value: String) case class NullReflectData( @@ -420,8 +419,30 @@ class ParquetQuerySuite extends QueryTest with FunSuiteLike with BeforeAndAfterA val rdd_copy = sql("SELECT * FROM tmpx").collect() val rdd_orig = rdd.collect() for(i <- 0 to 99) { - assert(rdd_copy(i).apply(0) === rdd_orig(i).key, s"key error in line $i") - assert(rdd_copy(i).apply(1) === rdd_orig(i).value, s"value in line $i") + assert(rdd_copy(i).apply(0) === rdd_orig(i).key, s"key error in line $i") + assert(rdd_copy(i).apply(1) === rdd_orig(i).value, s"value error in line $i") +} +Utils.deleteRecursively(file) + } + + test("Read a parquet file instead of a directory") { +val file = getTempFilePath("parquet") +val path = file.toString +val fsPath = new Path(path) +val fs: FileSystem = fsPath.getFileSystem(TestSQLContext.sparkContext.hadoopConfiguration) +val rdd = TestSQLContext.sparkContext.parallelize((1 to 100)) + .map(i => TestRDDEntry(i, s"val_$i")) +rdd.coalesce(1).saveAsParquetFile(path) + +val children = fs.listStatus(fsPath).filter(_.getPath.get
git commit: [SPARK-2871] [PySpark] add RDD.lookup(key)
Repository: spark Updated Branches: refs/heads/master 48f42781d -> 4fa2fda88 [SPARK-2871] [PySpark] add RDD.lookup(key) RDD.lookup(key) Return the list of values in the RDD for key `key`. This operation is done efficiently if the RDD has a known partitioner by only searching the partition that the key maps to. >>> l = range(1000) >>> rdd = sc.parallelize(zip(l, l), 10) >>> rdd.lookup(42) # slow [42] >>> sorted = rdd.sortByKey() >>> sorted.lookup(42) # fast [42] It also clean up the code in RDD.py, and fix several bugs (related to preservesPartitioning). Author: Davies Liu Closes #2093 from davies/lookup and squashes the following commits: 1789cd4 [Davies Liu] `f` in foreach could be generator or not. 2871b80 [Davies Liu] Merge branch 'master' into lookup c6390ea [Davies Liu] address all comments 0f1bce8 [Davies Liu] add test case for lookup() be0e8ba [Davies Liu] fix preservesPartitioning eb1305d [Davies Liu] add RDD.lookup(key) Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4fa2fda8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4fa2fda8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4fa2fda8 Branch: refs/heads/master Commit: 4fa2fda88fc7beebb579ba808e400113b512533b Parents: 48f4278 Author: Davies Liu Authored: Wed Aug 27 13:18:33 2014 -0700 Committer: Josh Rosen Committed: Wed Aug 27 13:18:33 2014 -0700 -- python/pyspark/rdd.py | 211 + 1 file changed, 79 insertions(+), 132 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4fa2fda8/python/pyspark/rdd.py -- diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 3191974..2d80fad 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -147,76 +147,6 @@ class BoundedFloat(float): return obj -class MaxHeapQ(object): - -""" -An implementation of MaxHeap. - ->>> import pyspark.rdd ->>> heap = pyspark.rdd.MaxHeapQ(5) ->>> [heap.insert(i) for i in range(10)] -[None, None, None, None, None, None, None, None, None, None] ->>> sorted(heap.getElements()) -[0, 1, 2, 3, 4] ->>> heap = pyspark.rdd.MaxHeapQ(5) ->>> [heap.insert(i) for i in range(9, -1, -1)] -[None, None, None, None, None, None, None, None, None, None] ->>> sorted(heap.getElements()) -[0, 1, 2, 3, 4] ->>> heap = pyspark.rdd.MaxHeapQ(1) ->>> [heap.insert(i) for i in range(9, -1, -1)] -[None, None, None, None, None, None, None, None, None, None] ->>> heap.getElements() -[0] -""" - -def __init__(self, maxsize): -# We start from q[1], so its children are always 2 * k -self.q = [0] -self.maxsize = maxsize - -def _swim(self, k): -while (k > 1) and (self.q[k / 2] < self.q[k]): -self._swap(k, k / 2) -k = k / 2 - -def _swap(self, i, j): -t = self.q[i] -self.q[i] = self.q[j] -self.q[j] = t - -def _sink(self, k): -N = self.size() -while 2 * k <= N: -j = 2 * k -# Here we test if both children are greater than parent -# if not swap with larger one. -if j < N and self.q[j] < self.q[j + 1]: -j = j + 1 -if(self.q[k] > self.q[j]): -break -self._swap(k, j) -k = j - -def size(self): -return len(self.q) - 1 - -def insert(self, value): -if (self.size()) < self.maxsize: -self.q.append(value) -self._swim(self.size()) -else: -self._replaceRoot(value) - -def getElements(self): -return self.q[1:] - -def _replaceRoot(self, value): -if(self.q[1] > value): -self.q[1] = value -self._sink(1) - - def _parse_memory(s): """ Parse a memory string in the format supported by Java (e.g. 
1g, 200m) and @@ -248,6 +178,7 @@ class RDD(object): self.ctx = ctx self._jrdd_deserializer = jrdd_deserializer self._id = jrdd.id() +self._partitionFunc = None def _toPickleSerialization(self): if (self._jrdd_deserializer == PickleSerializer() or @@ -325,8 +256,6 @@ class RDD(object): checkpointFile = self._jrdd.rdd().getCheckpointFile() if checkpointFile.isDefined(): return checkpointFile.get() -else: -return None def map(self, f, preservesPartitioning=False): """ @@ -366,7 +295,7 @@ class RDD(object): """ def func(s, iterator): return f(iterator) -return self.mapPartitionsWithIndex(func) +r
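This brings PySpark in line with the Scala pair-RDD API, where the same partitioner-aware shortcut already applies. For comparison, the Scala equivalent (a sketch; assumes a SparkContext named sc):

```scala
import org.apache.spark.SparkContext._   // pair-RDD implicits (already in scope in spark-shell)

val pairs = sc.parallelize(0 until 1000).map(i => (i, i))
pairs.lookup(42)              // no known partitioner: every partition is scanned

val sorted = pairs.sortByKey()
sorted.lookup(42)             // RangePartitioner known: only the partition that can hold 42 is read
```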
git commit: Spark-3213 Fixes issue with spark-ec2 not detecting slaves created with "Launch More like this"
Repository: spark Updated Branches: refs/heads/branch-1.1 90f8f3eed -> 3cb4e1718 Spark-3213 Fixes issue with spark-ec2 not detecting slaves created with "Launch More like this" ... copy the spark_cluster_tag from a spot instance requests over to the instances. Author: Vida Ha Closes #2163 from vidaha/vida/spark-3213 and squashes the following commits: 5070a70 [Vida Ha] Spark-3214 Fix issue with spark-ec2 not detecting slaves created with 'Launch More Like This' and using Spot Requests (cherry picked from commit 7faf755ae4f0cf510048e432340260a6e609066d) Signed-off-by: Josh Rosen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3cb4e171 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3cb4e171 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3cb4e171 Branch: refs/heads/branch-1.1 Commit: 3cb4e1718f40a18e3d19a33fd627960687bbcb6c Parents: 90f8f3e Author: Vida Ha Authored: Wed Aug 27 14:26:06 2014 -0700 Committer: Josh Rosen Committed: Wed Aug 27 14:26:16 2014 -0700 -- ec2/spark_ec2.py | 45 + 1 file changed, 25 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3cb4e171/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index 58261e2..afef4ef 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -40,7 +40,6 @@ from boto import ec2 # A URL prefix from which to fetch AMI information AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/v2/ami-list"; - class UsageError(Exception): pass @@ -450,38 +449,45 @@ def launch_cluster(conn, opts, cluster_name): print "Launched master in %s, regid = %s" % (zone, master_res.id) # Give the instances descriptive names -# TODO: Add retry logic for tagging with name since it's used to identify a cluster. for master in master_nodes: name = '{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id) -for i in range(0, 5): -try: -master.add_tag(key='Name', value=name) -except: -print "Failed attempt %i of 5 to tag %s" % ((i + 1), name) -if (i == 5): -raise "Error - failed max attempts to add name tag" -time.sleep(5) - +tag_instance(master, name) for slave in slave_nodes: name = '{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id) -for i in range(0, 5): -try: -slave.add_tag(key='Name', value=name) -except: -print "Failed attempt %i of 5 to tag %s" % ((i + 1), name) -if (i == 5): -raise "Error - failed max attempts to add name tag" -time.sleep(5) +tag_instance(slave, name) # Return all the instances return (master_nodes, slave_nodes) +def tag_instance(instance, name): +for i in range(0, 5): +try: +instance.add_tag(key='Name', value=name) +except: +print "Failed attempt %i of 5 to tag %s" % ((i + 1), name) +if (i == 5): +raise "Error - failed max attempts to add name tag" +time.sleep(5) # Get the EC2 instances in an existing cluster if available. # Returns a tuple of lists of EC2 instance objects for the masters and slaves def get_existing_cluster(conn, opts, cluster_name, die_on_error=True): print "Searching for existing cluster " + cluster_name + "..." +# Search all the spot instance requests, and copy any tags from the spot instance request to the cluster. 
+spot_instance_requests = conn.get_all_spot_instance_requests() +for req in spot_instance_requests: +if req.state != u'active': +continue +name = req.tags.get(u'Name', "") +if name.startswith(cluster_name): +reservations = conn.get_all_instances(instance_ids=[req.instance_id]) +for res in reservations: +active = [i for i in res.instances if is_active(i)] +for instance in active: +if (instance.tags.get(u'Name') == None): +tag_instance(instance, name) +# Now proceed to detect master and slaves instances. reservations = conn.get_all_instances() master_nodes = [] slave_nodes = [] @@ -504,7 +510,6 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True): print >> sys.stderr, "ERROR: Could not find any existing cluster" sys.exit(1) - # Deploy configuration files and run setup scripts on a newly launched # or started EC2 cluster. def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): -
git commit: Spark-3213 Fixes issue with spark-ec2 not detecting slaves created with "Launch More like this"
Repository: spark Updated Branches: refs/heads/master 4fa2fda88 -> 7faf755ae Spark-3213 Fixes issue with spark-ec2 not detecting slaves created with "Launch More like this" ... copy the spark_cluster_tag from a spot instance requests over to the instances. Author: Vida Ha Closes #2163 from vidaha/vida/spark-3213 and squashes the following commits: 5070a70 [Vida Ha] Spark-3214 Fix issue with spark-ec2 not detecting slaves created with 'Launch More Like This' and using Spot Requests Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7faf755a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7faf755a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7faf755a Branch: refs/heads/master Commit: 7faf755ae4f0cf510048e432340260a6e609066d Parents: 4fa2fda Author: Vida Ha Authored: Wed Aug 27 14:26:06 2014 -0700 Committer: Josh Rosen Committed: Wed Aug 27 14:26:06 2014 -0700 -- ec2/spark_ec2.py | 45 + 1 file changed, 25 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7faf755a/ec2/spark_ec2.py -- diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py index ddd72a0..ae4c488 100755 --- a/ec2/spark_ec2.py +++ b/ec2/spark_ec2.py @@ -40,7 +40,6 @@ from boto import ec2 # A URL prefix from which to fetch AMI information AMI_PREFIX = "https://raw.github.com/mesos/spark-ec2/v2/ami-list"; - class UsageError(Exception): pass @@ -464,38 +463,45 @@ def launch_cluster(conn, opts, cluster_name): print "Launched master in %s, regid = %s" % (zone, master_res.id) # Give the instances descriptive names -# TODO: Add retry logic for tagging with name since it's used to identify a cluster. for master in master_nodes: name = '{cn}-master-{iid}'.format(cn=cluster_name, iid=master.id) -for i in range(0, 5): -try: -master.add_tag(key='Name', value=name) -except: -print "Failed attempt %i of 5 to tag %s" % ((i + 1), name) -if (i == 5): -raise "Error - failed max attempts to add name tag" -time.sleep(5) - +tag_instance(master, name) for slave in slave_nodes: name = '{cn}-slave-{iid}'.format(cn=cluster_name, iid=slave.id) -for i in range(0, 5): -try: -slave.add_tag(key='Name', value=name) -except: -print "Failed attempt %i of 5 to tag %s" % ((i + 1), name) -if (i == 5): -raise "Error - failed max attempts to add name tag" -time.sleep(5) +tag_instance(slave, name) # Return all the instances return (master_nodes, slave_nodes) +def tag_instance(instance, name): +for i in range(0, 5): +try: +instance.add_tag(key='Name', value=name) +except: +print "Failed attempt %i of 5 to tag %s" % ((i + 1), name) +if (i == 5): +raise "Error - failed max attempts to add name tag" +time.sleep(5) # Get the EC2 instances in an existing cluster if available. # Returns a tuple of lists of EC2 instance objects for the masters and slaves def get_existing_cluster(conn, opts, cluster_name, die_on_error=True): print "Searching for existing cluster " + cluster_name + "..." +# Search all the spot instance requests, and copy any tags from the spot instance request to the cluster. 
+spot_instance_requests = conn.get_all_spot_instance_requests() +for req in spot_instance_requests: +if req.state != u'active': +continue +name = req.tags.get(u'Name', "") +if name.startswith(cluster_name): +reservations = conn.get_all_instances(instance_ids=[req.instance_id]) +for res in reservations: +active = [i for i in res.instances if is_active(i)] +for instance in active: +if (instance.tags.get(u'Name') == None): +tag_instance(instance, name) +# Now proceed to detect master and slaves instances. reservations = conn.get_all_instances() master_nodes = [] slave_nodes = [] @@ -518,7 +524,6 @@ def get_existing_cluster(conn, opts, cluster_name, die_on_error=True): print >> sys.stderr, "ERROR: Could not find any existing cluster" sys.exit(1) - # Deploy configuration files and run setup scripts on a newly launched # or started EC2 cluster. def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): - To unsubscribe, e-mail: commits-unsubscr...@
git commit: [SPARK-3243] Don't use stale spark-driver.* system properties
Repository: spark Updated Branches: refs/heads/branch-1.1 3cb4e1718 -> c1ffa3e4c [SPARK-3243] Don't use stale spark-driver.* system properties If we set both `spark.driver.extraClassPath` and `--driver-class-path`, then the latter correctly overrides the former. However, the value of the system property `spark.driver.extraClassPath` still uses the former, which is actually not added to the class path. This may cause some confusion... Of course, this also affects other options (i.e. java options, library path, memory...). Author: Andrew Or Closes #2154 from andrewor14/driver-submit-configs-fix and squashes the following commits: 17ec6fc [Andrew Or] Fix tests 0140836 [Andrew Or] Don't forget spark.driver.memory e39d20f [Andrew Or] Also set spark.driver.extra* configs in client mode (cherry picked from commit 63a053ab140d7bf605e8c5b7fb5a7bd52aca29b2) Signed-off-by: Patrick Wendell Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c1ffa3e4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c1ffa3e4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c1ffa3e4 Branch: refs/heads/branch-1.1 Commit: c1ffa3e4cdfbd1f84b5c8d8de5d0fb958a19e211 Parents: 3cb4e17 Author: Andrew Or Authored: Wed Aug 27 14:46:56 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 14:47:05 2014 -0700 -- .../org/apache/spark/deploy/SparkSubmit.scala| 19 +-- 1 file changed, 9 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c1ffa3e4/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 550ee72..0fdb5ae 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -173,6 +173,14 @@ object SparkSubmit { OptionAssigner(args.master, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.master"), OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"), OptionAssigner(args.jars, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.jars"), + OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT, +sysProp = "spark.driver.memory"), + OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, +sysProp = "spark.driver.extraClassPath"), + OptionAssigner(args.driverExtraJavaOptions, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, +sysProp = "spark.driver.extraJavaOptions"), + OptionAssigner(args.driverExtraLibraryPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, +sysProp = "spark.driver.extraLibraryPath"), // Standalone cluster only OptionAssigner(args.driverMemory, STANDALONE, CLUSTER, clOption = "--memory"), @@ -202,16 +210,7 @@ object SparkSubmit { OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.cores.max"), OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES, -sysProp = "spark.files"), - - // Only process driver specific options for cluster mode here, - // because they have already been processed in bash for client mode - OptionAssigner(args.driverExtraClassPath, STANDALONE | YARN, CLUSTER, -sysProp = "spark.driver.extraClassPath"), - OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER, -sysProp = "spark.driver.extraJavaOptions"), - OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER, -sysProp = "spark.driver.extraLibraryPath") +sysProp = 
"spark.files") ) // In client mode, launch the application main class directly - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-3243] Don't use stale spark-driver.* system properties
Repository: spark Updated Branches: refs/heads/master 7faf755ae -> 63a053ab1 [SPARK-3243] Don't use stale spark-driver.* system properties If we set both `spark.driver.extraClassPath` and `--driver-class-path`, then the latter correctly overrides the former. However, the value of the system property `spark.driver.extraClassPath` still uses the former, which is actually not added to the class path. This may cause some confusion... Of course, this also affects other options (i.e. java options, library path, memory...). Author: Andrew Or Closes #2154 from andrewor14/driver-submit-configs-fix and squashes the following commits: 17ec6fc [Andrew Or] Fix tests 0140836 [Andrew Or] Don't forget spark.driver.memory e39d20f [Andrew Or] Also set spark.driver.extra* configs in client mode Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/63a053ab Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/63a053ab Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/63a053ab Branch: refs/heads/master Commit: 63a053ab140d7bf605e8c5b7fb5a7bd52aca29b2 Parents: 7faf755 Author: Andrew Or Authored: Wed Aug 27 14:46:56 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 14:46:56 2014 -0700 -- .../org/apache/spark/deploy/SparkSubmit.scala| 19 +-- 1 file changed, 9 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/63a053ab/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 550ee72..0fdb5ae 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -173,6 +173,14 @@ object SparkSubmit { OptionAssigner(args.master, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.master"), OptionAssigner(args.name, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.app.name"), OptionAssigner(args.jars, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, sysProp = "spark.jars"), + OptionAssigner(args.driverMemory, ALL_CLUSTER_MGRS, CLIENT, +sysProp = "spark.driver.memory"), + OptionAssigner(args.driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, +sysProp = "spark.driver.extraClassPath"), + OptionAssigner(args.driverExtraJavaOptions, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, +sysProp = "spark.driver.extraJavaOptions"), + OptionAssigner(args.driverExtraLibraryPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES, +sysProp = "spark.driver.extraLibraryPath"), // Standalone cluster only OptionAssigner(args.driverMemory, STANDALONE, CLUSTER, clOption = "--memory"), @@ -202,16 +210,7 @@ object SparkSubmit { OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.cores.max"), OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES, -sysProp = "spark.files"), - - // Only process driver specific options for cluster mode here, - // because they have already been processed in bash for client mode - OptionAssigner(args.driverExtraClassPath, STANDALONE | YARN, CLUSTER, -sysProp = "spark.driver.extraClassPath"), - OptionAssigner(args.driverExtraJavaOptions, STANDALONE | YARN, CLUSTER, -sysProp = "spark.driver.extraJavaOptions"), - OptionAssigner(args.driverExtraLibraryPath, STANDALONE | YARN, CLUSTER, -sysProp = "spark.driver.extraLibraryPath") +sysProp = "spark.files") ) // In client mode, launch the application main class directly - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
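To see why moving these assigners matters, here is a minimal, self-contained sketch of the OptionAssigner pattern from SparkSubmit above. It is a simplified stand-in, not the real SparkSubmit internals: the bit-flag values, the `OptionAssignerSketch` object, and the example class path are illustrative assumptions. The point it shows is that once an entry covers ALL_CLUSTER_MGRS and ALL_DEPLOY_MODES, client-mode submissions also get the resolved value written into the spark.driver.* system property instead of keeping the stale value from the properties file.

```scala
object OptionAssignerSketch {
  // Cluster managers and deploy modes as bit flags, so one entry can match several at once.
  // The concrete values here are illustrative, not Spark's.
  val STANDALONE = 1
  val MESOS = 2
  val YARN = 4
  val LOCAL = 8
  val ALL_CLUSTER_MGRS: Int = STANDALONE | MESOS | YARN | LOCAL

  val CLIENT = 1
  val CLUSTER = 2
  val ALL_DEPLOY_MODES: Int = CLIENT | CLUSTER

  case class OptionAssigner(value: String, clusterManager: Int, deployMode: Int, sysProp: String)

  def main(args: Array[String]): Unit = {
    // Suppose --driver-class-path was given and has already overridden any
    // spark.driver.extraClassPath from the properties file (hypothetical path).
    val driverExtraClassPath = "/opt/libs/override.jar"

    val options = Seq(
      OptionAssigner(driverExtraClassPath, ALL_CLUSTER_MGRS, ALL_DEPLOY_MODES,
        sysProp = "spark.driver.extraClassPath"))

    val clusterManager = STANDALONE
    val deployMode = CLIENT
    val sysProps = scala.collection.mutable.Map[String, String]()

    // Apply only the entries whose cluster manager and deploy mode both match.
    for (opt <- options
         if opt.value != null
         if (opt.clusterManager & clusterManager) != 0
         if (opt.deployMode & deployMode) != 0) {
      sysProps(opt.sysProp) = opt.value
    }

    // Because the entry now covers all deploy modes, client mode sees the resolved
    // value rather than the stale one from the config file.
    println(sysProps("spark.driver.extraClassPath"))  // /opt/libs/override.jar
  }
}
```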
git commit: [SPARK-3252][SQL] Add missing condition for test
Repository: spark Updated Branches: refs/heads/master 63a053ab1 -> 28d41d627 [SPARK-3252][SQL] Add missing condition for test According to the text message, both relations should be tested. So add the missing condition. Author: viirya Closes #2159 from viirya/fix_test and squashes the following commits: b1c0f52 [viirya] add missing condition. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28d41d62 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28d41d62 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28d41d62 Branch: refs/heads/master Commit: 28d41d627919fcb196d9d31bad65d664770bee67 Parents: 63a053a Author: viirya Authored: Wed Aug 27 14:55:05 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 14:55:05 2014 -0700 -- .../test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala| 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/28d41d62/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 8d6ca99..a35c40e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -150,7 +150,8 @@ class StatisticsSuite extends QueryTest with BeforeAndAfterAll { val sizes = rdd.queryExecution.analyzed.collect { case r if ct.runtimeClass.isAssignableFrom(r.getClass) => r.statistics.sizeInBytes } - assert(sizes.size === 2 && sizes(0) <= autoBroadcastJoinThreshold, + assert(sizes.size === 2 && sizes(0) <= autoBroadcastJoinThreshold +&& sizes(1) <= autoBroadcastJoinThreshold, s"query should contain two relations, each of which has size smaller than autoConvertSize") // Using `sparkPlan` because for relevant patterns in HashJoin to be - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-3252][SQL] Add missing condition for test
Repository: spark Updated Branches: refs/heads/branch-1.1 c1ffa3e4c -> b3d763b0b [SPARK-3252][SQL] Add missing condition for test According to the text message, both relations should be tested. So add the missing condition. Author: viirya Closes #2159 from viirya/fix_test and squashes the following commits: b1c0f52 [viirya] add missing condition. (cherry picked from commit 28d41d627919fcb196d9d31bad65d664770bee67) Signed-off-by: Michael Armbrust Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b3d763b0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b3d763b0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b3d763b0 Branch: refs/heads/branch-1.1 Commit: b3d763b0b7fc6345dac5d222414f902e4afdee13 Parents: c1ffa3e Author: viirya Authored: Wed Aug 27 14:55:05 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 15:04:35 2014 -0700 -- .../test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala| 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b3d763b0/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala index 8d6ca99..a35c40e 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/StatisticsSuite.scala @@ -150,7 +150,8 @@ class StatisticsSuite extends QueryTest with BeforeAndAfterAll { val sizes = rdd.queryExecution.analyzed.collect { case r if ct.runtimeClass.isAssignableFrom(r.getClass) => r.statistics.sizeInBytes } - assert(sizes.size === 2 && sizes(0) <= autoBroadcastJoinThreshold, + assert(sizes.size === 2 && sizes(0) <= autoBroadcastJoinThreshold +&& sizes(1) <= autoBroadcastJoinThreshold, s"query should contain two relations, each of which has size smaller than autoConvertSize") // Using `sparkPlan` because for relevant patterns in HashJoin to be - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
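The assertion message promises that both relations are under the broadcast threshold, but the original condition only checked sizes(0). A tiny standalone illustration of the gap the added condition closes (the numbers are made up, not taken from the suite):

```scala
val autoBroadcastJoinThreshold = 10L * 1024 * 1024        // 10 MB; made-up threshold
val sizes = Seq(1024L, 512L * 1024 * 1024)                // second relation is 512 MB

// Original condition: passes even though the second relation is far above the threshold.
val before = sizes.size == 2 && sizes(0) <= autoBroadcastJoinThreshold

// Patched condition: both relations must be under the threshold, matching the message.
val after = sizes.size == 2 &&
  sizes(0) <= autoBroadcastJoinThreshold &&
  sizes(1) <= autoBroadcastJoinThreshold

assert(before && !after)  // the case the missing condition let slip through
```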
git commit: [SQL] [SPARK-3236] Reading Parquet tables from Metastore mangles location
Repository: spark Updated Branches: refs/heads/master 28d41d627 -> cc275f4b7 [SQL] [SPARK-3236] Reading Parquet tables from Metastore mangles location Currently we do `relation.hiveQlTable.getDataLocation.getPath`, which returns the path-part of the URI (e.g., "s3n://my-bucket/my-path" => "/my-path"). We should do `relation.hiveQlTable.getDataLocation.toString` instead, as a URI's toString returns a faithful representation of the full URI, which can later be passed into a Hadoop Path. Author: Aaron Davidson Closes #2150 from aarondav/parquet-location and squashes the following commits: 459f72c [Aaron Davidson] [SQL] [SPARK-3236] Reading Parquet tables from Metastore mangles location Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cc275f4b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cc275f4b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cc275f4b Branch: refs/heads/master Commit: cc275f4b7910f6d0ad266a43bac2fdae58e9739e Parents: 28d41d6 Author: Aaron Davidson Authored: Wed Aug 27 15:05:47 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 15:05:47 2014 -0700 -- .../src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cc275f4b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 10fa831..47e24f0 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -135,7 +135,7 @@ private[hive] trait HiveStrategies { .fakeOutput(projectList.map(_.toAttribute)):: Nil } else { hiveContext -.parquetFile(relation.hiveQlTable.getDataLocation.getPath) +.parquetFile(relation.hiveQlTable.getDataLocation.toString) .lowerCase .where(unresolvedOtherPredicates) .select(unresolvedProjection:_*) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SQL] [SPARK-3236] Reading Parquet tables from Metastore mangles location
Repository: spark Updated Branches: refs/heads/branch-1.1 b3d763b0b -> 77116875f [SQL] [SPARK-3236] Reading Parquet tables from Metastore mangles location Currently we do `relation.hiveQlTable.getDataLocation.getPath`, which returns the path-part of the URI (e.g., "s3n://my-bucket/my-path" => "/my-path"). We should do `relation.hiveQlTable.getDataLocation.toString` instead, as a URI's toString returns a faithful representation of the full URI, which can later be passed into a Hadoop Path. Author: Aaron Davidson Closes #2150 from aarondav/parquet-location and squashes the following commits: 459f72c [Aaron Davidson] [SQL] [SPARK-3236] Reading Parquet tables from Metastore mangles location (cherry picked from commit cc275f4b7910f6d0ad266a43bac2fdae58e9739e) Signed-off-by: Michael Armbrust Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/77116875 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/77116875 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/77116875 Branch: refs/heads/branch-1.1 Commit: 77116875f4184e0a637d9d7fd5b1dfeaabe0c9d3 Parents: b3d763b Author: Aaron Davidson Authored: Wed Aug 27 15:05:47 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 15:06:04 2014 -0700 -- .../src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/77116875/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala -- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index 10fa831..47e24f0 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -135,7 +135,7 @@ private[hive] trait HiveStrategies { .fakeOutput(projectList.map(_.toAttribute)):: Nil } else { hiveContext -.parquetFile(relation.hiveQlTable.getDataLocation.getPath) +.parquetFile(relation.hiveQlTable.getDataLocation.toString) .lowerCase .where(unresolvedOtherPredicates) .select(unresolvedProjection:_*) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
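The distinction the fix relies on is visible with a plain java.net.URI (a standalone illustration, not Spark code; the bucket name is made up): getPath keeps only the path component, while toString preserves the scheme and authority needed to rebuild a Hadoop Path later.

```scala
import java.net.URI

val location = new URI("s3n://my-bucket/my-path")

println(location.getPath)   // /my-path                  -- scheme and bucket are lost
println(location.toString)  // s3n://my-bucket/my-path   -- full URI, safe to turn back into a Path
```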
git commit: [SPARK-3065][SQL] Add locale setting to fix results do not match for udf_unix_timestamp format "yyyy MMM dd h:mm:ss a" run with not "America/Los_Angeles" TimeZone in HiveCompatibilitySuite
Repository: spark Updated Branches: refs/heads/master cc275f4b7 -> 65253502b [SPARK-3065][SQL] Add locale setting to fix results do not match for udf_unix_timestamp format " MMM dd h:mm:ss a" run with not "America/Los_Angeles" TimeZone in HiveCompatibilitySuite When run the udf_unix_timestamp of org.apache.spark.sql.hive.execution.HiveCompatibilitySuite testcase with not "America/Los_Angeles" TimeZone throws error. [https://issues.apache.org/jira/browse/SPARK-3065] add locale setting on beforeAll and afterAll method to fix the bug of HiveCompatibilitySuite testcase Author: luogankun Closes #1968 from luogankun/SPARK-3065 and squashes the following commits: c167832 [luogankun] [SPARK-3065][SQL] Add Locale setting to HiveCompatibilitySuite 0a25e3a [luogankun] [SPARK-3065][SQL] Add Locale setting to HiveCompatibilitySuite Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/65253502 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/65253502 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/65253502 Branch: refs/heads/master Commit: 65253502b913f390b26b9b631380b2c6cf1ccdf7 Parents: cc275f4 Author: luogankun Authored: Wed Aug 27 15:08:22 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 15:08:22 2014 -0700 -- .../spark/sql/hive/execution/HiveCompatibilitySuite.scala| 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/65253502/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala -- diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 7a69e3c..035fd32 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive.execution import java.io.File -import java.util.TimeZone +import java.util.{Locale, TimeZone} import org.scalatest.BeforeAndAfter @@ -33,6 +33,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { File.separator + "test" + File.separator + "queries" + File.separator + "clientpositive") var originalTimeZone: TimeZone = _ + var originalLocale: Locale = _ def testCases = hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f) @@ -41,11 +42,16 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) originalTimeZone = TimeZone.getDefault TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) + +// Add Locale setting +originalLocale = Locale.getDefault +Locale.setDefault(Locale.US) } override def afterAll() { TestHive.cacheTables = false TimeZone.setDefault(originalTimeZone) +Locale.setDefault(originalLocale) } /** A list of tests deemed out of scope currently and thus completely disregarded. */ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [SPARK-3065][SQL] Add locale setting to fix results do not match for udf_unix_timestamp format "yyyy MMM dd h:mm:ss a" run with not "America/Los_Angeles" TimeZone in HiveCompatibilitySuite
Repository: spark Updated Branches: refs/heads/branch-1.1 77116875f -> 5ea260ebd [SPARK-3065][SQL] Add locale setting to fix results do not match for udf_unix_timestamp format " MMM dd h:mm:ss a" run with not "America/Los_Angeles" TimeZone in HiveCompatibilitySuite When run the udf_unix_timestamp of org.apache.spark.sql.hive.execution.HiveCompatibilitySuite testcase with not "America/Los_Angeles" TimeZone throws error. [https://issues.apache.org/jira/browse/SPARK-3065] add locale setting on beforeAll and afterAll method to fix the bug of HiveCompatibilitySuite testcase Author: luogankun Closes #1968 from luogankun/SPARK-3065 and squashes the following commits: c167832 [luogankun] [SPARK-3065][SQL] Add Locale setting to HiveCompatibilitySuite 0a25e3a [luogankun] [SPARK-3065][SQL] Add Locale setting to HiveCompatibilitySuite (cherry picked from commit 65253502b913f390b26b9b631380b2c6cf1ccdf7) Signed-off-by: Michael Armbrust Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5ea260eb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5ea260eb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5ea260eb Branch: refs/heads/branch-1.1 Commit: 5ea260ebd1acbbe9705849a16ee67758e33c65b0 Parents: 7711687 Author: luogankun Authored: Wed Aug 27 15:08:22 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 15:08:34 2014 -0700 -- .../spark/sql/hive/execution/HiveCompatibilitySuite.scala| 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5ea260eb/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala -- diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 7a69e3c..035fd32 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -18,7 +18,7 @@ package org.apache.spark.sql.hive.execution import java.io.File -import java.util.TimeZone +import java.util.{Locale, TimeZone} import org.scalatest.BeforeAndAfter @@ -33,6 +33,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { File.separator + "test" + File.separator + "queries" + File.separator + "clientpositive") var originalTimeZone: TimeZone = _ + var originalLocale: Locale = _ def testCases = hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f) @@ -41,11 +42,16 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*) originalTimeZone = TimeZone.getDefault TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles")) + +// Add Locale setting +originalLocale = Locale.getDefault +Locale.setDefault(Locale.US) } override def afterAll() { TestHive.cacheTables = false TimeZone.setDefault(originalTimeZone) +Locale.setDefault(originalLocale) } /** A list of tests deemed out of scope currently and thus completely disregarded. */ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
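The locale dependence behind the failure is easy to reproduce with SimpleDateFormat alone (a standalone illustration, not the suite's code; the exact rendered strings depend on the JDK and the default time zone). Month and AM/PM names in the pattern come from the default locale, so golden-file comparisons only hold when the locale is pinned, here to Locale.US, and restored afterwards.

```scala
import java.text.SimpleDateFormat
import java.util.{Date, Locale}

val pattern = "yyyy MMM dd h:mm:ss a"
val date = new Date(0L)  // epoch 0, i.e. 1970-01-01 00:00:00 UTC

// Same pattern, different locale: the month abbreviation changes.
val us = new SimpleDateFormat(pattern, Locale.US).format(date)     // e.g. "1970 Jan 01 ..."
val fr = new SimpleDateFormat(pattern, Locale.FRANCE).format(date) // e.g. "1970 janv. 01 ..."
println(us)
println(fr)

// The save/restore pattern the patch adds, sketched for any test harness:
val originalLocale = Locale.getDefault
Locale.setDefault(Locale.US)           // beforeAll
try {
  // ... run locale-sensitive tests ...
} finally {
  Locale.setDefault(originalLocale)    // afterAll
}
```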
git commit: [SPARK-3235][SQL] Ensure in-memory tables don't always broadcast.
Repository: spark Updated Branches: refs/heads/master 65253502b -> 7d2a7a91f [SPARK-3235][SQL] Ensure in-memory tables don't always broadcast. Author: Michael Armbrust Closes #2147 from marmbrus/inMemDefaultSize and squashes the following commits: 5390360 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into inMemDefaultSize 14204d3 [Michael Armbrust] Set the context before creating SparkLogicalPlans. 8da4414 [Michael Armbrust] Make sure we throw errors when leaf nodes fail to provide statistcs 18ce029 [Michael Armbrust] Ensure in-memory tables don't always broadcast. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d2a7a91 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d2a7a91 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d2a7a91 Branch: refs/heads/master Commit: 7d2a7a91f263bb9fbf24dc4dbffde8fe5e2c7442 Parents: 6525350 Author: Michael Armbrust Authored: Wed Aug 27 15:14:08 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 15:14:08 2014 -0700 -- .../sql/catalyst/plans/logical/LogicalPlan.scala | 14 -- .../main/scala/org/apache/spark/sql/SQLContext.scala | 4 +++- .../sql/columnar/InMemoryColumnarTableScan.scala | 3 +++ .../org/apache/spark/sql/execution/SparkPlan.scala| 2 +- .../sql/columnar/InMemoryColumnarQuerySuite.scala | 8 5 files changed, 23 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7d2a7a91/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 8616ac4..f81d911 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -41,9 +41,14 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] { case class Statistics( sizeInBytes: BigInt ) - lazy val statistics: Statistics = Statistics( -sizeInBytes = children.map(_.statistics).map(_.sizeInBytes).product - ) + lazy val statistics: Statistics = { +if (children.size == 0) { + throw new UnsupportedOperationException(s"LeafNode $nodeName must implement statistics.") +} + +Statistics( + sizeInBytes = children.map(_.statistics).map(_.sizeInBytes).product) + } /** * Returns the set of attributes that this node takes as @@ -117,9 +122,6 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] { */ abstract class LeafNode extends LogicalPlan with trees.LeafNode[LogicalPlan] { self: Product => - - override lazy val statistics: Statistics = -throw new UnsupportedOperationException(s"LeafNode $nodeName must implement statistics.") } /** http://git-wip-us.apache.org/repos/asf/spark/blob/7d2a7a91/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 6f0eed3..a75af94 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -89,8 +89,10 @@ class SQLContext(@transient val sparkContext: SparkContext) * * @group userf */ - implicit def createSchemaRDD[A <: Product: TypeTag](rdd: RDD[A]) = + implicit def createSchemaRDD[A <: Product: TypeTag](rdd: RDD[A]) = { 
+SparkPlan.currentContext.set(self) new SchemaRDD(this, SparkLogicalPlan(ExistingRdd.fromProductRdd(rdd))(self)) + } /** * :: DeveloperApi :: http://git-wip-us.apache.org/repos/asf/spark/blob/7d2a7a91/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala index 24e88ee..bc36bac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala @@ -39,6 +39,9 @@ private[sql] case class InMemoryRelation( (private var _cachedColumnBuffers: RDD[Array[ByteBuffer]] = null) extends LogicalPlan with MultiInstanceRelation { + override lazy val statistics = +Statistics(sizeInBytes = chi
git commit: [SPARK-3235][SQL] Ensure in-memory tables don't always broadcast.
Repository: spark Updated Branches: refs/heads/branch-1.1 5ea260ebd -> 9a62cf365 [SPARK-3235][SQL] Ensure in-memory tables don't always broadcast. Author: Michael Armbrust Closes #2147 from marmbrus/inMemDefaultSize and squashes the following commits: 5390360 [Michael Armbrust] Merge remote-tracking branch 'origin/master' into inMemDefaultSize 14204d3 [Michael Armbrust] Set the context before creating SparkLogicalPlans. 8da4414 [Michael Armbrust] Make sure we throw errors when leaf nodes fail to provide statistcs 18ce029 [Michael Armbrust] Ensure in-memory tables don't always broadcast. (cherry picked from commit 7d2a7a91f263bb9fbf24dc4dbffde8fe5e2c7442) Signed-off-by: Michael Armbrust Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9a62cf36 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9a62cf36 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9a62cf36 Branch: refs/heads/branch-1.1 Commit: 9a62cf3655dcab49b5c0f94ad094603eaf288251 Parents: 5ea260e Author: Michael Armbrust Authored: Wed Aug 27 15:14:08 2014 -0700 Committer: Michael Armbrust Committed: Wed Aug 27 15:14:29 2014 -0700 -- .../sql/catalyst/plans/logical/LogicalPlan.scala | 14 -- .../main/scala/org/apache/spark/sql/SQLContext.scala | 4 +++- .../sql/columnar/InMemoryColumnarTableScan.scala | 3 +++ .../org/apache/spark/sql/execution/SparkPlan.scala| 2 +- .../sql/columnar/InMemoryColumnarQuerySuite.scala | 8 5 files changed, 23 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9a62cf36/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 8616ac4..f81d911 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -41,9 +41,14 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] { case class Statistics( sizeInBytes: BigInt ) - lazy val statistics: Statistics = Statistics( -sizeInBytes = children.map(_.statistics).map(_.sizeInBytes).product - ) + lazy val statistics: Statistics = { +if (children.size == 0) { + throw new UnsupportedOperationException(s"LeafNode $nodeName must implement statistics.") +} + +Statistics( + sizeInBytes = children.map(_.statistics).map(_.sizeInBytes).product) + } /** * Returns the set of attributes that this node takes as @@ -117,9 +122,6 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] { */ abstract class LeafNode extends LogicalPlan with trees.LeafNode[LogicalPlan] { self: Product => - - override lazy val statistics: Statistics = -throw new UnsupportedOperationException(s"LeafNode $nodeName must implement statistics.") } /** http://git-wip-us.apache.org/repos/asf/spark/blob/9a62cf36/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 6f0eed3..a75af94 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -89,8 +89,10 @@ class SQLContext(@transient val sparkContext: SparkContext) * * @group userf */ - implicit def createSchemaRDD[A <: Product: 
TypeTag](rdd: RDD[A]) = + implicit def createSchemaRDD[A <: Product: TypeTag](rdd: RDD[A]) = { +SparkPlan.currentContext.set(self) new SchemaRDD(this, SparkLogicalPlan(ExistingRdd.fromProductRdd(rdd))(self)) + } /** * :: DeveloperApi :: http://git-wip-us.apache.org/repos/asf/spark/blob/9a62cf36/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala index 24e88ee..bc36bac 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/columnar/InMemoryColumnarTableScan.scala @@ -39,6 +39,9 @@ private[sql] case class InMemoryRelation( (private var _cachedColumnBuffers: RDD[Array[ByteBuffer]] = null) extends
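A minimal sketch of the statistics scheme this patch adjusts, with simplified stand-in types rather than the real Catalyst classes (Plan, SizedLeaf, ForgetfulLeaf and Join are illustrative names). Inner nodes default to the product of their children's sizes, a deliberate over-estimate, and a leaf that does not report its own size now fails loudly instead of silently looking small enough to broadcast, which is how cached in-memory tables could end up on the broadcast side of every join.

```scala
case class Statistics(sizeInBytes: BigInt)

abstract class Plan {
  def children: Seq[Plan]

  lazy val statistics: Statistics = {
    if (children.isEmpty) {
      // Without this guard, children.map(...).product over an empty Seq is 1,
      // so a leaf that forgot to report its size looked tiny and was broadcast.
      throw new UnsupportedOperationException(
        s"Leaf ${getClass.getSimpleName} must implement statistics.")
    }
    // Inner nodes default to the product of their children's sizes: an over-estimate.
    Statistics(sizeInBytes = children.map(_.statistics.sizeInBytes).product)
  }
}

// A leaf that reports its size, analogous to InMemoryRelation overriding statistics.
case class SizedLeaf(size: BigInt) extends Plan {
  override def children: Seq[Plan] = Nil
  override lazy val statistics: Statistics = Statistics(size)
}

// A leaf that does not: asking for its statistics now fails loudly.
case object ForgetfulLeaf extends Plan {
  override def children: Seq[Plan] = Nil
}

case class Join(left: Plan, right: Plan) extends Plan {
  override def children: Seq[Plan] = Seq(left, right)
}

object StatisticsSketch extends App {
  val join = Join(SizedLeaf(BigInt(1) << 20), SizedLeaf(BigInt(1) << 10))
  println(join.statistics.sizeInBytes)  // 1073741824 = 2^30, the product of the leaf sizes
  // ForgetfulLeaf.statistics           // would throw UnsupportedOperationException
}
```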
git commit: HOTFIX: Don't build with YARN support for Mapr3
Repository: spark Updated Branches: refs/heads/master 7d2a7a91f -> 8712653f1 HOTFIX: Don't build with YARN support for Mapr3 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8712653f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8712653f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8712653f Branch: refs/heads/master Commit: 8712653f11b9730f6e7ef1c99c8c5850154abc56 Parents: 7d2a7a9 Author: Patrick Wendell Authored: Wed Aug 27 15:40:40 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 15:41:09 2014 -0700 -- dev/create-release/create-release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8712653f/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index eab6313..7549fbb 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -121,7 +121,7 @@ make_binary_release "hadoop1" "-Phive -Dhadoop.version=1.0.4" & make_binary_release "hadoop2.3" "-Phadoop-2.3 -Phive -Pyarn" & make_binary_release "hadoop2.4" "-Phadoop-2.4 -Phive -Pyarn" & make_binary_release "hadoop2.4-without-hive" "-Phadoop-2.4 -Pyarn" & -make_binary_release "mapr3" "-Pmapr3 -Pyarn -Phive" & +make_binary_release "mapr3" "-Pmapr3 -Phive" & make_binary_release "mapr4" "-Pmapr4 -Pyarn -Phive" & wait - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[1/2] git commit: Revert "[maven-release-plugin] prepare for next development iteration"
Repository: spark Updated Branches: refs/heads/branch-1.1 9a62cf365 -> 0b17c7d4f Revert "[maven-release-plugin] prepare for next development iteration" This reverts commit 9af3fb7385d1f9f221962f1d2d725ff79bd82033. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0c03fb62 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0c03fb62 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0c03fb62 Branch: refs/heads/branch-1.1 Commit: 0c03fb621e5b080f24863cfc17032bd828b65b99 Parents: 9a62cf3 Author: Patrick Wendell Authored: Wed Aug 27 15:48:00 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 15:48:00 2014 -0700 -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml | 4 ++-- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- yarn/stable/pom.xml | 2 +- 24 files changed, 25 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0c03fb62/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 4709b7d..799f8d9 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0c03fb62/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index f29540b..8eec7e5 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0c03fb62/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index debc4dd..83e6026 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0c03fb62/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index f35d3d6..9bde90e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0c03fb62/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 17d0fe2..daaae05 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.0 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0c03fb62/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 402af35..830eb32 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.0 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0c03fb62/external/kafka/pom.xml -- diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index 5123d05..e3df553 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.0 ../../pom.xml 
http://git-wip-us.apache.org/repos/asf/spark/blob/0c03fb62/external/mqtt/pom.xml -- diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml index 9c00bfc..1f9e52b 100644 --- a/external/mqtt/pom.xml +++ b
[2/2] git commit: Revert "[maven-release-plugin] prepare release v1.1.0-snapshot2"
Revert "[maven-release-plugin] prepare release v1.1.0-snapshot2" This reverts commit e1535ad3c6f7400f2b7915ea91da9c60510557ba. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b17c7d4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b17c7d4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b17c7d4 Branch: refs/heads/branch-1.1 Commit: 0b17c7d4f2176f0c0e8aaab95e034be54467ff30 Parents: 0c03fb6 Author: Patrick Wendell Authored: Wed Aug 27 15:48:13 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 15:48:13 2014 -0700 -- assembly/pom.xml | 6 +++--- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml | 9 + repl/pom.xml | 2 +- sql/catalyst/pom.xml | 5 +++-- sql/core/pom.xml | 5 +++-- sql/hive-thriftserver/pom.xml | 5 +++-- sql/hive/pom.xml | 5 +++-- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- yarn/stable/pom.xml | 2 +- 24 files changed, 38 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0b17c7d4/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 799f8d9..9fbb037 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.0-SNAPSHOT ../pom.xml @@ -124,8 +124,8 @@ log4j.properties - - + + http://git-wip-us.apache.org/repos/asf/spark/blob/0b17c7d4/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 8eec7e5..bd51b11 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b17c7d4/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 83e6026..6d8be37 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b17c7d4/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 9bde90e..8c4c128 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.0-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b17c7d4/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index daaae05..b345276 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.0-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b17c7d4/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 830eb32..f71f6b6 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.0-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b17c7d4/external/kafka/pom.xml -- diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index e3df553..4e2275a 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.0-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/0b17c7d4/external/mqtt/pom.xml -
git commit: Add line continuation for script to work w/ py2.7.5
Repository: spark Updated Branches: refs/heads/master 8712653f1 -> 64d8ecbbe Add line continuation for script to work w/ py2.7.5 Error was - $ SPARK_HOME=$PWD/dist ./dev/create-release/generate-changelist.py File "./dev/create-release/generate-changelist.py", line 128 if day < SPARK_REPO_CHANGE_DATE1 or ^ SyntaxError: invalid syntax Author: Matthew Farrellee Closes #2139 from mattf/master-fix-generate-changelist.py-0 and squashes the following commits: 6b3a900 [Matthew Farrellee] Add line continuation for script to work w/ py2.7.5 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/64d8ecbb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/64d8ecbb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/64d8ecbb Branch: refs/heads/master Commit: 64d8ecbbe94c47236ff2d8c94d7401636ba6fca4 Parents: 8712653 Author: Matthew Farrellee Authored: Wed Aug 27 15:50:30 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 15:50:30 2014 -0700 -- dev/create-release/generate-changelist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/64d8ecbb/dev/create-release/generate-changelist.py -- diff --git a/dev/create-release/generate-changelist.py b/dev/create-release/generate-changelist.py index de1b5d4..2e1a35a 100755 --- a/dev/create-release/generate-changelist.py +++ b/dev/create-release/generate-changelist.py @@ -125,8 +125,8 @@ for h in hashes: pr_num = [line.split()[1].lstrip("#") for line in body_lines if "Closes #" in line][0] github_url = "github.com/apache/spark/pull/%s" % pr_num day = time.strptime(date.split()[0], "%Y-%m-%d") -if day < SPARK_REPO_CHANGE_DATE1 or -(day < SPARK_REPO_CHANGE_DATE2 and pr_num < SPARK_REPO_PR_NUM_THRESH): +if (day < SPARK_REPO_CHANGE_DATE1 or +(day < SPARK_REPO_CHANGE_DATE2 and pr_num < SPARK_REPO_PR_NUM_THRESH)): github_url = "github.com/apache/incubator-spark/pull/%s" % pr_num append_to_changelist(" %s" % subject) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: Add line continuation for script to work w/ py2.7.5
Repository: spark Updated Branches: refs/heads/branch-1.1 0b17c7d4f -> d4cf7a068 Add line continuation for script to work w/ py2.7.5 Error was - $ SPARK_HOME=$PWD/dist ./dev/create-release/generate-changelist.py File "./dev/create-release/generate-changelist.py", line 128 if day < SPARK_REPO_CHANGE_DATE1 or ^ SyntaxError: invalid syntax Author: Matthew Farrellee Closes #2139 from mattf/master-fix-generate-changelist.py-0 and squashes the following commits: 6b3a900 [Matthew Farrellee] Add line continuation for script to work w/ py2.7.5 (cherry picked from commit 64d8ecbbe94c47236ff2d8c94d7401636ba6fca4) Signed-off-by: Patrick Wendell Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d4cf7a06 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d4cf7a06 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d4cf7a06 Branch: refs/heads/branch-1.1 Commit: d4cf7a068da099f0f07f04a834d7edf6b743ceb3 Parents: 0b17c7d Author: Matthew Farrellee Authored: Wed Aug 27 15:50:30 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 15:50:37 2014 -0700 -- dev/create-release/generate-changelist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d4cf7a06/dev/create-release/generate-changelist.py -- diff --git a/dev/create-release/generate-changelist.py b/dev/create-release/generate-changelist.py index de1b5d4..2e1a35a 100755 --- a/dev/create-release/generate-changelist.py +++ b/dev/create-release/generate-changelist.py @@ -125,8 +125,8 @@ for h in hashes: pr_num = [line.split()[1].lstrip("#") for line in body_lines if "Closes #" in line][0] github_url = "github.com/apache/spark/pull/%s" % pr_num day = time.strptime(date.split()[0], "%Y-%m-%d") -if day < SPARK_REPO_CHANGE_DATE1 or -(day < SPARK_REPO_CHANGE_DATE2 and pr_num < SPARK_REPO_PR_NUM_THRESH): +if (day < SPARK_REPO_CHANGE_DATE1 or +(day < SPARK_REPO_CHANGE_DATE2 and pr_num < SPARK_REPO_PR_NUM_THRESH)): github_url = "github.com/apache/incubator-spark/pull/%s" % pr_num append_to_changelist(" %s" % subject) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[1/3] BUILD: Updating CHANGES.txt for Spark 1.1
Repository: spark Updated Branches: refs/heads/branch-1.1 d4cf7a068 -> 8597e9cf3 http://git-wip-us.apache.org/repos/asf/spark/blob/8597e9cf/dev/create-release/generate-changelist.py -- diff --git a/dev/create-release/generate-changelist.py b/dev/create-release/generate-changelist.py index 2e1a35a..916ec90 100755 --- a/dev/create-release/generate-changelist.py +++ b/dev/create-release/generate-changelist.py @@ -31,8 +31,8 @@ import time import traceback SPARK_HOME = os.environ["SPARK_HOME"] -NEW_RELEASE_VERSION = "1.0.0" -PREV_RELEASE_GIT_TAG = "v0.9.1" +NEW_RELEASE_VERSION = "1.1.0" +PREV_RELEASE_GIT_TAG = "v1.0.0" CHANGELIST = "CHANGES.txt" OLD_CHANGELIST = "%s.old" % (CHANGELIST) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/3] BUILD: Updating CHANGES.txt for Spark 1.1
http://git-wip-us.apache.org/repos/asf/spark/blob/8597e9cf/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt new file mode 100644 index 000..6efb022 --- /dev/null +++ b/CHANGES.txt @@ -0,0 +1,14470 @@ +Spark Change Log + + +Release 1.1.0 + + Add line continuation for script to work w/ py2.7.5 + Matthew Farrellee + 2014-08-27 15:50:30 -0700 + Commit: d4cf7a0, github.com/apache/spark/pull/2139 + + [SPARK-3235][SQL] Ensure in-memory tables don't always broadcast. + Michael Armbrust + 2014-08-27 15:14:08 -0700 + Commit: 9a62cf3, github.com/apache/spark/pull/2147 + + [SPARK-3065][SQL] Add locale setting to fix results do not match for udf_unix_timestamp format " MMM dd h:mm:ss a" run with not "America/Los_Angeles" TimeZone in HiveCompatibilitySuite + luogankun + 2014-08-27 15:08:22 -0700 + Commit: 5ea260e, github.com/apache/spark/pull/1968 + + [SQL] [SPARK-3236] Reading Parquet tables from Metastore mangles location + Aaron Davidson + 2014-08-27 15:05:47 -0700 + Commit: 7711687, github.com/apache/spark/pull/2150 + + [SPARK-3252][SQL] Add missing condition for test + viirya + 2014-08-27 14:55:05 -0700 + Commit: b3d763b, github.com/apache/spark/pull/2159 + + [SPARK-3243] Don't use stale spark-driver.* system properties + Andrew Or + 2014-08-27 14:46:56 -0700 + Commit: c1ffa3e, github.com/apache/spark/pull/2154 + + Spark-3213 Fixes issue with spark-ec2 not detecting slaves created with "Launch More like this" + Vida Ha + 2014-08-27 14:26:06 -0700 + Commit: 3cb4e17, github.com/apache/spark/pull/2163 + + [SPARK-3138][SQL] sqlContext.parquetFile should be able to take a single file as parameter + chutium + 2014-08-27 13:13:04 -0700 + Commit: 90f8f3e, github.com/apache/spark/pull/2044 + + [SPARK-3197] [SQL] Reduce the Expression tree object creations for aggregation function (min/max) + Cheng Hao + 2014-08-27 12:50:47 -0700 + Commit: 4c7f082, github.com/apache/spark/pull/2113 + + [SPARK-3118][SQL]add "SHOW TBLPROPERTIES tblname;" and "SHOW COLUMNS (FROM|IN) table_name [(FROM|IN) db_name]" support + u0jing + 2014-08-27 12:47:14 -0700 + Commit: 19cda07, github.com/apache/spark/pull/2034 + + SPARK-3259 - User data should be given to the master + Allan Douglas R. de Oliveira + 2014-08-27 12:43:22 -0700 + Commit: 0c94a5b, github.com/apache/spark/pull/2162 + + [SPARK-2608][Core] Fixed command line option passing issue over Mesos via SPARK_EXECUTOR_OPTS + Cheng Lian + 2014-08-27 12:39:21 -0700 + Commit: 935bffe, github.com/apache/spark/pull/2161 + + [SPARK-3239] [PySpark] randomize the dirs for each process + Davies Liu + 2014-08-27 10:40:35 -0700 + Commit: 092121e, github.com/apache/spark/pull/2152 + + [SPARK-3170][CORE][BUG]:RDD info loss in "StorageTab" and "ExecutorTab" + uncleGen + 2014-08-27 10:32:13 -0700 + Commit: 8f8e2a4, github.com/apache/spark/pull/2131 + + [SPARK-3154][STREAMING] Make FlumePollingInputDStream shutdown cleaner. + Hari Shreedharan + 2014-08-27 02:39:02 -0700 + Commit: 1d468df, github.com/apache/spark/pull/2065 + + [SPARK-3227] [mllib] Added migration guide for v1.0 to v1.1 + Joseph K. 
Bradley + 2014-08-27 01:45:59 -0700 + Commit: 7286d57, github.com/apache/spark/pull/2146 + + [SPARK-2830][MLLIB] doc update for 1.1 + Xiangrui Meng + 2014-08-27 01:19:48 -0700 + Commit: 7401247, github.com/apache/spark/pull/2151 + + [SPARK-3237][SQL] Fix parquet filters with UDFs + Michael Armbrust + 2014-08-27 00:59:23 -0700 + Commit: ca01de1, github.com/apache/spark/pull/2153 + + [SPARK-3139] Made ContextCleaner to not block on shuffles + Tathagata Das + 2014-08-27 00:13:38 -0700 + Commit: 5cf1e44, github.com/apache/spark/pull/2143 + + HOTFIX: Minor typo in conf template + Patrick Wendell + 2014-08-26 23:40:50 -0700 + Commit: 6f82a4b + + [SPARK-3167] Handle special driver configs in Windows (Branch 1.1) + Andrew Or + 2014-08-26 23:06:11 -0700 + Commit: e7672f1, github.com/apache/spark/pull/2156 + + [SPARK-3224] FetchFailed reduce stages should only show up once in failed stages (in UI) + Reynold Xin , Kay Ousterhout + 2014-08-26 21:59:48 -0700 + Commit: 2381e90, github.com/apache/spark/pull/2127 + + Fix unclosed HTML tag in Yarn docs. + Josh Rosen + 2014-08-26 18:55:00 -0700 + Commit: 7726e56 + + [SPARK-3036][SPARK-3037][SQL] Add MapType/ArrayType containing null value support to Parquet. + Takuya UESHIN + 2014-08-26 18:28:41 -0700 + Commit: 8b5af6f, github.com/apache/spark/pull/2032 + + [Docs] Run tests like in contributing guide + nchammas + 2014-08-26 17:50:04 -0700 + Commit: 0d97233, github.com/apache/spark/pull/2149 + + [SPARK-2964] [SQL] Remove duplicated code from spark-sql and start-thriftserver.sh + Cheng Lian , Kousuke Saruta + 2014-08-26 17:33:40 -0700 + Commit: c0e1f99, github.com/apache/spark/pull/1886 + + [SPARK-3194][SQL] Add AttributeSet to fix bugs with invalid comp
[3/3] git commit: BUILD: Updating CHANGES.txt for Spark 1.1
BUILD: Updating CHANGES.txt for Spark 1.1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8597e9cf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8597e9cf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8597e9cf Branch: refs/heads/branch-1.1 Commit: 8597e9cf356b0d8e17600a49efc4c4a0356ecb5d Parents: d4cf7a0 Author: Patrick Wendell Authored: Wed Aug 27 15:55:59 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 15:56:08 2014 -0700 -- CHANGES.txt | 14470 +++ dev/create-release/generate-changelist.py | 4 +- 2 files changed, 14472 insertions(+), 2 deletions(-) -- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [maven-release-plugin] prepare release v1.1.0-rc1
Repository: spark Updated Branches: refs/heads/branch-1.1 8597e9cf3 -> 58b0be6a2 [maven-release-plugin] prepare release v1.1.0-rc1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/58b0be6a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/58b0be6a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/58b0be6a Branch: refs/heads/branch-1.1 Commit: 58b0be6a29eab817d350729710345e9f39e4c506 Parents: 8597e9c Author: Patrick Wendell Authored: Wed Aug 27 23:28:08 2014 + Committer: Patrick Wendell Committed: Wed Aug 27 23:28:08 2014 + -- assembly/pom.xml | 6 +++--- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml | 9 - repl/pom.xml | 2 +- sql/catalyst/pom.xml | 5 ++--- sql/core/pom.xml | 5 ++--- sql/hive-thriftserver/pom.xml | 5 ++--- sql/hive/pom.xml | 5 ++--- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- yarn/stable/pom.xml | 2 +- 24 files changed, 33 insertions(+), 38 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/58b0be6a/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 9fbb037..799f8d9 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0-SNAPSHOT +1.1.0 ../pom.xml @@ -124,8 +124,8 @@ log4j.properties - - + + http://git-wip-us.apache.org/repos/asf/spark/blob/58b0be6a/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index bd51b11..8eec7e5 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0-SNAPSHOT +1.1.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/58b0be6a/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 6d8be37..83e6026 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0-SNAPSHOT +1.1.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/58b0be6a/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 8c4c128..9bde90e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0-SNAPSHOT +1.1.0 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/58b0be6a/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index b345276..daaae05 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0-SNAPSHOT +1.1.0 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/58b0be6a/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index f71f6b6..830eb32 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0-SNAPSHOT +1.1.0 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/58b0be6a/external/kafka/pom.xml -- diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index 4e2275a..e3df553 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0-SNAPSHOT +1.1.0 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/58b0be6a/external/mqtt/pom.xml --
git commit: [maven-release-plugin] prepare for next development iteration
Repository: spark Updated Branches: refs/heads/branch-1.1 58b0be6a2 -> 78e3c036e [maven-release-plugin] prepare for next development iteration Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78e3c036 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78e3c036 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78e3c036 Branch: refs/heads/branch-1.1 Commit: 78e3c036eee7113b2ed144eec5061e070b479e56 Parents: 58b0be6 Author: Patrick Wendell Authored: Wed Aug 27 23:28:27 2014 + Committer: Patrick Wendell Committed: Wed Aug 27 23:28:27 2014 + -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml | 4 ++-- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- yarn/stable/pom.xml | 2 +- 24 files changed, 25 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/78e3c036/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 799f8d9..4709b7d 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.1-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/78e3c036/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 8eec7e5..f29540b 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.1-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/78e3c036/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 83e6026..debc4dd 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.1-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/78e3c036/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 9bde90e..f35d3d6 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.1-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/78e3c036/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index daaae05..17d0fe2 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.1-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/78e3c036/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 830eb32..402af35 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.1-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/78e3c036/external/kafka/pom.xml -- diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index e3df553..5123d05 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.0 +1.1.1-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/78e3c036/external/mqtt/pom.xml -- diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml index 
1f9e52b..9c00bfc 100644 --- a/external/mqtt/pom.xml +++ b/external/mqtt/pom.xml @@ -21,7 +21,7 @@ org.apache.spark
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.1.0-rc1 [created] 1dc825d90
git commit: [SPARK-3271] delete unused methods in Utils
Repository: spark Updated Branches: refs/heads/master 64d8ecbbe -> b86277c13 [SPARK-3271] delete unused methods in Utils delete no used method in Utils Author: scwf Closes #2160 from scwf/delete-no-use-method and squashes the following commits: d8f6b0d [scwf] delete no use method in Utils Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b86277c1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b86277c1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b86277c1 Branch: refs/heads/master Commit: b86277c13232c3e65ce6c6cf7f6ede6a00546485 Parents: 64d8ecb Author: scwf Authored: Wed Aug 27 19:44:26 2014 -0700 Committer: Matei Zaharia Committed: Wed Aug 27 19:44:30 2014 -0700 -- .../scala/org/apache/spark/util/Utils.scala | 37 1 file changed, 37 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b86277c1/core/src/main/scala/org/apache/spark/util/Utils.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 69a84a3..86f646d 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -52,11 +52,6 @@ private[spark] case class CallSite(shortForm: String, longForm: String) private[spark] object Utils extends Logging { val random = new Random() - def sparkBin(sparkHome: String, which: String): File = { -val suffix = if (isWindows) ".cmd" else "" -new File(sparkHome + File.separator + "bin", which + suffix) - } - /** Serialize an object using Java serialization */ def serialize[T](o: T): Array[Byte] = { val bos = new ByteArrayOutputStream() @@ -162,30 +157,6 @@ private[spark] object Utils extends Logging { } } - def isAlpha(c: Char): Boolean = { -(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') - } - - /** Split a string into words at non-alphabetic characters */ - def splitWords(s: String): Seq[String] = { -val buf = new ArrayBuffer[String] -var i = 0 -while (i < s.length) { - var j = i - while (j < s.length && isAlpha(s.charAt(j))) { -j += 1 - } - if (j > i) { -buf += s.substring(i, j) - } - i = j - while (i < s.length && !isAlpha(s.charAt(i))) { -i += 1 - } -} -buf - } - private val shutdownDeletePaths = new scala.collection.mutable.HashSet[String]() private val shutdownDeleteTachyonPaths = new scala.collection.mutable.HashSet[String]() @@ -831,14 +802,6 @@ private[spark] object Utils extends Logging { } /** - * Execute a command in the current working directory, throwing an exception if it completes - * with an exit code other than 0. - */ - def execute(command: Seq[String]) { -execute(command, new File(".")) - } - - /** * Execute a command and get its output, throwing an exception if it yields a code other than 0. */ def executeAndGetOutput(command: Seq[String], workingDir: File = new File("."), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
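The deleted helpers were private[spark] and no longer had callers inside Spark, so nothing else needs to change; for reference, the behavior of the removed splitWords/isAlpha pair is easy to reproduce with a plain regex. The snippet below is only an illustrative sketch, not part of this commit, and the splitAlphaWords name is made up here:

object WordSplitSketch {
  // Equivalent of the removed Utils.splitWords/isAlpha: keep maximal runs of
  // ASCII letters and drop every other character.
  def splitAlphaWords(s: String): List[String] =
    s.split("[^A-Za-z]+").filter(_.nonEmpty).toList

  def main(args: Array[String]): Unit = {
    // Prints: List(spark, delete, unused)
    println(splitAlphaWords("spark-3271_delete unused"))
  }
}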
git commit: SPARK-3265 Allow using custom ipython executable with pyspark
Repository: spark Updated Branches: refs/heads/master b86277c13 -> f38fab97c SPARK-3265 Allow using custom ipython executable with pyspark Although you can make pyspark use ipython with `IPYTHON=1`, and also change the python executable with `PYSPARK_PYTHON=...`, you can't use both at the same time because it hardcodes the default ipython script. This makes it use the `PYSPARK_PYTHON` variable if present and fall back to default python, similarly to how the default python executable is handled. So you can use a custom ipython like so: `PYSPARK_PYTHON=./anaconda/bin/ipython IPYTHON_OPTS="notebook" pyspark` Author: Rob O'Dwyer Closes #2167 from robbles/patch-1 and squashes the following commits: d98e8a9 [Rob O'Dwyer] Allow using custom ipython executable with pyspark Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f38fab97 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f38fab97 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f38fab97 Branch: refs/heads/master Commit: f38fab97c7970168f1bd81d4dc202e36322c95e3 Parents: b86277c Author: Rob O'Dwyer Authored: Wed Aug 27 19:47:33 2014 -0700 Committer: Matei Zaharia Committed: Wed Aug 27 19:47:33 2014 -0700 -- bin/pyspark | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f38fab97/bin/pyspark -- diff --git a/bin/pyspark b/bin/pyspark index 01d4202..59cfdfa 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -104,7 +104,7 @@ if [[ "$1" =~ \.py$ ]]; then else # Only use ipython if no command line arguments were provided [SPARK-1134] if [[ "$IPYTHON" = "1" ]]; then -exec ipython $IPYTHON_OPTS +exec ${PYSPARK_PYTHON:-ipython} $IPYTHON_OPTS else exec "$PYSPARK_PYTHON" fi - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
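The one-line change relies on the shell expansion ${PYSPARK_PYTHON:-ipython}, which falls back to plain ipython when PYSPARK_PYTHON is unset or empty. For JVM-side readers, the analogous lookup-with-default in Scala looks like the sketch below; it is purely illustrative and not part of this change:

object PysparkPythonLookup {
  def main(args: Array[String]): Unit = {
    // Note: getOrElse only covers the "unset" case; an empty value would be
    // passed through, unlike the shell's ${PYSPARK_PYTHON:-ipython} form.
    val pythonExec = sys.env.getOrElse("PYSPARK_PYTHON", "ipython")
    println(s"would exec: $pythonExec")
  }
}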
git commit: [HOTFIX] Wait for EOF only for the PySpark shell
Repository: spark Updated Branches: refs/heads/branch-1.1 78e3c036e -> 54ccd93e6 [HOTFIX] Wait for EOF only for the PySpark shell In `SparkSubmitDriverBootstrapper`, we wait for the parent process to send us an `EOF` before finishing the application. This is applicable for the PySpark shell because we terminate the application the same way. However if we run a python application, for instance, the JVM actually never exits unless it receives a manual EOF from the user. This is causing a few tests to timeout. We only need to do this for the PySpark shell because Spark submit runs as a python subprocess only in this case. Thus, the normal Spark shell doesn't need to go through this case even though it is also a REPL. Thanks davies for reporting this. Author: Andrew Or Closes #2170 from andrewor14/bootstrap-hotfix and squashes the following commits: 42963f5 [Andrew Or] Do not wait for EOF unless this is the pyspark shell (cherry picked from commit dafe343499bbc688e266106e4bb897f9e619834e) Signed-off-by: Patrick Wendell Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54ccd93e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54ccd93e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54ccd93e Branch: refs/heads/branch-1.1 Commit: 54ccd93e621c1bc4afc709a208b609232ab701d1 Parents: 78e3c03 Author: Andrew Or Authored: Wed Aug 27 23:03:46 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 23:04:28 2014 -0700 -- bin/pyspark | 2 ++ .../deploy/SparkSubmitDriverBootstrapper.scala | 26 +++- 2 files changed, 17 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/54ccd93e/bin/pyspark -- diff --git a/bin/pyspark b/bin/pyspark index 01d4202..6687648 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -102,6 +102,8 @@ if [[ "$1" =~ \.py$ ]]; then gatherSparkSubmitOpts "$@" exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}" else + # PySpark shell requires special handling downstream + export PYSPARK_SHELL=1 # Only use ipython if no command line arguments were provided [SPARK-1134] if [[ "$IPYTHON" = "1" ]]; then exec ipython $IPYTHON_OPTS http://git-wip-us.apache.org/repos/asf/spark/blob/54ccd93e/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala index 7ca96ed..38b5d8e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala @@ -132,25 +132,29 @@ private[spark] object SparkSubmitDriverBootstrapper { val builder = new ProcessBuilder(filteredCommand) val process = builder.start() -// Redirect stdin, stdout, and stderr to/from the child JVM +// Redirect stdout and stderr from the child JVM val stdoutThread = new RedirectThread(process.getInputStream, System.out, "redirect stdout") val stderrThread = new RedirectThread(process.getErrorStream, System.err, "redirect stderr") stdoutThread.start() stderrThread.start() -// In Windows, the subprocess reads directly from our stdin, so we should avoid spawning -// a thread that contends with the subprocess in reading from System.in. 
-if (Utils.isWindows) { - // For the PySpark shell, the termination of this process is handled in java_gateway.py - process.waitFor() -} else { - // Terminate on broken pipe, which signals that the parent process has exited. This is - // important for the PySpark shell, where Spark submit itself is a python subprocess. +// Redirect stdin to child JVM only if we're not running Windows. This is because the +// subprocess there already reads directly from our stdin, so we should avoid spawning a +// thread that contends with the subprocess in reading from System.in. +val isWindows = Utils.isWindows +val isPySparkShell = sys.env.contains("PYSPARK_SHELL") +if (!isWindows) { val stdinThread = new RedirectThread(System.in, process.getOutputStream, "redirect stdin") stdinThread.start() - stdinThread.join() - process.destroy() + // For the PySpark shell, Spark submit itself runs as a python subprocess, and so this JVM + // should terminate on broken pipe, which signals that the parent process has exited. In + // Windows, the termination logic for the PySpark shell is han
git commit: [HOTFIX] Wait for EOF only for the PySpark shell
Repository: spark Updated Branches: refs/heads/master f38fab97c -> dafe34349 [HOTFIX] Wait for EOF only for the PySpark shell In `SparkSubmitDriverBootstrapper`, we wait for the parent process to send us an `EOF` before finishing the application. This is applicable for the PySpark shell because we terminate the application the same way. However if we run a python application, for instance, the JVM actually never exits unless it receives a manual EOF from the user. This is causing a few tests to timeout. We only need to do this for the PySpark shell because Spark submit runs as a python subprocess only in this case. Thus, the normal Spark shell doesn't need to go through this case even though it is also a REPL. Thanks davies for reporting this. Author: Andrew Or Closes #2170 from andrewor14/bootstrap-hotfix and squashes the following commits: 42963f5 [Andrew Or] Do not wait for EOF unless this is the pyspark shell Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dafe3434 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dafe3434 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dafe3434 Branch: refs/heads/master Commit: dafe343499bbc688e266106e4bb897f9e619834e Parents: f38fab9 Author: Andrew Or Authored: Wed Aug 27 23:03:46 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 23:03:46 2014 -0700 -- bin/pyspark | 2 ++ .../deploy/SparkSubmitDriverBootstrapper.scala | 26 +++- 2 files changed, 17 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dafe3434/bin/pyspark -- diff --git a/bin/pyspark b/bin/pyspark index 59cfdfa..f553b31 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -102,6 +102,8 @@ if [[ "$1" =~ \.py$ ]]; then gatherSparkSubmitOpts "$@" exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}" else + # PySpark shell requires special handling downstream + export PYSPARK_SHELL=1 # Only use ipython if no command line arguments were provided [SPARK-1134] if [[ "$IPYTHON" = "1" ]]; then exec ${PYSPARK_PYTHON:-ipython} $IPYTHON_OPTS http://git-wip-us.apache.org/repos/asf/spark/blob/dafe3434/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala index 7ca96ed..38b5d8e 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitDriverBootstrapper.scala @@ -132,25 +132,29 @@ private[spark] object SparkSubmitDriverBootstrapper { val builder = new ProcessBuilder(filteredCommand) val process = builder.start() -// Redirect stdin, stdout, and stderr to/from the child JVM +// Redirect stdout and stderr from the child JVM val stdoutThread = new RedirectThread(process.getInputStream, System.out, "redirect stdout") val stderrThread = new RedirectThread(process.getErrorStream, System.err, "redirect stderr") stdoutThread.start() stderrThread.start() -// In Windows, the subprocess reads directly from our stdin, so we should avoid spawning -// a thread that contends with the subprocess in reading from System.in. -if (Utils.isWindows) { - // For the PySpark shell, the termination of this process is handled in java_gateway.py - process.waitFor() -} else { - // Terminate on broken pipe, which signals that the parent process has exited. 
This is - // important for the PySpark shell, where Spark submit itself is a python subprocess. +// Redirect stdin to child JVM only if we're not running Windows. This is because the +// subprocess there already reads directly from our stdin, so we should avoid spawning a +// thread that contends with the subprocess in reading from System.in. +val isWindows = Utils.isWindows +val isPySparkShell = sys.env.contains("PYSPARK_SHELL") +if (!isWindows) { val stdinThread = new RedirectThread(System.in, process.getOutputStream, "redirect stdin") stdinThread.start() - stdinThread.join() - process.destroy() + // For the PySpark shell, Spark submit itself runs as a python subprocess, and so this JVM + // should terminate on broken pipe, which signals that the parent process has exited. In + // Windows, the termination logic for the PySpark shell is handled in java_gateway.py + if (isPySparkShell) { +stdinThread.join() +
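Both branches ship the same bootstrapper change: stdin redirection now happens on every non-Windows platform, but the JVM waits on that redirect thread (and terminates on broken pipe) only when PYSPARK_SHELL is set. The self-contained sketch below illustrates the underlying copy-until-EOF-or-broken-pipe pattern; it is not Spark's RedirectThread, just an approximation of the idea:

import java.io.{IOException, InputStream, OutputStream}

// Minimal stand-in for the redirect threads used by the bootstrapper: copy
// bytes from `in` to `out` until EOF or a broken pipe, then stop quietly.
class StreamPump(in: InputStream, out: OutputStream, name: String) extends Thread(name) {
  setDaemon(true)
  override def run(): Unit = {
    val buf = new Array[Byte](1024)
    try {
      var n = in.read(buf)
      while (n != -1) {
        out.write(buf, 0, n)
        out.flush()
        n = in.read(buf)
      }
    } catch {
      // A broken pipe from the parent process surfaces as an IOException;
      // swallowing it here is what lets the child wind down on its own.
      case _: IOException => ()
    }
  }
}

object StreamPumpDemo {
  def main(args: Array[String]): Unit = {
    val stdinPump = new StreamPump(System.in, System.out, "redirect stdin")
    stdinPump.start()
    // Mirrors the PySpark-shell case: block until the input side goes away.
    stdinPump.join()
  }
}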
git commit: [HOTFIX][SQL] Remove cleaning of UDFs
Repository: spark Updated Branches: refs/heads/master dafe34349 -> 024178c57 [HOTFIX][SQL] Remove cleaning of UDFs It is not safe to run the closure cleaner on slaves. #2153 introduced this which broke all UDF execution on slaves. Will re-add cleaning of UDF closures in a follow-up PR. Author: Michael Armbrust Closes #2174 from marmbrus/fixUdfs and squashes the following commits: 55406de [Michael Armbrust] [HOTFIX] Remove cleaning of UDFs Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/024178c5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/024178c5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/024178c5 Branch: refs/heads/master Commit: 024178c57419f915d26414e1b91ea0019c3650db Parents: dafe343 Author: Michael Armbrust Authored: Wed Aug 27 23:05:34 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 23:05:34 2014 -0700 -- .../org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala | 3 --- 1 file changed, 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/024178c5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala index 0b3c1df..589816c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala @@ -23,9 +23,6 @@ import org.apache.spark.util.ClosureCleaner case class ScalaUdf(function: AnyRef, dataType: DataType, children: Seq[Expression]) extends Expression { - // Clean function when not called with default no-arg constructor. - if (function != null) { ClosureCleaner.clean(function) } - type EvaluatedType = Any def nullable = true - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
git commit: [HOTFIX][SQL] Remove cleaning of UDFs
Repository: spark Updated Branches: refs/heads/branch-1.1 54ccd93e6 -> 233c283e3 [HOTFIX][SQL] Remove cleaning of UDFs It is not safe to run the closure cleaner on slaves. #2153 introduced this which broke all UDF execution on slaves. Will re-add cleaning of UDF closures in a follow-up PR. Author: Michael Armbrust Closes #2174 from marmbrus/fixUdfs and squashes the following commits: 55406de [Michael Armbrust] [HOTFIX] Remove cleaning of UDFs (cherry picked from commit 024178c57419f915d26414e1b91ea0019c3650db) Signed-off-by: Patrick Wendell Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/233c283e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/233c283e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/233c283e Branch: refs/heads/branch-1.1 Commit: 233c283e3d946bdcbf418375122c5763559c0119 Parents: 54ccd93 Author: Michael Armbrust Authored: Wed Aug 27 23:05:34 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 23:06:14 2014 -0700 -- .../org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala | 3 --- 1 file changed, 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/233c283e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala index 0b3c1df..589816c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUdf.scala @@ -23,9 +23,6 @@ import org.apache.spark.util.ClosureCleaner case class ScalaUdf(function: AnyRef, dataType: DataType, children: Seq[Expression]) extends Expression { - // Clean function when not called with default no-arg constructor. - if (function != null) { ClosureCleaner.clean(function) } - type EvaluatedType = Any def nullable = true - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
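The problem with the removed lines was where the cleaning ran: ScalaUdf's constructor executes again when the expression is deserialized on the slaves, and ClosureCleaner is only safe to run on the driver. The follow-up mentioned above would presumably move the clean to registration time; the sketch below shows that driver-side-only pattern under those assumptions (the package placement and helper name are made up here, since ClosureCleaner is private[spark]):

package org.apache.spark.sql.catalyst.expressions

import org.apache.spark.util.ClosureCleaner

// Hypothetical driver-side helper: clean a UDF's closure exactly once, before
// the expression is built and shipped, rather than inside the ScalaUdf
// constructor (which also runs during deserialization on the slaves).
object UdfClosureHygiene {
  def cleanForShipping(function: AnyRef): AnyRef = {
    // Same single-argument call the removed constructor code used.
    ClosureCleaner.clean(function)
    function
  }
}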
git commit: HOTFIX: Don't build with YARN support for Mapr3
Repository: spark Updated Branches: refs/heads/branch-1.1 233c283e3 -> ad0fab23d HOTFIX: Don't build with YARN support for Mapr3 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ad0fab23 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ad0fab23 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ad0fab23 Branch: refs/heads/branch-1.1 Commit: ad0fab23d810f0f62e71813edfc3101fcec5e40b Parents: 233c283 Author: Patrick Wendell Authored: Wed Aug 27 15:40:40 2014 -0700 Committer: Patrick Wendell Committed: Wed Aug 27 23:08:44 2014 -0700 -- dev/create-release/create-release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ad0fab23/dev/create-release/create-release.sh -- diff --git a/dev/create-release/create-release.sh b/dev/create-release/create-release.sh index eab6313..7549fbb 100755 --- a/dev/create-release/create-release.sh +++ b/dev/create-release/create-release.sh @@ -121,7 +121,7 @@ make_binary_release "hadoop1" "-Phive -Dhadoop.version=1.0.4" & make_binary_release "hadoop2.3" "-Phadoop-2.3 -Phive -Pyarn" & make_binary_release "hadoop2.4" "-Phadoop-2.4 -Phive -Pyarn" & make_binary_release "hadoop2.4-without-hive" "-Phadoop-2.4 -Pyarn" & -make_binary_release "mapr3" "-Pmapr3 -Pyarn -Phive" & +make_binary_release "mapr3" "-Pmapr3 -Phive" & make_binary_release "mapr4" "-Pmapr4 -Pyarn -Phive" & wait - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.1.0-rc1 [deleted] 1dc825d90
git commit: [maven-release-plugin] prepare for next development iteration
Repository: spark Updated Branches: refs/heads/branch-1.1 79e86ef3e -> a118ea5c5 [maven-release-plugin] prepare for next development iteration Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a118ea5c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a118ea5c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a118ea5c Branch: refs/heads/branch-1.1 Commit: a118ea5c59d653f5a3feda21455ba60bc722b3b1 Parents: 79e86ef Author: Patrick Wendell Authored: Thu Aug 28 06:46:02 2014 + Committer: Patrick Wendell Committed: Thu Aug 28 06:46:02 2014 + -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml | 4 ++-- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- yarn/stable/pom.xml | 2 +- 24 files changed, 25 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a118ea5c/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 5896b6f..7a9e680 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1 +1.1.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/a118ea5c/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index f1c2b21..83faf29 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1 +1.1.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/a118ea5c/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 4e094bd..3661eac 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1 +1.1.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/a118ea5c/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index eb7fd0a..0fc7b58 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1 +1.1.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/a118ea5c/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index c443eaa..692f87b 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1 +1.1.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/a118ea5c/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 6d06a2d..5d0f7ff 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1 +1.1.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/a118ea5c/external/kafka/pom.xml -- diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index 09602f6..b267c47 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1 +1.1.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/a118ea5c/external/mqtt/pom.xml -- diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml index 
462079c..c7fbf4b 100644 --- a/external/mqtt/pom.xml +++ b/external/mqtt/pom.xml @@ -21,7 +21,7 @@ org.apache.spark
Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.1.0-rc1 [created] d6d19b581
git commit: [maven-release-plugin] prepare release v1.1.0-rc1
Repository: spark Updated Branches: refs/heads/branch-1.1 ad0fab23d -> 79e86ef3e [maven-release-plugin] prepare release v1.1.0-rc1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/79e86ef3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/79e86ef3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/79e86ef3 Branch: refs/heads/branch-1.1 Commit: 79e86ef3e1a3ee03a7e3b166a5c7dee11c6d60d7 Parents: ad0fab2 Author: Patrick Wendell Authored: Thu Aug 28 06:45:54 2014 + Committer: Patrick Wendell Committed: Thu Aug 28 06:45:54 2014 + -- assembly/pom.xml | 2 +- bagel/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka/pom.xml| 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml | 2 +- external/zeromq/pom.xml | 2 +- extras/kinesis-asl/pom.xml| 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- mllib/pom.xml | 2 +- pom.xml | 4 ++-- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- yarn/stable/pom.xml | 2 +- 24 files changed, 25 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/79e86ef3/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 4709b7d..5896b6f 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/79e86ef3/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index f29540b..f1c2b21 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/79e86ef3/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index debc4dd..4e094bd 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/79e86ef3/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index f35d3d6..eb7fd0a 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/79e86ef3/external/flume-sink/pom.xml -- diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 17d0fe2..c443eaa 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/79e86ef3/external/flume/pom.xml -- diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 402af35..6d06a2d 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/79e86ef3/external/kafka/pom.xml -- diff --git a/external/kafka/pom.xml b/external/kafka/pom.xml index 5123d05..09602f6 100644 --- a/external/kafka/pom.xml +++ b/external/kafka/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent -1.1.1-SNAPSHOT +1.1.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/79e86ef3/external/mqtt/pom.xml -- diff --git a/external/mqtt/pom.xml b/external/mqtt/pom.xml index 
9c00bfc..462079c 100644 --- a/external/mqtt/pom.xml +++ b/external/mqtt/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent