spark git commit: [SPARK-20036][DOC] Note incompatible dependencies on org.apache.kafka artifacts
Repository: spark Updated Branches: refs/heads/branch-2.2 8baa970bc -> 80a60da8f [SPARK-20036][DOC] Note incompatible dependencies on org.apache.kafka artifacts ## What changes were proposed in this pull request? Note that you shouldn't manually add dependencies on org.apache.kafka artifacts ## How was this patch tested? Doc only change, did jekyll build and looked at the page. Author: cody koeninger Closes #17675 from koeninger/SPARK-20036. (cherry picked from commit 71a8e9df12e547cb4716f954ecb762b358f862d5) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/80a60da8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/80a60da8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/80a60da8 Branch: refs/heads/branch-2.2 Commit: 80a60da8f42e86ae1a045d9fd0dcec3234b6ff40 Parents: 8baa970 Author: cody koeninger Authored: Wed Apr 19 18:58:58 2017 +0100 Committer: Sean Owen Committed: Wed Apr 19 18:59:07 2017 +0100 -- docs/streaming-kafka-0-10-integration.md | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/80a60da8/docs/streaming-kafka-0-10-integration.md -- diff --git a/docs/streaming-kafka-0-10-integration.md b/docs/streaming-kafka-0-10-integration.md index e383701..92c296a 100644 --- a/docs/streaming-kafka-0-10-integration.md +++ b/docs/streaming-kafka-0-10-integration.md @@ -12,6 +12,8 @@ For Scala/Java applications using SBT/Maven project definitions, link your strea artifactId = spark-streaming-kafka-0-10_{{site.SCALA_BINARY_VERSION}} version = {{site.SPARK_VERSION_SHORT}} +**Do not** manually add dependencies on `org.apache.kafka` artifacts (e.g. `kafka-clients`). The `spark-streaming-kafka-0-10` artifact has the appropriate transitive dependencies already, and different versions may be incompatible in hard to diagnose ways. + ### Creating a Direct Stream Note that the namespace for the import includes the version, org.apache.spark.streaming.kafka010 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
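For reference, a minimal sbt sketch of the dependency setup the doc change recommends: only the Spark artifact is declared and `kafka-clients` arrives transitively. The `sparkVersion` value below is a placeholder, not something taken from the commit.

```
// build.sbt -- sketch only; sparkVersion is a placeholder for the Spark release in use
val sparkVersion = "2.2.0"

libraryDependencies += "org.apache.spark" %% "spark-streaming-kafka-0-10" % sparkVersion
// Note: no explicit "org.apache.kafka" % "kafka-clients" entry. The artifact above
// already pulls in a compatible kafka-clients transitively, per the doc change.
```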
spark git commit: [SPARK-20156][SQL][FOLLOW-UP] Java String toLowerCase "Turkish locale bug" in Database and Table DDLs
Repository: spark Updated Branches: refs/heads/branch-2.2 8d658b90b -> d01122dbc [SPARK-20156][SQL][FOLLOW-UP] Java String toLowerCase "Turkish locale bug" in Database and Table DDLs ### What changes were proposed in this pull request? Database and Table names conform the Hive standard ("[a-zA-z_0-9]+"), i.e. if this name only contains characters, numbers, and _. When calling `toLowerCase` on the names, we should add `Locale.ROOT` to the `toLowerCase`for avoiding inadvertent locale-sensitive variation in behavior (aka the "Turkish locale problem"). ### How was this patch tested? Added a test case Author: Xiao Li Closes #17655 from gatorsmile/locale. (cherry picked from commit 55bea56911a958f6d3ec3ad96fb425cc71ec03f4) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d01122db Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d01122db Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d01122db Branch: refs/heads/branch-2.2 Commit: d01122dbc23206e203784d62312e9cac93564b45 Parents: 8d658b9 Author: Xiao Li Authored: Thu Apr 20 11:13:48 2017 +0100 Committer: Sean Owen Committed: Thu Apr 20 11:13:56 2017 +0100 -- .../analysis/ResolveTableValuedFunctions.scala | 4 ++- .../sql/catalyst/catalog/SessionCatalog.scala | 4 +-- .../apache/spark/sql/internal/SharedState.scala | 4 ++- .../spark/sql/execution/command/DDLSuite.scala | 19 + .../apache/spark/sql/test/SQLTestUtils.scala| 28 +++- 5 files changed, 54 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d01122db/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala index 8841309..de6de24 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.analysis +import java.util.Locale + import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Range} import org.apache.spark.sql.catalyst.rules._ @@ -103,7 +105,7 @@ object ResolveTableValuedFunctions extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case u: UnresolvedTableValuedFunction if u.functionArgs.forall(_.resolved) => - builtinFunctions.get(u.functionName.toLowerCase()) match { + builtinFunctions.get(u.functionName.toLowerCase(Locale.ROOT)) match { case Some(tvf) => val resolved = tvf.flatMap { case (argList, resolver) => argList.implicitCast(u.functionArgs) match { http://git-wip-us.apache.org/repos/asf/spark/blob/d01122db/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 3fbf83f..6c6d600 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -115,14 +115,14 @@ class SessionCatalog( * Format table 
name, taking into account case sensitivity. */ protected[this] def formatTableName(name: String): String = { -if (conf.caseSensitiveAnalysis) name else name.toLowerCase +if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT) } /** * Format database name, taking into account case sensitivity. */ protected[this] def formatDatabaseName(name: String): String = { -if (conf.caseSensitiveAnalysis) name else name.toLowerCase +if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT) } /** http://git-wip-us.apache.org/repos/asf/spark/blob/d01122db/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 0289471..d06dbaa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedS
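A small, self-contained Scala sketch of the locale pitfall this patch guards against (illustration only, not part of the patch):

```
import java.util.Locale

object TurkishLocaleDemo extends App {
  val turkish = new Locale("tr", "TR")
  // Under the Turkish locale, uppercase 'I' lowercases to the dotless 'ı',
  // so identifier normalization can silently diverge on a JVM whose default locale is tr-TR.
  println("MY_TABLE_ID".toLowerCase(turkish))      // my_table_ıd
  println("MY_TABLE_ID".toLowerCase(Locale.ROOT))  // my_table_id  (locale-independent, as the patch uses)
}
```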
spark git commit: [SPARK-20156][SQL][FOLLOW-UP] Java String toLowerCase "Turkish locale bug" in Database and Table DDLs
Repository: spark Updated Branches: refs/heads/master 46c574976 -> 55bea5691 [SPARK-20156][SQL][FOLLOW-UP] Java String toLowerCase "Turkish locale bug" in Database and Table DDLs ### What changes were proposed in this pull request? Database and Table names conform the Hive standard ("[a-zA-z_0-9]+"), i.e. if this name only contains characters, numbers, and _. When calling `toLowerCase` on the names, we should add `Locale.ROOT` to the `toLowerCase`for avoiding inadvertent locale-sensitive variation in behavior (aka the "Turkish locale problem"). ### How was this patch tested? Added a test case Author: Xiao Li Closes #17655 from gatorsmile/locale. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/55bea569 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/55bea569 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/55bea569 Branch: refs/heads/master Commit: 55bea56911a958f6d3ec3ad96fb425cc71ec03f4 Parents: 46c5749 Author: Xiao Li Authored: Thu Apr 20 11:13:48 2017 +0100 Committer: Sean Owen Committed: Thu Apr 20 11:13:48 2017 +0100 -- .../analysis/ResolveTableValuedFunctions.scala | 4 ++- .../sql/catalyst/catalog/SessionCatalog.scala | 4 +-- .../apache/spark/sql/internal/SharedState.scala | 4 ++- .../spark/sql/execution/command/DDLSuite.scala | 19 + .../apache/spark/sql/test/SQLTestUtils.scala| 28 +++- 5 files changed, 54 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/55bea569/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala index 8841309..de6de24 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.analysis +import java.util.Locale + import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Range} import org.apache.spark.sql.catalyst.rules._ @@ -103,7 +105,7 @@ object ResolveTableValuedFunctions extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { case u: UnresolvedTableValuedFunction if u.functionArgs.forall(_.resolved) => - builtinFunctions.get(u.functionName.toLowerCase()) match { + builtinFunctions.get(u.functionName.toLowerCase(Locale.ROOT)) match { case Some(tvf) => val resolved = tvf.flatMap { case (argList, resolver) => argList.implicitCast(u.functionArgs) match { http://git-wip-us.apache.org/repos/asf/spark/blob/55bea569/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala index 3fbf83f..6c6d600 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala @@ -115,14 +115,14 @@ class SessionCatalog( * Format table name, taking into account case sensitivity. 
*/ protected[this] def formatTableName(name: String): String = { -if (conf.caseSensitiveAnalysis) name else name.toLowerCase +if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT) } /** * Format database name, taking into account case sensitivity. */ protected[this] def formatDatabaseName(name: String): String = { -if (conf.caseSensitiveAnalysis) name else name.toLowerCase +if (conf.caseSensitiveAnalysis) name else name.toLowerCase(Locale.ROOT) } /** http://git-wip-us.apache.org/repos/asf/spark/blob/55bea569/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala index 0289471..d06dbaa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/SharedState.scala @@ -17,6 +17,8
spark git commit: Small rewording about history server use case
Repository: spark Updated Branches: refs/heads/master e2b3d2367 -> 34767997e Small rewording about history server use case Hello PR #10991 removed the built-in history view from Spark Standalone, so the history server is no longer useful to Yarn or Mesos only. Author: Hervé Closes #17709 from dud225/patch-1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/34767997 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/34767997 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/34767997 Branch: refs/heads/master Commit: 34767997e0c6cb28e1fac8cb650fa3511f260ca5 Parents: e2b3d23 Author: Hervé Authored: Fri Apr 21 08:52:18 2017 +0100 Committer: Sean Owen Committed: Fri Apr 21 08:52:18 2017 +0100 -- docs/monitoring.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/34767997/docs/monitoring.md -- diff --git a/docs/monitoring.md b/docs/monitoring.md index da95438..3e577c5 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -27,8 +27,8 @@ in the UI to persisted storage. ## Viewing After the Fact -If Spark is run on Mesos or YARN, it is still possible to construct the UI of an -application through Spark's history server, provided that the application's event logs exist. +It is still possible to construct the UI of an application through Spark's history server, +provided that the application's event logs exist. You can start the history server by executing: ./sbin/start-history-server.sh - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
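As background, the history server can only reconstruct a UI if the application wrote event logs. A hedged Scala sketch of the application-side settings (the directory is a placeholder; it should point at the same location the history server reads via `spark.history.fs.logDirectory`):

```
import org.apache.spark.SparkConf

// Sketch only: enable event logging so the history server can rebuild the UI later.
val conf = new SparkConf()
  .setAppName("history-server-example")              // placeholder app name
  .set("spark.eventLog.enabled", "true")
  .set("spark.eventLog.dir", "hdfs:///spark-events") // placeholder path; must exist beforehand
```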
spark git commit: Small rewording about history server use case
Repository: spark Updated Branches: refs/heads/branch-2.1 66e7a8f1d -> fb0351a3f Small rewording about history server use case Hello PR #10991 removed the built-in history view from Spark Standalone, so the history server is no longer useful to Yarn or Mesos only. Author: Hervé Closes #17709 from dud225/patch-1. (cherry picked from commit 34767997e0c6cb28e1fac8cb650fa3511f260ca5) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fb0351a3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fb0351a3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fb0351a3 Branch: refs/heads/branch-2.1 Commit: fb0351a3f76b535c7132f107cc8ea94923d51fd7 Parents: 66e7a8f Author: Hervé Authored: Fri Apr 21 08:52:18 2017 +0100 Committer: Sean Owen Committed: Fri Apr 21 08:52:54 2017 +0100 -- docs/monitoring.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fb0351a3/docs/monitoring.md -- diff --git a/docs/monitoring.md b/docs/monitoring.md index 077af08..8583213 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -27,8 +27,8 @@ in the UI to persisted storage. ## Viewing After the Fact -If Spark is run on Mesos or YARN, it is still possible to construct the UI of an -application through Spark's history server, provided that the application's event logs exist. +It is still possible to construct the UI of an application through Spark's history server, +provided that the application's event logs exist. You can start the history server by executing: ./sbin/start-history-server.sh - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: Small rewording about history server use case
Repository: spark Updated Branches: refs/heads/branch-2.2 cddb4b7db -> eb4d097c3 Small rewording about history server use case Hello PR #10991 removed the built-in history view from Spark Standalone, so the history server is no longer useful to Yarn or Mesos only. Author: Hervé Closes #17709 from dud225/patch-1. (cherry picked from commit 34767997e0c6cb28e1fac8cb650fa3511f260ca5) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eb4d097c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eb4d097c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eb4d097c Branch: refs/heads/branch-2.2 Commit: eb4d097c3c73d1aaf4cd9e17193a6b06ba273429 Parents: cddb4b7 Author: Hervé Authored: Fri Apr 21 08:52:18 2017 +0100 Committer: Sean Owen Committed: Fri Apr 21 08:52:28 2017 +0100 -- docs/monitoring.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/eb4d097c/docs/monitoring.md -- diff --git a/docs/monitoring.md b/docs/monitoring.md index da95438..3e577c5 100644 --- a/docs/monitoring.md +++ b/docs/monitoring.md @@ -27,8 +27,8 @@ in the UI to persisted storage. ## Viewing After the Fact -If Spark is run on Mesos or YARN, it is still possible to construct the UI of an -application through Spark's history server, provided that the application's event logs exist. +It is still possible to construct the UI of an application through Spark's history server, +provided that the application's event logs exist. You can start the history server by executing: ./sbin/start-history-server.sh - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20401][DOC] In the spark official configuration document, the 'spark.driver.supervise' configuration parameter specification and default values are necessary.
Repository: spark Updated Branches: refs/heads/master fd648bff6 -> ad290402a [SPARK-20401][DOC] In the spark official configuration document, the 'spark.driver.supervise' configuration parameter specification and default values are necessary. ## What changes were proposed in this pull request? Use the REST interface submits the spark job. e.g. curl -X POST http://10.43.183.120:6066/v1/submissions/create --header "Content-Type:application/json;charset=UTF-8" --data'{ "action": "CreateSubmissionRequest", "appArgs": [ "myAppArgument" ], "appResource": "/home/mr/gxl/test.jar", "clientSparkVersion": "2.2.0", "environmentVariables": { "SPARK_ENV_LOADED": "1" }, "mainClass": "cn.zte.HdfsTest", "sparkProperties": { "spark.jars": "/home/mr/gxl/test.jar", **"spark.driver.supervise": "true",** "spark.app.name": "HdfsTest", "spark.eventLog.enabled": "false", "spark.submit.deployMode": "cluster", "spark.master": "spark://10.43.183.120:6066" } }' **I hope that make sure that the driver is automatically restarted if it fails with non-zero exit code. But I can not find the 'spark.driver.supervise' configuration parameter specification and default values from the spark official document.** ## How was this patch tested? manual tests Please review http://spark.apache.org/contributing.html before opening a pull request. Author: éå°é¾ 10207633 Author: guoxiaolong Author: guoxiaolongzte Closes #17696 from guoxiaolongzte/SPARK-20401. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ad290402 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ad290402 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ad290402 Branch: refs/heads/master Commit: ad290402aa1d609abf5a2883a6d87fa8bc2bd517 Parents: fd648bf Author: éå°é¾ 10207633 Authored: Fri Apr 21 20:08:26 2017 +0100 Committer: Sean Owen Committed: Fri Apr 21 20:08:26 2017 +0100 -- docs/configuration.md | 8 1 file changed, 8 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ad290402/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index 2687f54..6b65d2b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -213,6 +213,14 @@ of the most common options to set are: and typically can have up to 50 characters. + + spark.driver.supervise + false + +If true, restarts the driver automatically if it fails with a non-zero exit status. +Only has effect in Spark standalone mode or Mesos cluster deploy mode. + + Apart from these, the following properties are also available, and may be useful in some situations: - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
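A rough Scala equivalent of the curl call above, sketched with only the JDK's HttpURLConnection. The host, jar path, and main class are copied from the example in the commit message as placeholders, not values to reuse.

```
import java.net.{HttpURLConnection, URL}
import java.nio.charset.StandardCharsets

object SubmitWithSupervise extends App {
  // Placeholder values, taken verbatim from the example in the commit message.
  val payload =
    """{
      |  "action": "CreateSubmissionRequest",
      |  "appArgs": ["myAppArgument"],
      |  "appResource": "/home/mr/gxl/test.jar",
      |  "clientSparkVersion": "2.2.0",
      |  "environmentVariables": {"SPARK_ENV_LOADED": "1"},
      |  "mainClass": "cn.zte.HdfsTest",
      |  "sparkProperties": {
      |    "spark.jars": "/home/mr/gxl/test.jar",
      |    "spark.driver.supervise": "true",
      |    "spark.app.name": "HdfsTest",
      |    "spark.eventLog.enabled": "false",
      |    "spark.submit.deployMode": "cluster",
      |    "spark.master": "spark://10.43.183.120:6066"
      |  }
      |}""".stripMargin

  val conn = new URL("http://10.43.183.120:6066/v1/submissions/create")
    .openConnection().asInstanceOf[HttpURLConnection]
  conn.setRequestMethod("POST")
  conn.setRequestProperty("Content-Type", "application/json;charset=UTF-8")
  conn.setDoOutput(true)
  conn.getOutputStream.write(payload.getBytes(StandardCharsets.UTF_8))
  println(s"Response code: ${conn.getResponseCode}")
}
```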
spark git commit: [SPARK-20401][DOC] In the spark official configuration document, the 'spark.driver.supervise' configuration parameter specification and default values are necessary.
Repository: spark Updated Branches: refs/heads/branch-2.2 ff1f989f2 -> 6c2489c66 [SPARK-20401][DOC] In the spark official configuration document, the 'spark.driver.supervise' configuration parameter specification and default values are necessary. ## What changes were proposed in this pull request? Use the REST interface submits the spark job. e.g. curl -X POST http://10.43.183.120:6066/v1/submissions/create --header "Content-Type:application/json;charset=UTF-8" --data'{ "action": "CreateSubmissionRequest", "appArgs": [ "myAppArgument" ], "appResource": "/home/mr/gxl/test.jar", "clientSparkVersion": "2.2.0", "environmentVariables": { "SPARK_ENV_LOADED": "1" }, "mainClass": "cn.zte.HdfsTest", "sparkProperties": { "spark.jars": "/home/mr/gxl/test.jar", **"spark.driver.supervise": "true",** "spark.app.name": "HdfsTest", "spark.eventLog.enabled": "false", "spark.submit.deployMode": "cluster", "spark.master": "spark://10.43.183.120:6066" } }' **I hope that make sure that the driver is automatically restarted if it fails with non-zero exit code. But I can not find the 'spark.driver.supervise' configuration parameter specification and default values from the spark official document.** ## How was this patch tested? manual tests Please review http://spark.apache.org/contributing.html before opening a pull request. Author: éå°é¾ 10207633 Author: guoxiaolong Author: guoxiaolongzte Closes #17696 from guoxiaolongzte/SPARK-20401. (cherry picked from commit ad290402aa1d609abf5a2883a6d87fa8bc2bd517) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6c2489c6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6c2489c6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6c2489c6 Branch: refs/heads/branch-2.2 Commit: 6c2489c66682fdc6a886346ed980d95e6e5eefde Parents: ff1f989 Author: éå°é¾ 10207633 Authored: Fri Apr 21 20:08:26 2017 +0100 Committer: Sean Owen Committed: Fri Apr 21 20:08:34 2017 +0100 -- docs/configuration.md | 8 1 file changed, 8 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6c2489c6/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index 2687f54..6b65d2b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -213,6 +213,14 @@ of the most common options to set are: and typically can have up to 50 characters. + + spark.driver.supervise + false + +If true, restarts the driver automatically if it fails with a non-zero exit status. +Only has effect in Spark standalone mode or Mesos cluster deploy mode. + + Apart from these, the following properties are also available, and may be useful in some situations: - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20386][SPARK CORE] modify the log info if the block exists on the slave already
Repository: spark Updated Branches: refs/heads/master ad290402a -> 05a451491 [SPARK-20386][SPARK CORE] modify the log info if the block exists on the slave already ## What changes were proposed in this pull request? Modify the added memory size to memSize-originalMemSize if the block exists on the slave already since if the block exists, the added memory size should be memSize-originalMemSize; if originalMemSize is bigger than memSize ,then the log info should be Removed memory, removed size should be originalMemSize-memSize ## How was this patch tested? Multiple runs on existing unit tests (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Author: eatoncys Closes #17683 from eatoncys/SPARK-20386. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/05a45149 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/05a45149 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/05a45149 Branch: refs/heads/master Commit: 05a451491d535c0828413ce2eb06fe94571069ac Parents: ad29040 Author: eatoncys Authored: Sat Apr 22 12:29:35 2017 +0100 Committer: Sean Owen Committed: Sat Apr 22 12:29:35 2017 +0100 -- .../storage/BlockManagerMasterEndpoint.scala| 52 +--- 1 file changed, 35 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/05a45149/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 467c3e0..6f85b9e 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -497,11 +497,17 @@ private[spark] class BlockManagerInfo( updateLastSeenMs() -if (_blocks.containsKey(blockId)) { +val blockExists = _blocks.containsKey(blockId) +var originalMemSize: Long = 0 +var originalDiskSize: Long = 0 +var originalLevel: StorageLevel = StorageLevel.NONE + +if (blockExists) { // The block exists on the slave already. 
val blockStatus: BlockStatus = _blocks.get(blockId) - val originalLevel: StorageLevel = blockStatus.storageLevel - val originalMemSize: Long = blockStatus.memSize + originalLevel = blockStatus.storageLevel + originalMemSize = blockStatus.memSize + originalDiskSize = blockStatus.diskSize if (originalLevel.useMemory) { _remainingMem += originalMemSize @@ -520,32 +526,44 @@ private[spark] class BlockManagerInfo( blockStatus = BlockStatus(storageLevel, memSize = memSize, diskSize = 0) _blocks.put(blockId, blockStatus) _remainingMem -= memSize -logInfo("Added %s in memory on %s (size: %s, free: %s)".format( - blockId, blockManagerId.hostPort, Utils.bytesToString(memSize), - Utils.bytesToString(_remainingMem))) +if (blockExists) { + logInfo(s"Updated $blockId in memory on ${blockManagerId.hostPort}" + +s" (current size: ${Utils.bytesToString(memSize)}," + +s" original size: ${Utils.bytesToString(originalMemSize)}," + +s" free: ${Utils.bytesToString(_remainingMem)})") +} else { + logInfo(s"Added $blockId in memory on ${blockManagerId.hostPort}" + +s" (size: ${Utils.bytesToString(memSize)}," + +s" free: ${Utils.bytesToString(_remainingMem)})") +} } if (storageLevel.useDisk) { blockStatus = BlockStatus(storageLevel, memSize = 0, diskSize = diskSize) _blocks.put(blockId, blockStatus) -logInfo("Added %s on disk on %s (size: %s)".format( - blockId, blockManagerId.hostPort, Utils.bytesToString(diskSize))) +if (blockExists) { + logInfo(s"Updated $blockId on disk on ${blockManagerId.hostPort}" + +s" (current size: ${Utils.bytesToString(diskSize)}," + +s" original size: ${Utils.bytesToString(originalDiskSize)})") +} else { + logInfo(s"Added $blockId on disk on ${blockManagerId.hostPort}" + +s" (size: ${Utils.bytesToString(diskSize)})") +} } if (!blockId.isBroadcast && blockStatus.isCached) { _cachedBlocks += blockId } -} else if (_blocks.containsKey(blockId)) { +} else if (blockExists) { // If isValid is not true, drop the block. - val blo
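A minimal sketch of the accounting idea behind the change; the names are illustrative and not the actual BlockManagerInfo fields:

```
// Sketch only: when a block is re-registered, the memory newly consumed is the delta
// against what the slave already reported, not the full block size.
def addedMemory(blockExists: Boolean, originalMemSize: Long, memSize: Long): Long =
  if (blockExists) memSize - originalMemSize else memSize

// e.g. a 100 MB block re-reported as 120 MB adds 20 MB; re-reported as 80 MB it frees 20 MB.
```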
spark git commit: [SPARK-20386][SPARK CORE] modify the log info if the block exists on the slave already
Repository: spark Updated Branches: refs/heads/branch-2.2 6c2489c66 -> d68e0a3a5 [SPARK-20386][SPARK CORE] modify the log info if the block exists on the slave already ## What changes were proposed in this pull request? Modify the added memory size to memSize-originalMemSize if the block exists on the slave already since if the block exists, the added memory size should be memSize-originalMemSize; if originalMemSize is bigger than memSize ,then the log info should be Removed memory, removed size should be originalMemSize-memSize ## How was this patch tested? Multiple runs on existing unit tests (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Author: eatoncys Closes #17683 from eatoncys/SPARK-20386. (cherry picked from commit 05a451491d535c0828413ce2eb06fe94571069ac) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d68e0a3a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d68e0a3a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d68e0a3a Branch: refs/heads/branch-2.2 Commit: d68e0a3a5ec39a3cb4358aacfc2bd1c5d783e51e Parents: 6c2489c Author: eatoncys Authored: Sat Apr 22 12:29:35 2017 +0100 Committer: Sean Owen Committed: Sat Apr 22 12:53:06 2017 +0100 -- .../storage/BlockManagerMasterEndpoint.scala| 52 +--- 1 file changed, 35 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d68e0a3a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala index 467c3e0..6f85b9e 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManagerMasterEndpoint.scala @@ -497,11 +497,17 @@ private[spark] class BlockManagerInfo( updateLastSeenMs() -if (_blocks.containsKey(blockId)) { +val blockExists = _blocks.containsKey(blockId) +var originalMemSize: Long = 0 +var originalDiskSize: Long = 0 +var originalLevel: StorageLevel = StorageLevel.NONE + +if (blockExists) { // The block exists on the slave already. 
val blockStatus: BlockStatus = _blocks.get(blockId) - val originalLevel: StorageLevel = blockStatus.storageLevel - val originalMemSize: Long = blockStatus.memSize + originalLevel = blockStatus.storageLevel + originalMemSize = blockStatus.memSize + originalDiskSize = blockStatus.diskSize if (originalLevel.useMemory) { _remainingMem += originalMemSize @@ -520,32 +526,44 @@ private[spark] class BlockManagerInfo( blockStatus = BlockStatus(storageLevel, memSize = memSize, diskSize = 0) _blocks.put(blockId, blockStatus) _remainingMem -= memSize -logInfo("Added %s in memory on %s (size: %s, free: %s)".format( - blockId, blockManagerId.hostPort, Utils.bytesToString(memSize), - Utils.bytesToString(_remainingMem))) +if (blockExists) { + logInfo(s"Updated $blockId in memory on ${blockManagerId.hostPort}" + +s" (current size: ${Utils.bytesToString(memSize)}," + +s" original size: ${Utils.bytesToString(originalMemSize)}," + +s" free: ${Utils.bytesToString(_remainingMem)})") +} else { + logInfo(s"Added $blockId in memory on ${blockManagerId.hostPort}" + +s" (size: ${Utils.bytesToString(memSize)}," + +s" free: ${Utils.bytesToString(_remainingMem)})") +} } if (storageLevel.useDisk) { blockStatus = BlockStatus(storageLevel, memSize = 0, diskSize = diskSize) _blocks.put(blockId, blockStatus) -logInfo("Added %s on disk on %s (size: %s)".format( - blockId, blockManagerId.hostPort, Utils.bytesToString(diskSize))) +if (blockExists) { + logInfo(s"Updated $blockId on disk on ${blockManagerId.hostPort}" + +s" (current size: ${Utils.bytesToString(diskSize)}," + +s" original size: ${Utils.bytesToString(originalDiskSize)})") +} else { + logInfo(s"Added $blockId on disk on ${blockManagerId.hostPort}" + +s" (size: ${Utils.bytesToString(diskSize)})") +} } if (!blockId.isBroadcast && blockStatus.isCached) { _cachedBlocks += blockId } -} else if (_blocks.containsKey(blo
spark git commit: [SPARK-20385][WEB-UI] Submitted Time' field, the date format needs to be formatted, in running Drivers table or Completed Drivers table in master web ui.
Repository: spark Updated Branches: refs/heads/master 8765bc17d -> 2eaf4f3fe [SPARK-20385][WEB-UI] Submitted Time' field, the date format needs to be formatted, in running Drivers table or Completed Drivers table in master web ui. ## What changes were proposed in this pull request? Submitted Time' field, the date format **needs to be formatted**, in running Drivers table or Completed Drivers table in master web ui. Before fix this problem e.g. Completed Drivers Submission ID**Submitted Time** Worker State Cores Memory Main Class driver-20170419145755-0005 **Wed Apr 19 14:57:55 CST 2017** worker-20170419145250-zdh120-40412 FAILED 1 1024.0 MB cn.zte.HdfsTest please see the attachment:https://issues.apache.org/jira/secure/attachment/12863977/before_fix.png After fix this problem e.g. Completed Drivers Submission ID**Submitted Time** Worker State Cores Memory Main Class driver-20170419145755-0006 **2017/04/19 16:01:25** worker-20170419145250-zdh120-40412 FAILED1 1024.0 MB cn.zte.HdfsTest please see the attachment:https://issues.apache.org/jira/secure/attachment/12863976/after_fix.png 'Submitted Time' field, the date format **has been formatted**, in running Applications table or Completed Applicationstable in master web ui, **it is correct.** e.g. Running Applications Application ID NameCores Memory per Executor**Submitted Time** User State Duration app-20170419160910- (kill) SparkSQL::10.43.183.120 1 5.0 GB **2017/04/19 16:09:10**root RUNNING 53 s **Format after the time easier to observe, and consistent with the applications table,so I think it's worth fixing.** ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Author: éå°é¾ 10207633 Author: guoxiaolong Author: guoxiaolongzte Closes #17682 from guoxiaolongzte/SPARK-20385. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2eaf4f3f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2eaf4f3f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2eaf4f3f Branch: refs/heads/master Commit: 2eaf4f3fe3595ae341a3a5ce886b859992dea5b2 Parents: 8765bc1 Author: éå°é¾ 10207633 Authored: Sun Apr 23 13:33:14 2017 +0100 Committer: Sean Owen Committed: Sun Apr 23 13:33:14 2017 +0100 -- .../org/apache/spark/deploy/master/ui/ApplicationPage.scala | 2 +- .../scala/org/apache/spark/deploy/master/ui/MasterPage.scala | 2 +- .../scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala | 4 ++-- .../org/apache/spark/deploy/mesos/ui/MesosClusterPage.scala | 8 4 files changed, 8 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2eaf4f3f/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index 946a928..a8d721f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -83,7 +83,7 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") Executor Memory: {Utils.megabytesToString(app.desc.memoryPerExecutorMB)} -Submit Date: {app.submitDate} +Submit Date: {UIUtils.formatDate(app.submitDate)} State: {app.state} { if (!app.isFinished) { http://git-wip-us.apache.org/repos/asf/spark/blob/2eaf4f3f/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index e722a24..9351c72 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -252,7 +252,7 @@ private[ui] class Ma
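For illustration, a small Scala sketch of the difference between the raw `Date.toString` rendering and a formatted timestamp like the one the UI now shows. The exact pattern used by `UIUtils.formatDate` is an assumption here, chosen to match the example in the commit message.

```
import java.text.SimpleDateFormat
import java.util.Date

object SubmitTimeFormatDemo extends App {
  val submitDate = new Date()
  // Old rendering: whatever Date.toString gives, e.g. "Wed Apr 19 14:57:55 CST 2017"
  println(submitDate.toString)
  // New rendering: an explicitly formatted timestamp, e.g. "2017/04/19 16:01:25"
  // (pattern is an assumption for illustration, not necessarily Spark's internal one)
  val fmt = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss")
  println(fmt.format(submitDate))
}
```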
spark git commit: [SPARK-20385][WEB-UI] Submitted Time' field, the date format needs to be formatted, in running Drivers table or Completed Drivers table in master web ui.
Repository: spark Updated Branches: refs/heads/branch-2.2 807c71892 -> cad33a730 [SPARK-20385][WEB-UI] Submitted Time' field, the date format needs to be formatted, in running Drivers table or Completed Drivers table in master web ui. ## What changes were proposed in this pull request? Submitted Time' field, the date format **needs to be formatted**, in running Drivers table or Completed Drivers table in master web ui. Before fix this problem e.g. Completed Drivers Submission ID**Submitted Time** Worker State Cores Memory Main Class driver-20170419145755-0005 **Wed Apr 19 14:57:55 CST 2017** worker-20170419145250-zdh120-40412 FAILED 1 1024.0 MB cn.zte.HdfsTest please see the attachment:https://issues.apache.org/jira/secure/attachment/12863977/before_fix.png After fix this problem e.g. Completed Drivers Submission ID**Submitted Time** Worker State Cores Memory Main Class driver-20170419145755-0006 **2017/04/19 16:01:25** worker-20170419145250-zdh120-40412 FAILED1 1024.0 MB cn.zte.HdfsTest please see the attachment:https://issues.apache.org/jira/secure/attachment/12863976/after_fix.png 'Submitted Time' field, the date format **has been formatted**, in running Applications table or Completed Applicationstable in master web ui, **it is correct.** e.g. Running Applications Application ID NameCores Memory per Executor**Submitted Time** User State Duration app-20170419160910- (kill) SparkSQL::10.43.183.120 1 5.0 GB **2017/04/19 16:09:10**root RUNNING 53 s **Format after the time easier to observe, and consistent with the applications table,so I think it's worth fixing.** ## How was this patch tested? (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Please review http://spark.apache.org/contributing.html before opening a pull request. Author: éå°é¾ 10207633 Author: guoxiaolong Author: guoxiaolongzte Closes #17682 from guoxiaolongzte/SPARK-20385. 
(cherry picked from commit 2eaf4f3fe3595ae341a3a5ce886b859992dea5b2) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cad33a73 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cad33a73 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cad33a73 Branch: refs/heads/branch-2.2 Commit: cad33a7301f6e0b40b88789f0a96f9cc7ebf9d6e Parents: 807c718 Author: éå°é¾ 10207633 Authored: Sun Apr 23 13:33:14 2017 +0100 Committer: Sean Owen Committed: Sun Apr 23 13:33:22 2017 +0100 -- .../org/apache/spark/deploy/master/ui/ApplicationPage.scala | 2 +- .../scala/org/apache/spark/deploy/master/ui/MasterPage.scala | 2 +- .../scala/org/apache/spark/deploy/mesos/ui/DriverPage.scala | 4 ++-- .../org/apache/spark/deploy/mesos/ui/MesosClusterPage.scala | 8 4 files changed, 8 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cad33a73/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index 946a928..a8d721f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -83,7 +83,7 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") Executor Memory: {Utils.megabytesToString(app.desc.memoryPerExecutorMB)} -Submit Date: {app.submitDate} +Submit Date: {UIUtils.formatDate(app.submitDate)} State: {app.state} { if (!app.isFinished) { http://git-wip-us.apache.org/repos/asf/spark/blob/cad33a73/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index e722a24..9351c72 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/sr
spark git commit: [BUILD] Close stale PRs
Repository: spark Updated Branches: refs/heads/master 2eaf4f3fe -> e9f97154b [BUILD] Close stale PRs ## What changes were proposed in this pull request? This pr proposed to close stale PRs. Currently, we have 400+ open PRs and there are some stale PRs whose JIRA tickets have been already closed and whose JIRA tickets does not exist (also, they seem not to be minor issues). // Open PRs whose JIRA tickets have been already closed Closes #11785 Closes #13027 Closes #13614 Closes #13761 Closes #15197 Closes #14006 Closes #12576 Closes #15447 Closes #13259 Closes #15616 Closes #14473 Closes #16638 Closes #16146 Closes #17269 Closes #17313 Closes #17418 Closes #17485 Closes #17551 Closes #17463 Closes #17625 // Open PRs whose JIRA tickets does not exist and they are not minor issues Closes #10739 Closes #15193 Closes #15344 Closes #14804 Closes #16993 Closes #17040 Closes #15180 Closes #17238 ## How was this patch tested? N/A Author: Takeshi Yamamuro Closes #17734 from maropu/resolved_pr. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e9f97154 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e9f97154 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e9f97154 Branch: refs/heads/master Commit: e9f97154bc4af60376a550238315d7fc57099f9c Parents: 2eaf4f3 Author: Takeshi Yamamuro Authored: Mon Apr 24 09:34:38 2017 +0100 Committer: Sean Owen Committed: Mon Apr 24 09:34:38 2017 +0100 -- -- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/5] spark-website git commit: adjust the content structure to make it more reasonable
adjust the content structure to make it more reasonable Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/4e458563 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/4e458563 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/4e458563 Branch: refs/heads/asf-site Commit: 4e458563361e07e4cfb8286fd0c64a948185271a Parents: 05c9946 Author: Stan Zhai Authored: Fri Mar 10 00:45:48 2017 +0800 Committer: Stan Zhai Committed: Fri Mar 10 00:45:48 2017 +0800 -- developer-tools.md| 97 + site/developer-tools.html | 98 +- 2 files changed, 97 insertions(+), 98 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/4e458563/developer-tools.md -- diff --git a/developer-tools.md b/developer-tools.md index e012c8e..e712e7d 100644 --- a/developer-tools.md +++ b/developer-tools.md @@ -48,6 +48,23 @@ builds. This process will auto-start after the first time `build/mvn` is called shut down at any time by running `build/zinc-/bin/zinc -shutdown` and will automatically restart whenever `build/mvn` is called. +Building submodules individually + +For instance, you can build the Spark Core module using: + +``` +$ # sbt +$ build/sbt +> project core +> package + +$ # or you can build the spark-core module with sbt directly using: +$ build/sbt core/package + +$ # Maven +$ build/mvn package -DskipTests -pl :spark-core_2.11 +``` + Running Individual Tests @@ -95,7 +112,6 @@ $ build/sbt "core/testOnly *DAGSchedulerSuite -- -z SPARK-12345" For more about how to run individual tests with sbt, see the [sbt documentation](http://www.scala-sbt.org/0.13/docs/Testing.html). - Testing with Maven With Maven, you can use the `-DwildcardSuites` flag to run individual Scala tests: @@ -112,6 +128,37 @@ To run individual Java tests, you can use the `-Dtest` flag: build/mvn test -DwildcardSuites=none -Dtest=org.apache.spark.streaming.JavaAPISuite test ``` +ScalaTest Issues + +If the following error occurs when running ScalaTest + +``` +An internal error occurred during: "Launching XYZSuite.scala". +java.lang.NullPointerException +``` +It is due to an incorrect Scala library in the classpath. To fix it: + +- Right click on project +- Select `Build Path | Configure Build Path` +- `Add Library | Scala Library` +- Remove `scala-library-2.10.4.jar - lib_managed\jars` + +In the event of "Could not find resource path for Web UI: org/apache/spark/ui/static", +it's due to a classpath issue (some classes were probably not compiled). To fix this, it +sufficient to run a test from the command line: + +``` +build/sbt "test-only org.apache.spark.rdd.SortingSuite" +``` + +Running Different Test Permutations on Jenkins + +When running tests for a pull request on Jenkins, you can add special phrases to the title of +your pull request to change testing behavior. This includes: + +- `[test-maven]` - signals to test the pull request using maven +- `[test-hadoop2.7]` - signals to test using Spark's Hadoop 2.7 profile + Checking Out Pull Requests Git provides a mechanism for fetching remote pull requests into your own local repository. 
@@ -156,54 +203,6 @@ $ build/mvn -DskipTests install $ build/mvn dependency:tree ``` -Building submodules individually - -For instance, you can build the Spark Core module using: - -``` -$ # sbt -$ build/sbt -> project core -> package - -$ # or you can build the spark-core module with sbt directly using: -$ build/sbt core/package - -$ # Maven -$ build/mvn package -DskipTests -pl :spark-core_2.11 -``` - -ScalaTest Issues - -If the following error occurs when running ScalaTest - -``` -An internal error occurred during: "Launching XYZSuite.scala". -java.lang.NullPointerException -``` -It is due to an incorrect Scala library in the classpath. To fix it: - -- Right click on project -- Select `Build Path | Configure Build Path` -- `Add Library | Scala Library` -- Remove `scala-library-2.10.4.jar - lib_managed\jars` - -In the event of "Could not find resource path for Web UI: org/apache/spark/ui/static", -it's due to a classpath issue (some classes were probably not compiled). To fix this, it -sufficient to run a test from the command line: - -``` -build/sbt "test-only org.apache.spark.rdd.SortingSuite" -``` - -Running Different Test Permutations on Jenkins - -When running tests for a pull request on Jenkins, you can add special phrases to the title of -your pull request to change testing behavior. This includes: - -- `[test-maven]` - signals to test the pull request using maven -- `[test-hadoop2.7]` - signals to test using Spark's Hadoop 2.7 profile - Organizing Imports Yo
[4/5] spark-website git commit: Merge branch 'asf-site' of https://github.com/apache/spark-website into add-sbt-package
Merge branch 'asf-site' of https://github.com/apache/spark-website into add-sbt-package Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/3c96a509 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/3c96a509 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/3c96a509 Branch: refs/heads/asf-site Commit: 3c96a509cb1e00df716d8f38eb5b214d8778e45d Parents: 9902531 aa1c66e Author: Stan Zhai Authored: Tue Apr 25 15:17:57 2017 +0800 Committer: Stan Zhai Committed: Tue Apr 25 15:17:57 2017 +0800 -- _layouts/global.html| 1 + community.md| 6 + contributing.md | 4 + developer-tools.md | 57 +++- improvement-proposals.md| 91 ++ ...3-31-spark-summit-june-2017-agenda-posted.md | 15 + site/committers.html| 7 +- site/community.html | 13 +- site/contributing.html | 11 +- site/developer-tools.html | 106 ++- site/documentation.html | 7 +- site/downloads.html | 7 +- site/examples.html | 7 +- site/faq.html | 7 +- site/graphx/index.html | 7 +- site/improvement-proposals.html | 295 +++ site/index.html | 7 +- site/mailing-lists.html | 7 +- site/mllib/index.html | 7 +- site/news/amp-camp-2013-registration-ope.html | 7 +- .../news/announcing-the-first-spark-summit.html | 7 +- .../news/fourth-spark-screencast-published.html | 7 +- site/news/index.html| 16 +- site/news/nsdi-paper.html | 7 +- site/news/one-month-to-spark-summit-2015.html | 7 +- .../proposals-open-for-spark-summit-east.html | 7 +- ...registration-open-for-spark-summit-east.html | 7 +- .../news/run-spark-and-shark-on-amazon-emr.html | 7 +- site/news/spark-0-6-1-and-0-5-2-released.html | 7 +- site/news/spark-0-6-2-released.html | 7 +- site/news/spark-0-7-0-released.html | 7 +- site/news/spark-0-7-2-released.html | 7 +- site/news/spark-0-7-3-released.html | 7 +- site/news/spark-0-8-0-released.html | 7 +- site/news/spark-0-8-1-released.html | 7 +- site/news/spark-0-9-0-released.html | 7 +- site/news/spark-0-9-1-released.html | 7 +- site/news/spark-0-9-2-released.html | 7 +- site/news/spark-1-0-0-released.html | 7 +- site/news/spark-1-0-1-released.html | 7 +- site/news/spark-1-0-2-released.html | 7 +- site/news/spark-1-1-0-released.html | 7 +- site/news/spark-1-1-1-released.html | 7 +- site/news/spark-1-2-0-released.html | 7 +- site/news/spark-1-2-1-released.html | 7 +- site/news/spark-1-2-2-released.html | 7 +- site/news/spark-1-3-0-released.html | 7 +- site/news/spark-1-4-0-released.html | 7 +- site/news/spark-1-4-1-released.html | 7 +- site/news/spark-1-5-0-released.html | 7 +- site/news/spark-1-5-1-released.html | 7 +- site/news/spark-1-5-2-released.html | 7 +- site/news/spark-1-6-0-released.html | 7 +- site/news/spark-1-6-1-released.html | 7 +- site/news/spark-1-6-2-released.html | 7 +- site/news/spark-1-6-3-released.html | 7 +- site/news/spark-2-0-0-released.html | 7 +- site/news/spark-2-0-1-released.html | 7 +- site/news/spark-2-0-2-released.html | 7 +- site/news/spark-2-1-0-released.html | 7 +- site/news/spark-2.0.0-preview.html | 7 +- .../spark-accepted-into-apache-incubator.html | 7 +- site/news/spark-and-shark-in-the-news.html | 7 +- site/news/spark-becomes-tlp.html| 7 +- site/news/spark-featured-in-wired.html | 7 +- .../spark-mailing-lists-moving-to-apache.html | 7 +- site/news/spark-meetups.html| 7 +- site/news/spark-screencasts-published.html | 7 +- site/news/spark-summit-2013-is-a-wrap.html | 7 +- site/news/spark-summit-2014-videos-posted.html | 7 +- site/news/spark-summit-2015-videos-posted.html | 7 +- 
site/news/spark-summit-agenda-posted.html | 7 +- .../spark-summit-east-2015-videos-posted.html | 7 +- .../spark-summit-east-2016-cfp-closing.html | 7 +- ..
[1/5] spark-website git commit: fix an error in the descriptions of `Build Targets For Individual Projects`
Repository: spark-website Updated Branches: refs/heads/asf-site aa1c66e42 -> 09046892b fix an error in the descriptions of `Build Targets For Individual Projects` Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/05c99469 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/05c99469 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/05c99469 Branch: refs/heads/asf-site Commit: 05c99469b15a6039df855d8619972c4db1a3e663 Parents: c1b9ad3 Author: Stan Zhai Authored: Fri Mar 10 00:33:52 2017 +0800 Committer: Stan Zhai Committed: Fri Mar 10 00:33:52 2017 +0800 -- developer-tools.md| 16 site/developer-tools.html | 16 2 files changed, 24 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/05c99469/developer-tools.md -- diff --git a/developer-tools.md b/developer-tools.md index 88f3f36..e012c8e 100644 --- a/developer-tools.md +++ b/developer-tools.md @@ -48,6 +48,7 @@ builds. This process will auto-start after the first time `build/mvn` is called shut down at any time by running `build/zinc-/bin/zinc -shutdown` and will automatically restart whenever `build/mvn` is called. + Running Individual Tests When developing locally, it's often convenient to run a single test or a few tests, rather than running the entire test suite. @@ -155,14 +156,21 @@ $ build/mvn -DskipTests install $ build/mvn dependency:tree ``` - -Running Build Targets For Individual Projects +Building submodules individually + +For instance, you can build the Spark Core module using: ``` $ # sbt -$ build/sbt package +$ build/sbt +> project core +> package + +$ # or you can build the spark-core module with sbt directly using: +$ build/sbt core/package + $ # Maven -$ build/mvn package -DskipTests -pl assembly +$ build/mvn package -DskipTests -pl :spark-core_2.11 ``` ScalaTest Issues http://git-wip-us.apache.org/repos/asf/spark-website/blob/05c99469/site/developer-tools.html -- diff --git a/site/developer-tools.html b/site/developer-tools.html index 615adea..1cbe7bb 100644 --- a/site/developer-tools.html +++ b/site/developer-tools.html @@ -232,6 +232,7 @@ builds. This process will auto-start after the first time build/mvn shut down at any time by running build/zinc-/bin/zinc -shutdown and will automatically restart whenever build/mvn is called. + Running Individual Tests When developing locally, it’s often convenient to run a single test or a few tests, rather than running the entire test suite. @@ -326,13 +327,20 @@ $ build/mvn -DskipTests install $ build/mvn dependency:tree - -Running Build Targets For Individual Projects +Building submodules individually + +For instance, you can build the Spark Core module using: $ # sbt -$ build/sbt package +$ build/sbt +> project core +> package + +$ # or you can build the spark-core module with sbt directly using: +$ build/sbt core/package + $ # Maven -$ build/mvn package -DskipTests -pl assembly +$ build/mvn package -DskipTests -pl :spark-core_2.11 ScalaTest Issues - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[3/5] spark-website git commit: simplify maven build
simplify maven build Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/9902531e Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/9902531e Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/9902531e Branch: refs/heads/asf-site Commit: 9902531e626ea28f6ab01688091339faaf09389a Parents: 4e45856 Author: Stan Zhai Authored: Fri Mar 17 12:33:57 2017 +0800 Committer: Stan Zhai Committed: Fri Mar 17 12:33:57 2017 +0800 -- developer-tools.md| 2 +- site/developer-tools.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/9902531e/developer-tools.md -- diff --git a/developer-tools.md b/developer-tools.md index e712e7d..0723115 100644 --- a/developer-tools.md +++ b/developer-tools.md @@ -62,7 +62,7 @@ $ # or you can build the spark-core module with sbt directly using: $ build/sbt core/package $ # Maven -$ build/mvn package -DskipTests -pl :spark-core_2.11 +$ build/mvn package -DskipTests -pl core ``` http://git-wip-us.apache.org/repos/asf/spark-website/blob/9902531e/site/developer-tools.html -- diff --git a/site/developer-tools.html b/site/developer-tools.html index b46d664..62793ef 100644 --- a/site/developer-tools.html +++ b/site/developer-tools.html @@ -245,7 +245,7 @@ $ # or you can build the spark-core module with sbt directly using: $ build/sbt core/package $ # Maven -$ build/mvn package -DskipTests -pl :spark-core_2.11 +$ build/mvn package -DskipTests -pl core - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[5/5] spark-website git commit: fix conflict
fix conflict Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/09046892 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/09046892 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/09046892 Branch: refs/heads/asf-site Commit: 09046892bf9702dbb48c2b325e05e7c4091164ea Parents: 3c96a50 Author: Stan Zhai Authored: Tue Apr 25 15:18:47 2017 +0800 Committer: Stan Zhai Committed: Tue Apr 25 15:18:47 2017 +0800 -- site/developer-tools.html | 49 +- 1 file changed, 1 insertion(+), 48 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/09046892/site/developer-tools.html -- diff --git a/site/developer-tools.html b/site/developer-tools.html index 680e089..d6c4169 100644 --- a/site/developer-tools.html +++ b/site/developer-tools.html @@ -304,7 +304,6 @@ $ build/mvn package -DskipTests -pl core build/mvn test -DwildcardSuites=none -Dtest=org.apache.spark.streaming.JavaAPISuite test -<<< HEAD ScalaTest Issues If the following error occurs when running ScalaTest @@ -337,7 +336,7 @@ your pull request to change testing behavior. This includes: [test-maven] - signals to test the pull request using maven [test-hadoop2.7] - signals to test using Spark’s Hadoop 2.7 profile -=== + Binary compatibility To ensure binary compatibility, Spark uses https://github.com/typesafehub/migration-manager";>MiMa. @@ -386,7 +385,6 @@ JIRA number of the issue you’re working on as well as its title. updating your pull request. Usually, the problems reported by MiMa are self-explanatory and revolve around missing members (methods or fields) that you will have to add back in order to maintain binary compatibility. ->>> aa1c66e424e024cb2e9f962aae8952bb4ad75cb5 Checking Out Pull Requests @@ -428,51 +426,6 @@ $ build/mvn -DskipTests install $ build/mvn dependency:tree -<<< HEAD -=== - -Running Build Targets For Individual Projects - -$ # sbt -$ build/sbt package -$ # Maven -$ build/mvn package -DskipTests -pl assembly - - -ScalaTest Issues - -If the following error occurs when running ScalaTest - -An internal error occurred during: "Launching XYZSuite.scala". -java.lang.NullPointerException - -It is due to an incorrect Scala library in the classpath. To fix it: - - - Right click on project - Select Build Path | Configure Build Path - Add Library | Scala Library - Remove scala-library-2.10.4.jar - lib_managed\jars - - -In the event of “Could not find resource path for Web UI: org/apache/spark/ui/static”, -it’s due to a classpath issue (some classes were probably not compiled). To fix this, it is -sufficient to run a test from the command line: - -build/sbt "test-only org.apache.spark.rdd.SortingSuite" - - -Running Different Test Permutations on Jenkins - -When running tests for a pull request on Jenkins, you can add special phrases to the title of -your pull request to change testing behavior. This includes: - - - [test-maven] - signals to test the pull request using maven - [test-hadoop2.7] - signals to test using Spark’s Hadoop 2.7 profile - - ->>> aa1c66e424e024cb2e9f962aae8952bb4ad75cb5 Organizing Imports You can use a https://plugins.jetbrains.com/plugin/7350";>IntelliJ Imports Organizer - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20455][DOCS] Fix Broken Docker IT Docs
Repository: spark Updated Branches: refs/heads/master 31345fde8 -> c8f121951 [SPARK-20455][DOCS] Fix Broken Docker IT Docs ## What changes were proposed in this pull request? Just added the Maven `test`goal. ## How was this patch tested? No test needed, just a trivial documentation fix. Author: Armin Braun Closes #17756 from original-brownbear/SPARK-20455. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8f12195 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8f12195 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8f12195 Branch: refs/heads/master Commit: c8f1219510f469935aa9ff0b1c92cfe20372377c Parents: 31345fd Author: Armin Braun Authored: Tue Apr 25 09:13:50 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:13:50 2017 +0100 -- docs/building-spark.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c8f12195/docs/building-spark.md -- diff --git a/docs/building-spark.md b/docs/building-spark.md index e99b70f..0f551bc 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -232,7 +232,7 @@ Once installed, the `docker` service needs to be started, if not already running On Linux, this can be done by `sudo service docker start`. ./build/mvn install -DskipTests -./build/mvn -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 +./build/mvn test -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 or - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20455][DOCS] Fix Broken Docker IT Docs
Repository: spark Updated Branches: refs/heads/branch-2.2 fb59a1954 -> c18de9c04 [SPARK-20455][DOCS] Fix Broken Docker IT Docs ## What changes were proposed in this pull request? Just added the Maven `test`goal. ## How was this patch tested? No test needed, just a trivial documentation fix. Author: Armin Braun Closes #17756 from original-brownbear/SPARK-20455. (cherry picked from commit c8f1219510f469935aa9ff0b1c92cfe20372377c) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c18de9c0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c18de9c0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c18de9c0 Branch: refs/heads/branch-2.2 Commit: c18de9c045aaf7d17113f87a6b2146811b4af0eb Parents: fb59a19 Author: Armin Braun Authored: Tue Apr 25 09:13:50 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:13:58 2017 +0100 -- docs/building-spark.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c18de9c0/docs/building-spark.md -- diff --git a/docs/building-spark.md b/docs/building-spark.md index e99b70f..0f551bc 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -232,7 +232,7 @@ Once installed, the `docker` service needs to be started, if not already running On Linux, this can be done by `sudo service docker start`. ./build/mvn install -DskipTests -./build/mvn -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 +./build/mvn test -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 or - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20455][DOCS] Fix Broken Docker IT Docs
Repository: spark Updated Branches: refs/heads/branch-2.1 427966597 -> 65990fc57 [SPARK-20455][DOCS] Fix Broken Docker IT Docs ## What changes were proposed in this pull request? Just added the Maven `test`goal. ## How was this patch tested? No test needed, just a trivial documentation fix. Author: Armin Braun Closes #17756 from original-brownbear/SPARK-20455. (cherry picked from commit c8f1219510f469935aa9ff0b1c92cfe20372377c) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/65990fc5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/65990fc5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/65990fc5 Branch: refs/heads/branch-2.1 Commit: 65990fc5708b35cf53b3582c146a4de5ece1da3c Parents: 4279665 Author: Armin Braun Authored: Tue Apr 25 09:13:50 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:14:10 2017 +0100 -- docs/building-spark.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/65990fc5/docs/building-spark.md -- diff --git a/docs/building-spark.md b/docs/building-spark.md index 0945095..33ff80e 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -302,7 +302,7 @@ Once installed, the `docker` service needs to be started, if not already running On Linux, this can be done by `sudo service docker start`. ./build/mvn install -DskipTests -./build/mvn -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 +./build/mvn test -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 or - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20404][CORE] Using Option(name) instead of Some(name)
Repository: spark Updated Branches: refs/heads/master c8f121951 -> 0bc7a9021 [SPARK-20404][CORE] Using Option(name) instead of Some(name) Using Option(name) instead of Some(name) to prevent runtime failures when using accumulators created like the following ``` sparkContext.accumulator(0, null) ``` Author: Sergey Zhemzhitsky Closes #17740 from szhem/SPARK-20404-null-acc-names. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0bc7a902 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0bc7a902 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0bc7a902 Branch: refs/heads/master Commit: 0bc7a90210aad9025c1e1bdc99f8e723c1bf0fbf Parents: c8f1219 Author: Sergey Zhemzhitsky Authored: Tue Apr 25 09:18:36 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:18:36 2017 +0100 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0bc7a902/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 99efc48..0ec1bdd 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1350,7 +1350,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulator[T](initialValue: T, name: String)(implicit param: AccumulatorParam[T]) : Accumulator[T] = { -val acc = new Accumulator(initialValue, param, Some(name)) +val acc = new Accumulator(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1379,7 +1379,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulable[R, T](initialValue: R, name: String)(implicit param: AccumulableParam[R, T]) : Accumulable[R, T] = { -val acc = new Accumulable(initialValue, param, Some(name)) +val acc = new Accumulable(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1414,7 +1414,7 @@ class SparkContext(config: SparkConf) extends Logging { * @note Accumulators must be registered before use, or it will throw exception. */ def register(acc: AccumulatorV2[_, _], name: String): Unit = { -acc.register(this, name = Some(name)) +acc.register(this, name = Option(name)) } /** - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
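For context, the behavioural difference this patch relies on is that `Some(null)` keeps the null while `Option(null)` collapses to `None`. A minimal, self-contained Scala sketch of that distinction (illustrative only, not Spark code; the `sparkContext.accumulator(0, null)` call quoted in the commit message is the motivating case):

```scala
// Sketch: why Option(name) is safer than Some(name) when callers may pass null.
object OptionVsSome {
  def main(args: Array[String]): Unit = {
    val name: String = null

    // Some(null) wraps the null, so later code that assumes a real name
    // (e.g. pattern matching on Some(n) and using n) can fail at runtime.
    val wrapped: Option[String] = Some(name)
    println(wrapped)                            // Some(null)

    // Option(null) normalises to None, which downstream code handles safely.
    val normalised: Option[String] = Option(name)
    println(normalised)                         // None

    // Typical downstream use: fall back to treating the accumulator as unnamed.
    println(normalised.getOrElse("<unnamed>"))  // <unnamed>
  }
}
```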
spark git commit: [SPARK-20404][CORE] Using Option(name) instead of Some(name)
Repository: spark Updated Branches: refs/heads/branch-2.1 65990fc57 -> 2d47e1aaf [SPARK-20404][CORE] Using Option(name) instead of Some(name) Using Option(name) instead of Some(name) to prevent runtime failures when using accumulators created like the following ``` sparkContext.accumulator(0, null) ``` Author: Sergey Zhemzhitsky Closes #17740 from szhem/SPARK-20404-null-acc-names. (cherry picked from commit 0bc7a90210aad9025c1e1bdc99f8e723c1bf0fbf) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2d47e1aa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2d47e1aa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2d47e1aa Branch: refs/heads/branch-2.1 Commit: 2d47e1aaf93fa13c0407d5c0dcca0f7c898e5b94 Parents: 65990fc Author: Sergey Zhemzhitsky Authored: Tue Apr 25 09:18:36 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:18:53 2017 +0100 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2d47e1aa/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 5ae9db7..6e24656 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1275,7 +1275,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulator[T](initialValue: T, name: String)(implicit param: AccumulatorParam[T]) : Accumulator[T] = { -val acc = new Accumulator(initialValue, param, Some(name)) +val acc = new Accumulator(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1304,7 +1304,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulable[R, T](initialValue: R, name: String)(implicit param: AccumulableParam[R, T]) : Accumulable[R, T] = { -val acc = new Accumulable(initialValue, param, Some(name)) +val acc = new Accumulable(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1339,7 +1339,7 @@ class SparkContext(config: SparkConf) extends Logging { * @note Accumulators must be registered before use, or it will throw exception. */ def register(acc: AccumulatorV2[_, _], name: String): Unit = { -acc.register(this, name = Some(name)) +acc.register(this, name = Option(name)) } /** - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20404][CORE] Using Option(name) instead of Some(name)
Repository: spark Updated Branches: refs/heads/branch-2.2 c18de9c04 -> b62ebd91b [SPARK-20404][CORE] Using Option(name) instead of Some(name) Using Option(name) instead of Some(name) to prevent runtime failures when using accumulators created like the following ``` sparkContext.accumulator(0, null) ``` Author: Sergey Zhemzhitsky Closes #17740 from szhem/SPARK-20404-null-acc-names. (cherry picked from commit 0bc7a90210aad9025c1e1bdc99f8e723c1bf0fbf) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b62ebd91 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b62ebd91 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b62ebd91 Branch: refs/heads/branch-2.2 Commit: b62ebd91bb2c64e1ecef0f2d97db91f5ce32743b Parents: c18de9c Author: Sergey Zhemzhitsky Authored: Tue Apr 25 09:18:36 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:18:44 2017 +0100 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b62ebd91/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 99efc48..0ec1bdd 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1350,7 +1350,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulator[T](initialValue: T, name: String)(implicit param: AccumulatorParam[T]) : Accumulator[T] = { -val acc = new Accumulator(initialValue, param, Some(name)) +val acc = new Accumulator(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1379,7 +1379,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulable[R, T](initialValue: R, name: String)(implicit param: AccumulableParam[R, T]) : Accumulable[R, T] = { -val acc = new Accumulable(initialValue, param, Some(name)) +val acc = new Accumulable(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1414,7 +1414,7 @@ class SparkContext(config: SparkConf) extends Logging { * @note Accumulators must be registered before use, or it will throw exception. */ def register(acc: AccumulatorV2[_, _], name: String): Unit = { -acc.register(this, name = Some(name)) +acc.register(this, name = Option(name)) } /** - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20400][DOCS] Remove References to 3rd Party Vendor Tools
Repository: spark Updated Branches: refs/heads/master df58a95a3 -> 7a365257e [SPARK-20400][DOCS] Remove References to 3rd Party Vendor Tools ## What changes were proposed in this pull request? Simple documentation change to remove explicit vendor references. ## How was this patch tested? NA Please review http://spark.apache.org/contributing.html before opening a pull request. Author: anabranch Closes #17695 from anabranch/remove-vendor. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7a365257 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7a365257 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7a365257 Branch: refs/heads/master Commit: 7a365257e934e838bd90f6a0c50362bf47202b0e Parents: df58a95 Author: anabranch Authored: Wed Apr 26 09:49:05 2017 +0100 Committer: Sean Owen Committed: Wed Apr 26 09:49:05 2017 +0100 -- docs/configuration.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7a365257/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index 87b7632..8b53e92 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2270,8 +2270,8 @@ should be included on Spark's classpath: * `hdfs-site.xml`, which provides default behaviors for the HDFS client. * `core-site.xml`, which sets the default filesystem name. -The location of these configuration files varies across CDH and HDP versions, but -a common location is inside of `/etc/hadoop/conf`. Some tools, such as Cloudera Manager, create +The location of these configuration files varies across Hadoop versions, but +a common location is inside of `/etc/hadoop/conf`. Some tools create configurations on-the-fly, but offer a mechanisms to download copies of them. To make these files visible to Spark, set `HADOOP_CONF_DIR` in `$SPARK_HOME/spark-env.sh` - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20400][DOCS] Remove References to 3rd Party Vendor Tools
Repository: spark Updated Branches: refs/heads/branch-2.2 c8803c068 -> a2f5ced32 [SPARK-20400][DOCS] Remove References to 3rd Party Vendor Tools ## What changes were proposed in this pull request? Simple documentation change to remove explicit vendor references. ## How was this patch tested? NA Please review http://spark.apache.org/contributing.html before opening a pull request. Author: anabranch Closes #17695 from anabranch/remove-vendor. (cherry picked from commit 7a365257e934e838bd90f6a0c50362bf47202b0e) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a2f5ced3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a2f5ced3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a2f5ced3 Branch: refs/heads/branch-2.2 Commit: a2f5ced3236db665bb33adc1bf1f90553997f46b Parents: c8803c0 Author: anabranch Authored: Wed Apr 26 09:49:05 2017 +0100 Committer: Sean Owen Committed: Wed Apr 26 09:49:13 2017 +0100 -- docs/configuration.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a2f5ced3/docs/configuration.md -- diff --git a/docs/configuration.md b/docs/configuration.md index 87b7632..8b53e92 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2270,8 +2270,8 @@ should be included on Spark's classpath: * `hdfs-site.xml`, which provides default behaviors for the HDFS client. * `core-site.xml`, which sets the default filesystem name. -The location of these configuration files varies across CDH and HDP versions, but -a common location is inside of `/etc/hadoop/conf`. Some tools, such as Cloudera Manager, create +The location of these configuration files varies across Hadoop versions, but +a common location is inside of `/etc/hadoop/conf`. Some tools create configurations on-the-fly, but offer a mechanisms to download copies of them. To make these files visible to Spark, set `HADOOP_CONF_DIR` in `$SPARK_HOME/spark-env.sh` - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20465][CORE] Throws a proper exception when any temp directory could not be got
Repository: spark Updated Branches: refs/heads/master 59e3a5644 -> 8c911adac [SPARK-20465][CORE] Throws a proper exception when any temp directory could not be got ## What changes were proposed in this pull request? This PR proposes to throw an exception with better message rather than `ArrayIndexOutOfBoundsException` when temp directories could not be created. Running the commands below: ```bash ./bin/spark-shell --conf spark.local.dir=/NONEXISTENT_DIR_ONE,/NONEXISTENT_DIR_TWO ``` produces ... **Before** ``` Exception in thread "main" java.lang.ExceptionInInitializerError ... Caused by: java.lang.ArrayIndexOutOfBoundsException: 0 ... ``` **After** ``` Exception in thread "main" java.lang.ExceptionInInitializerError ... Caused by: java.io.IOException: Failed to get a temp directory under [/NONEXISTENT_DIR_ONE,/NONEXISTENT_DIR_TWO]. ... ``` ## How was this patch tested? Unit tests in `LocalDirsSuite.scala`. Author: hyukjinkwon Closes #17768 from HyukjinKwon/throws-temp-dir-exception. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c911ada Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c911ada Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c911ada Branch: refs/heads/master Commit: 8c911adac56a1b1d95bc19915e0070ce7305257c Parents: 59e3a56 Author: hyukjinkwon Authored: Fri Apr 28 08:49:35 2017 +0100 Committer: Sean Owen Committed: Fri Apr 28 08:49:35 2017 +0100 -- .../scala/org/apache/spark/util/Utils.scala | 6 - .../apache/spark/storage/LocalDirsSuite.scala | 23 +--- 2 files changed, 25 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8c911ada/core/src/main/scala/org/apache/spark/util/Utils.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index e042bad..4d37db9 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -740,7 +740,11 @@ private[spark] object Utils extends Logging { * always return a single directory. 
*/ def getLocalDir(conf: SparkConf): String = { -getOrCreateLocalRootDirs(conf)(0) +getOrCreateLocalRootDirs(conf).headOption.getOrElse { + val configuredLocalDirs = getConfiguredLocalDirs(conf) + throw new IOException( +s"Failed to get a temp directory under [${configuredLocalDirs.mkString(",")}].") +} } private[spark] def isRunningInYarnContainer(conf: SparkConf): Boolean = { http://git-wip-us.apache.org/repos/asf/spark/blob/8c911ada/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala b/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala index c707407..f7b3a27 100644 --- a/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.storage -import java.io.File +import java.io.{File, IOException} import org.scalatest.BeforeAndAfter @@ -33,9 +33,13 @@ class LocalDirsSuite extends SparkFunSuite with BeforeAndAfter { Utils.clearLocalRootDirs() } + after { +Utils.clearLocalRootDirs() + } + test("Utils.getLocalDir() returns a valid directory, even if some local dirs are missing") { // Regression test for SPARK-2974 -assert(!new File("/NONEXISTENT_DIR").exists()) +assert(!new File("/NONEXISTENT_PATH").exists()) val conf = new SparkConf(false) .set("spark.local.dir", s"/NONEXISTENT_PATH,${System.getProperty("java.io.tmpdir")}") assert(new File(Utils.getLocalDir(conf)).exists()) @@ -43,7 +47,7 @@ class LocalDirsSuite extends SparkFunSuite with BeforeAndAfter { test("SPARK_LOCAL_DIRS override also affects driver") { // Regression test for SPARK-2975 -assert(!new File("/NONEXISTENT_DIR").exists()) +assert(!new File("/NONEXISTENT_PATH").exists()) // spark.local.dir only contains invalid directories, but that's not a problem since // SPARK_LOCAL_DIRS will override it on both the driver and workers: val conf = new SparkConfWithEnv(Map("SPARK_LOCAL_DIRS" -> System.getProperty("java.io.tmpdir"))) @@ -51,4 +55,17 @@ class LocalDirsSuite extends SparkFunSuite with BeforeAndAfter { assert(new File(Utils.getLocalDir(conf)).exists()) } + test("Utils.getLocalDir() throws an exception if any temporary directory cannot be retrieved") { +val path1 = "/NONEXISTENT_PATH_ONE" +val path2 = "/NONE
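The heart of the fix is swapping an unchecked index access for `headOption.getOrElse`, so an empty result produces a descriptive `IOException` rather than `ArrayIndexOutOfBoundsException: 0`. A simplified, self-contained sketch of that pattern (the names and the stand-in `createdDirs` helper are illustrative, not the real `Utils` internals):

```scala
import java.io.IOException

object FirstUsableDir {
  // Stand-in for getOrCreateLocalRootDirs: here every configured dir "fails",
  // so the result is empty, as when spark.local.dir points only at bad paths.
  def createdDirs(configured: Seq[String]): Seq[String] = Seq.empty

  def getLocalDir(configured: Seq[String]): String = {
    // Before: createdDirs(configured)(0)  -> ArrayIndexOutOfBoundsException: 0
    // After: headOption turns the empty case into a clear error message.
    createdDirs(configured).headOption.getOrElse {
      throw new IOException(
        s"Failed to get a temp directory under [${configured.mkString(",")}].")
    }
  }

  def main(args: Array[String]): Unit = {
    try {
      getLocalDir(Seq("/NONEXISTENT_DIR_ONE", "/NONEXISTENT_DIR_TWO"))
    } catch {
      case e: IOException => println(e.getMessage)
    }
  }
}
```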
spark git commit: [SPARK-20465][CORE] Throws a proper exception when any temp directory could not be got
Repository: spark Updated Branches: refs/heads/branch-2.2 af3a1411a -> ea5b11446 [SPARK-20465][CORE] Throws a proper exception when any temp directory could not be got ## What changes were proposed in this pull request? This PR proposes to throw an exception with better message rather than `ArrayIndexOutOfBoundsException` when temp directories could not be created. Running the commands below: ```bash ./bin/spark-shell --conf spark.local.dir=/NONEXISTENT_DIR_ONE,/NONEXISTENT_DIR_TWO ``` produces ... **Before** ``` Exception in thread "main" java.lang.ExceptionInInitializerError ... Caused by: java.lang.ArrayIndexOutOfBoundsException: 0 ... ``` **After** ``` Exception in thread "main" java.lang.ExceptionInInitializerError ... Caused by: java.io.IOException: Failed to get a temp directory under [/NONEXISTENT_DIR_ONE,/NONEXISTENT_DIR_TWO]. ... ``` ## How was this patch tested? Unit tests in `LocalDirsSuite.scala`. Author: hyukjinkwon Closes #17768 from HyukjinKwon/throws-temp-dir-exception. (cherry picked from commit 8c911adac56a1b1d95bc19915e0070ce7305257c) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ea5b1144 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ea5b1144 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ea5b1144 Branch: refs/heads/branch-2.2 Commit: ea5b114467c04f5b1ff39a7187f8299af49d22eb Parents: af3a141 Author: hyukjinkwon Authored: Fri Apr 28 08:49:35 2017 +0100 Committer: Sean Owen Committed: Fri Apr 28 08:54:15 2017 +0100 -- .../scala/org/apache/spark/util/Utils.scala | 6 - .../apache/spark/storage/LocalDirsSuite.scala | 23 +--- 2 files changed, 25 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ea5b1144/core/src/main/scala/org/apache/spark/util/Utils.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index e042bad..4d37db9 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -740,7 +740,11 @@ private[spark] object Utils extends Logging { * always return a single directory. 
*/ def getLocalDir(conf: SparkConf): String = { -getOrCreateLocalRootDirs(conf)(0) +getOrCreateLocalRootDirs(conf).headOption.getOrElse { + val configuredLocalDirs = getConfiguredLocalDirs(conf) + throw new IOException( +s"Failed to get a temp directory under [${configuredLocalDirs.mkString(",")}].") +} } private[spark] def isRunningInYarnContainer(conf: SparkConf): Boolean = { http://git-wip-us.apache.org/repos/asf/spark/blob/ea5b1144/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala b/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala index c707407..f7b3a27 100644 --- a/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/LocalDirsSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.storage -import java.io.File +import java.io.{File, IOException} import org.scalatest.BeforeAndAfter @@ -33,9 +33,13 @@ class LocalDirsSuite extends SparkFunSuite with BeforeAndAfter { Utils.clearLocalRootDirs() } + after { +Utils.clearLocalRootDirs() + } + test("Utils.getLocalDir() returns a valid directory, even if some local dirs are missing") { // Regression test for SPARK-2974 -assert(!new File("/NONEXISTENT_DIR").exists()) +assert(!new File("/NONEXISTENT_PATH").exists()) val conf = new SparkConf(false) .set("spark.local.dir", s"/NONEXISTENT_PATH,${System.getProperty("java.io.tmpdir")}") assert(new File(Utils.getLocalDir(conf)).exists()) @@ -43,7 +47,7 @@ class LocalDirsSuite extends SparkFunSuite with BeforeAndAfter { test("SPARK_LOCAL_DIRS override also affects driver") { // Regression test for SPARK-2975 -assert(!new File("/NONEXISTENT_DIR").exists()) +assert(!new File("/NONEXISTENT_PATH").exists()) // spark.local.dir only contains invalid directories, but that's not a problem since // SPARK_LOCAL_DIRS will override it on both the driver and workers: val conf = new SparkConfWithEnv(Map("SPARK_LOCAL_DIRS" -> System.getProperty("java.io.tmpdir"))) @@ -51,4 +55,17 @@ class LocalDirsSuite extends SparkFunSuite with BeforeAndAfter { assert(new File(Utils.getLocalDir(conf)).exists()) } + test("Utils.getLocalDir() throws an exception if any tempo
spark git commit: [SPARK-20521][DOC][CORE] The default of 'spark.worker.cleanup.appDataTtl' should be 604800 in spark-standalone.md
Repository: spark Updated Branches: refs/heads/master d228cd0b0 -> 4d99b95ad [SPARK-20521][DOC][CORE] The default of 'spark.worker.cleanup.appDataTtl' should be 604800 in spark-standalone.md ## What changes were proposed in this pull request? Currently, our project needs to be set to clean up the worker directory cleanup cycle is three days. When I follow http://spark.apache.org/docs/latest/spark-standalone.html, configure the 'spark.worker.cleanup.appDataTtl' parameter, I configured to 3 * 24 * 3600. When I start the spark service, the startup fails, and the worker log displays the error log as follows: 2017-04-28 15:02:03,306 INFO Utils: Successfully started service 'sparkWorker' on port 48728. Exception in thread "main" java.lang.NumberFormatException: For input string: "3 * 24 * 3600" at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) at java.lang.Long.parseLong(Long.java:430) at java.lang.Long.parseLong(Long.java:483) at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:276) at scala.collection.immutable.StringOps.toLong(StringOps.scala:29) at org.apache.spark.SparkConf$$anonfun$getLong$2.apply(SparkConf.scala:380) at org.apache.spark.SparkConf$$anonfun$getLong$2.apply(SparkConf.scala:380) at scala.Option.map(Option.scala:146) at org.apache.spark.SparkConf.getLong(SparkConf.scala:380) at org.apache.spark.deploy.worker.Worker.(Worker.scala:100) at org.apache.spark.deploy.worker.Worker$.startRpcEnvAndEndpoint(Worker.scala:730) at org.apache.spark.deploy.worker.Worker$.main(Worker.scala:709) at org.apache.spark.deploy.worker.Worker.main(Worker.scala) **Because we put 7 * 24 * 3600 as a string, forced to convert to the dragon type, will lead to problems in the program.** **So I think the default value of the current configuration should be a specific long value, rather than 7 * 24 * 3600,should be 604800. Because it would mislead users for similar configurations, resulting in spark start failure.** ## How was this patch tested? manual tests Please review http://spark.apache.org/contributing.html before opening a pull request. Author: éå°é¾ 10207633 Author: guoxiaolong Author: guoxiaolongzte Closes #17798 from guoxiaolongzte/SPARK-20521. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4d99b95a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4d99b95a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4d99b95a Branch: refs/heads/master Commit: 4d99b95ad0d0c7ef909c8e492ec45e94cf0189b4 Parents: d228cd0 Author: éå°é¾ 10207633 Authored: Sun Apr 30 09:06:25 2017 +0100 Committer: Sean Owen Committed: Sun Apr 30 09:06:25 2017 +0100 -- docs/spark-standalone.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4d99b95a/docs/spark-standalone.md -- diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 1c0b60f..34ced9e 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -242,7 +242,7 @@ SPARK_WORKER_OPTS supports the following system properties: spark.worker.cleanup.appDataTtl - 7 * 24 * 3600 (7 days) + 604800 (7 days, 7 * 24 * 3600) The number of seconds to retain application work directories on each worker. This is a Time To Live and should depend on the amount of available disk space you have. Application logs and jars are - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
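The underlying failure is that `SparkConf.getLong` ends up calling `toLong` on the raw string value, so a configured value like `7 * 24 * 3600` is never evaluated as arithmetic. A small plain-Scala sketch of why only a literal such as `604800` parses (illustrative; it mimics, rather than calls, the SparkConf code path):

```scala
object TtlParsing {
  def main(args: Array[String]): Unit = {
    // What SparkConf.getLong effectively does with the configured string value.
    def parseTtl(value: String): Long = value.toLong

    println(parseTtl("604800"))        // 604800 -- the documented default works

    try {
      parseTtl("7 * 24 * 3600")        // the arithmetic expression is never evaluated
    } catch {
      case e: NumberFormatException =>
        println(s"NumberFormatException: ${e.getMessage}")
    }
  }
}
```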
spark git commit: [SPARK-20521][DOC][CORE] The default of 'spark.worker.cleanup.appDataTtl' should be 604800 in spark-standalone.md
Repository: spark Updated Branches: refs/heads/branch-2.2 9789d5c57 -> c5f559315 [SPARK-20521][DOC][CORE] The default of 'spark.worker.cleanup.appDataTtl' should be 604800 in spark-standalone.md ## What changes were proposed in this pull request? Currently, our project needs to be set to clean up the worker directory cleanup cycle is three days. When I follow http://spark.apache.org/docs/latest/spark-standalone.html, configure the 'spark.worker.cleanup.appDataTtl' parameter, I configured to 3 * 24 * 3600. When I start the spark service, the startup fails, and the worker log displays the error log as follows: 2017-04-28 15:02:03,306 INFO Utils: Successfully started service 'sparkWorker' on port 48728. Exception in thread "main" java.lang.NumberFormatException: For input string: "3 * 24 * 3600" at java.lang.NumberFormatException.forInputString(NumberFormatException.java:65) at java.lang.Long.parseLong(Long.java:430) at java.lang.Long.parseLong(Long.java:483) at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:276) at scala.collection.immutable.StringOps.toLong(StringOps.scala:29) at org.apache.spark.SparkConf$$anonfun$getLong$2.apply(SparkConf.scala:380) at org.apache.spark.SparkConf$$anonfun$getLong$2.apply(SparkConf.scala:380) at scala.Option.map(Option.scala:146) at org.apache.spark.SparkConf.getLong(SparkConf.scala:380) at org.apache.spark.deploy.worker.Worker.(Worker.scala:100) at org.apache.spark.deploy.worker.Worker$.startRpcEnvAndEndpoint(Worker.scala:730) at org.apache.spark.deploy.worker.Worker$.main(Worker.scala:709) at org.apache.spark.deploy.worker.Worker.main(Worker.scala) **Because we put 7 * 24 * 3600 as a string, forced to convert to the dragon type, will lead to problems in the program.** **So I think the default value of the current configuration should be a specific long value, rather than 7 * 24 * 3600,should be 604800. Because it would mislead users for similar configurations, resulting in spark start failure.** ## How was this patch tested? manual tests Please review http://spark.apache.org/contributing.html before opening a pull request. Author: éå°é¾ 10207633 Author: guoxiaolong Author: guoxiaolongzte Closes #17798 from guoxiaolongzte/SPARK-20521. (cherry picked from commit 4d99b95ad0d0c7ef909c8e492ec45e94cf0189b4) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c5f55931 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c5f55931 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c5f55931 Branch: refs/heads/branch-2.2 Commit: c5f559315c88935cd6ac76c6db97327f5d1ee669 Parents: 9789d5c Author: éå°é¾ 10207633 Authored: Sun Apr 30 09:06:25 2017 +0100 Committer: Sean Owen Committed: Sun Apr 30 09:06:34 2017 +0100 -- docs/spark-standalone.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c5f55931/docs/spark-standalone.md -- diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 1c0b60f..34ced9e 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -242,7 +242,7 @@ SPARK_WORKER_OPTS supports the following system properties: spark.worker.cleanup.appDataTtl - 7 * 24 * 3600 (7 days) + 604800 (7 days, 7 * 24 * 3600) The number of seconds to retain application work directories on each worker. This is a Time To Live and should depend on the amount of available disk space you have. 
Application logs and jars are - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20523][BUILD] Clean up build warnings for 2.2.0 release
Repository: spark Updated Branches: refs/heads/master db2fb84b4 -> 16fab6b0e [SPARK-20523][BUILD] Clean up build warnings for 2.2.0 release ## What changes were proposed in this pull request? Fix build warnings primarily related to Breeze 0.13 operator changes, Java style problems ## How was this patch tested? Existing tests Author: Sean Owen Closes #17803 from srowen/SPARK-20523. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/16fab6b0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/16fab6b0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/16fab6b0 Branch: refs/heads/master Commit: 16fab6b0ef3dcb33f92df30e17680922ad5fb672 Parents: db2fb84 Author: Sean Owen Authored: Wed May 3 10:18:35 2017 +0100 Committer: Sean Owen Committed: Wed May 3 10:18:35 2017 +0100 -- .../apache/spark/network/yarn/YarnShuffleService.java | 4 ++-- .../main/java/org/apache/spark/unsafe/Platform.java | 3 ++- .../org/apache/spark/memory/TaskMemoryManager.java| 3 ++- .../apache/spark/scheduler/TaskSetManagerSuite.scala | 11 ++- .../spark/storage/BlockReplicationPolicySuite.scala | 1 + dev/checkstyle-suppressions.xml | 4 .../sql/streaming/JavaStructuredSessionization.java | 2 -- .../scala/org/apache/spark/graphx/lib/PageRank.scala | 14 +++--- .../scala/org/apache/spark/ml/ann/LossFunction.scala | 4 ++-- .../apache/spark/ml/clustering/GaussianMixture.scala | 2 +- .../spark/mllib/clustering/GaussianMixture.scala | 2 +- .../org/apache/spark/mllib/clustering/LDAModel.scala | 8 .../apache/spark/mllib/clustering/LDAOptimizer.scala | 12 ++-- .../org/apache/spark/mllib/clustering/LDAUtils.scala | 2 +- .../spark/ml/classification/NaiveBayesSuite.scala | 2 +- pom.xml | 4 .../scheduler/cluster/YarnSchedulerBackendSuite.scala | 2 ++ .../apache/spark/sql/streaming/GroupStateTimeout.java | 5 - .../catalyst/expressions/JsonExpressionsSuite.scala | 2 +- .../parquet/SpecificParquetRecordReaderBase.java | 5 +++-- .../spark/sql/execution/QueryExecutionSuite.scala | 2 ++ .../sql/streaming/StreamingQueryListenerSuite.scala | 1 + .../spark/sql/hive/execution/HiveDDLSuite.scala | 2 +- 23 files changed, 54 insertions(+), 43 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/16fab6b0/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java -- diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java index 4acc203..fd50e3a 100644 --- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java +++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java @@ -363,9 +363,9 @@ public class YarnShuffleService extends AuxiliaryService { // If another DB was initialized first just make sure all the DBs are in the same // location. 
Path newLoc = new Path(_recoveryPath, dbName); - Path copyFrom = new Path(f.toURI()); + Path copyFrom = new Path(f.toURI()); if (!newLoc.equals(copyFrom)) { -logger.info("Moving " + copyFrom + " to: " + newLoc); +logger.info("Moving " + copyFrom + " to: " + newLoc); try { // The move here needs to handle moving non-empty directories across NFS mounts FileSystem fs = FileSystem.getLocal(_conf); http://git-wip-us.apache.org/repos/asf/spark/blob/16fab6b0/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java -- diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java index 1321b83..4ab5b68 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java @@ -48,7 +48,8 @@ public final class Platform { boolean _unaligned; String arch = System.getProperty("os.arch", ""); if (arch.equals("ppc64le") || arch.equals("ppc64")) { - // Since java.nio.Bits.unaligned() doesn't return true on ppc (See JDK-8165231), but ppc64 and ppc64le support it + // Since java.nio.Bits.unaligned() doesn't return true on ppc (See JDK-8165231), but + // ppc64 and ppc64le support it _unaligned = true; } else
spark git commit: [SPARK-20523][BUILD] Clean up build warnings for 2.2.0 release
Repository: spark Updated Branches: refs/heads/branch-2.2 4f647ab66 -> b5947f5c3 [SPARK-20523][BUILD] Clean up build warnings for 2.2.0 release ## What changes were proposed in this pull request? Fix build warnings primarily related to Breeze 0.13 operator changes, Java style problems ## How was this patch tested? Existing tests Author: Sean Owen Closes #17803 from srowen/SPARK-20523. (cherry picked from commit 16fab6b0ef3dcb33f92df30e17680922ad5fb672) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b5947f5c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b5947f5c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b5947f5c Branch: refs/heads/branch-2.2 Commit: b5947f5c33eb403d65b1c316d1781c3d7cebf01b Parents: 4f647ab Author: Sean Owen Authored: Wed May 3 10:18:35 2017 +0100 Committer: Sean Owen Committed: Wed May 3 10:18:48 2017 +0100 -- .../apache/spark/network/yarn/YarnShuffleService.java | 4 ++-- .../main/java/org/apache/spark/unsafe/Platform.java | 3 ++- .../org/apache/spark/memory/TaskMemoryManager.java| 3 ++- .../apache/spark/scheduler/TaskSetManagerSuite.scala | 11 ++- .../spark/storage/BlockReplicationPolicySuite.scala | 1 + dev/checkstyle-suppressions.xml | 4 .../sql/streaming/JavaStructuredSessionization.java | 2 -- .../scala/org/apache/spark/graphx/lib/PageRank.scala | 14 +++--- .../scala/org/apache/spark/ml/ann/LossFunction.scala | 4 ++-- .../apache/spark/ml/clustering/GaussianMixture.scala | 2 +- .../spark/mllib/clustering/GaussianMixture.scala | 2 +- .../org/apache/spark/mllib/clustering/LDAModel.scala | 8 .../apache/spark/mllib/clustering/LDAOptimizer.scala | 12 ++-- .../org/apache/spark/mllib/clustering/LDAUtils.scala | 2 +- .../spark/ml/classification/NaiveBayesSuite.scala | 2 +- pom.xml | 4 .../scheduler/cluster/YarnSchedulerBackendSuite.scala | 2 ++ .../apache/spark/sql/streaming/GroupStateTimeout.java | 5 - .../catalyst/expressions/JsonExpressionsSuite.scala | 2 +- .../parquet/SpecificParquetRecordReaderBase.java | 5 +++-- .../spark/sql/execution/QueryExecutionSuite.scala | 2 ++ .../sql/streaming/StreamingQueryListenerSuite.scala | 1 + .../spark/sql/hive/execution/HiveDDLSuite.scala | 2 +- 23 files changed, 54 insertions(+), 43 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b5947f5c/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java -- diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java index 4acc203..fd50e3a 100644 --- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java +++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java @@ -363,9 +363,9 @@ public class YarnShuffleService extends AuxiliaryService { // If another DB was initialized first just make sure all the DBs are in the same // location. 
Path newLoc = new Path(_recoveryPath, dbName); - Path copyFrom = new Path(f.toURI()); + Path copyFrom = new Path(f.toURI()); if (!newLoc.equals(copyFrom)) { -logger.info("Moving " + copyFrom + " to: " + newLoc); +logger.info("Moving " + copyFrom + " to: " + newLoc); try { // The move here needs to handle moving non-empty directories across NFS mounts FileSystem fs = FileSystem.getLocal(_conf); http://git-wip-us.apache.org/repos/asf/spark/blob/b5947f5c/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java -- diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java index 1321b83..4ab5b68 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java @@ -48,7 +48,8 @@ public final class Platform { boolean _unaligned; String arch = System.getProperty("os.arch", ""); if (arch.equals("ppc64le") || arch.equals("ppc64")) { - // Since java.nio.Bits.unaligned() doesn't return true on ppc (See JDK-8165231), but ppc64 and ppc64le support it + // Since java.nio.Bits.unaligned() doesn't return true on
spark git commit: [SPARK-16957][MLLIB] Use midpoints for split values.
Repository: spark Updated Branches: refs/heads/master 16fab6b0e -> 7f96f2d7f [SPARK-16957][MLLIB] Use midpoints for split values. ## What changes were proposed in this pull request? Use midpoints for split values now, and maybe later to make it weighted. ## How was this patch tested? + [x] add unit test. + [x] revise Split's unit test. Author: Yan Facai (é¢åæ) Author: é¢åæï¼Yan Facaiï¼ Closes #17556 from facaiy/ENH/decision_tree_overflow_and_precision_in_aggregation. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7f96f2d7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7f96f2d7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7f96f2d7 Branch: refs/heads/master Commit: 7f96f2d7f2d5abf81dd7f8ca27fea35cf798fd65 Parents: 16fab6b Author: Yan Facai (é¢åæ) Authored: Wed May 3 10:54:40 2017 +0100 Committer: Sean Owen Committed: Wed May 3 10:54:40 2017 +0100 -- .../spark/ml/tree/impl/RandomForest.scala | 15 --- .../spark/ml/tree/impl/RandomForestSuite.scala | 41 +--- python/pyspark/mllib/tree.py| 12 +++--- 3 files changed, 51 insertions(+), 17 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7f96f2d7/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala index 008dd19..82e1ed8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala @@ -996,7 +996,7 @@ private[spark] object RandomForest extends Logging { require(metadata.isContinuous(featureIndex), "findSplitsForContinuousFeature can only be used to find splits for a continuous feature.") -val splits = if (featureSamples.isEmpty) { +val splits: Array[Double] = if (featureSamples.isEmpty) { Array.empty[Double] } else { val numSplits = metadata.numSplits(featureIndex) @@ -1009,10 +1009,15 @@ private[spark] object RandomForest extends Logging { // sort distinct values val valueCounts = valueCountMap.toSeq.sortBy(_._1).toArray - // if possible splits is not enough or just enough, just return all possible splits val possibleSplits = valueCounts.length - 1 - if (possibleSplits <= numSplits) { -valueCounts.map(_._1).init + if (possibleSplits == 0) { +// constant feature +Array.empty[Double] + } else if (possibleSplits <= numSplits) { +// if possible splits is not enough or just enough, just return all possible splits +(1 to possibleSplits) + .map(index => (valueCounts(index - 1)._1 + valueCounts(index)._1) / 2.0) + .toArray } else { // stride between splits val stride: Double = numSamples.toDouble / (numSplits + 1) @@ -1037,7 +1042,7 @@ private[spark] object RandomForest extends Logging { // makes the gap between currentCount and targetCount smaller, // previous value is a split threshold. 
if (previousGap < currentGap) { -splitsBuilder += valueCounts(index - 1)._1 +splitsBuilder += (valueCounts(index - 1)._1 + valueCounts(index)._1) / 2.0 targetCount += stride } index += 1 http://git-wip-us.apache.org/repos/asf/spark/blob/7f96f2d7/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala index e1ab7c2..df155b4 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala @@ -104,6 +104,31 @@ class RandomForestSuite extends SparkFunSuite with MLlibTestSparkContext { assert(splits.distinct.length === splits.length) } +// SPARK-16957: Use midpoints for split values. +{ + val fakeMetadata = new DecisionTreeMetadata(1, 0, 0, 0, +Map(), Set(), +Array(3), Gini, QuantileStrategy.Sort, +0, 0, 0.0, 0, 0 + ) + + // possibleSplits <= numSplits + { +val featureSamples = Array(0, 1, 0, 0, 1, 0, 1, 1).map(_.toDouble) +val splits = RandomForest.findSplitsForContinuousFeature(featureSamples, fakeMetadata, 0) +val expectedSplits = Array((0.0 + 1.0) / 2) +assert(splits === expectedSplits) + } + + // possibl
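The essence of the change is that candidate thresholds now sit halfway between adjacent distinct feature values instead of on the values themselves. A simplified, self-contained sketch of that midpoint computation (illustrative only; the real `findSplitsForContinuousFeature` also handles the sampled, stride-based case shown in the diff):

```scala
object MidpointSplits {
  // Given sampled values of one continuous feature, return candidate split
  // thresholds at the midpoints of adjacent distinct values.
  def midpointSplits(featureSamples: Seq[Double]): Array[Double] = {
    val distinctSorted = featureSamples.distinct.sorted.toArray
    if (distinctSorted.length <= 1) {
      Array.empty[Double]                    // constant feature: no usable splits
    } else {
      (1 until distinctSorted.length)
        .map(i => (distinctSorted(i - 1) + distinctSorted(i)) / 2.0)
        .toArray
    }
  }

  def main(args: Array[String]): Unit = {
    // Same shape as the new unit test: values {0.0, 1.0} yield one split at 0.5.
    val samples = Seq(0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0)
    println(midpointSplits(samples).mkString(", "))   // 0.5
  }
}
```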
spark git commit: [INFRA] Close stale PRs
Repository: spark Updated Branches: refs/heads/master 0d16faab9 -> 4411ac705 [INFRA] Close stale PRs ## What changes were proposed in this pull request? This PR proposes to close a stale PR, several PRs suggested to be closed by a committer and obviously inappropriate PRs. Closes #9 Closes #17853 Closes #17732 Closes #17456 Closes #17410 Closes #17314 Closes #17362 Closes #17542 ## How was this patch tested? N/A Author: hyukjinkwon Closes #17855 from HyukjinKwon/close-pr. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4411ac70 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4411ac70 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4411ac70 Branch: refs/heads/master Commit: 4411ac70524ced901f7807d492fb0ad2480a8841 Parents: 0d16faa Author: hyukjinkwon Authored: Fri May 5 09:50:40 2017 +0100 Committer: Sean Owen Committed: Fri May 5 09:50:40 2017 +0100 -- -- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-19660][SQL] Replace the deprecated property name fs.default.name to fs.defaultFS that newly introduced
Repository: spark Updated Branches: refs/heads/master 4411ac705 -> 37cdf077c [SPARK-19660][SQL] Replace the deprecated property name fs.default.name to fs.defaultFS that newly introduced ## What changes were proposed in this pull request? Replace the deprecated property name `fs.default.name` to `fs.defaultFS` that newly introduced. ## How was this patch tested? Existing tests Author: Yuming Wang Closes #17856 from wangyum/SPARK-19660. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/37cdf077 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/37cdf077 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/37cdf077 Branch: refs/heads/master Commit: 37cdf077cd3f436f777562df311e3827b0727ce7 Parents: 4411ac7 Author: Yuming Wang Authored: Fri May 5 11:31:59 2017 +0100 Committer: Sean Owen Committed: Fri May 5 11:31:59 2017 +0100 -- .../spark/sql/execution/streaming/state/StateStoreSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/37cdf077/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index ebb7422..cc09b2d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -314,7 +314,7 @@ class StateStoreSuite extends SparkFunSuite with BeforeAndAfter with PrivateMeth test("SPARK-19677: Committing a delta file atop an existing one should not fail on HDFS") { val conf = new Configuration() conf.set("fs.fake.impl", classOf[RenameLikeHDFSFileSystem].getName) -conf.set("fs.default.name", "fake:///") +conf.set("fs.defaultFS", "fake:///") val provider = newStoreProvider(hadoopConf = conf) provider.getStore(0).commit() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
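For reference, the renamed key is a standard Hadoop `Configuration` property. A small sketch of setting it directly (assumes `hadoop-common` on the classpath; the `hdfs://namenode:8020` URI is a placeholder, not taken from the patch):

```scala
import org.apache.hadoop.conf.Configuration

object DefaultFsConfig {
  def main(args: Array[String]): Unit = {
    val conf = new Configuration()

    // Deprecated key, still honoured by Hadoop but flagged as deprecated:
    // conf.set("fs.default.name", "hdfs://namenode:8020")

    // Current key, as used in the patched StateStoreSuite test:
    conf.set("fs.defaultFS", "hdfs://namenode:8020")

    println(conf.get("fs.defaultFS"))   // hdfs://namenode:8020
  }
}
```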
spark git commit: [SPARK-19660][SQL] Replace the deprecated property name fs.default.name to fs.defaultFS that newly introduced
Repository: spark Updated Branches: refs/heads/branch-2.2 c8756288d -> 7cb566abc [SPARK-19660][SQL] Replace the deprecated property name fs.default.name to fs.defaultFS that newly introduced ## What changes were proposed in this pull request? Replace the deprecated property name `fs.default.name` to `fs.defaultFS` that newly introduced. ## How was this patch tested? Existing tests Author: Yuming Wang Closes #17856 from wangyum/SPARK-19660. (cherry picked from commit 37cdf077cd3f436f777562df311e3827b0727ce7) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7cb566ab Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7cb566ab Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7cb566ab Branch: refs/heads/branch-2.2 Commit: 7cb566abc27d41d5816dee16c6ecb749da2adf46 Parents: c875628 Author: Yuming Wang Authored: Fri May 5 11:31:59 2017 +0100 Committer: Sean Owen Committed: Fri May 5 11:32:07 2017 +0100 -- .../spark/sql/execution/streaming/state/StateStoreSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7cb566ab/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala index ebb7422..cc09b2d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/state/StateStoreSuite.scala @@ -314,7 +314,7 @@ class StateStoreSuite extends SparkFunSuite with BeforeAndAfter with PrivateMeth test("SPARK-19677: Committing a delta file atop an existing one should not fail on HDFS") { val conf = new Configuration() conf.set("fs.fake.impl", classOf[RenameLikeHDFSFileSystem].getName) -conf.set("fs.default.name", "fake:///") +conf.set("fs.defaultFS", "fake:///") val provider = newStoreProvider(hadoopConf = conf) provider.getStore(0).commit() - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20546][DEPLOY] spark-class gets syntax error in posix mode
Repository: spark Updated Branches: refs/heads/master 37cdf077c -> 5773ab121 [SPARK-20546][DEPLOY] spark-class gets syntax error in posix mode ## What changes were proposed in this pull request? Updated spark-class to turn off posix mode so the process substitution doesn't cause a syntax error. ## How was this patch tested? Existing unit tests, manual spark-shell testing with posix mode on Author: jyu00 Closes #17852 from jyu00/master. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5773ab12 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5773ab12 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5773ab12 Branch: refs/heads/master Commit: 5773ab121d5d7cbefeef17ff4ac6f8af36cc1251 Parents: 37cdf07 Author: jyu00 Authored: Fri May 5 11:36:51 2017 +0100 Committer: Sean Owen Committed: Fri May 5 11:36:51 2017 +0100 -- bin/spark-class | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5773ab12/bin/spark-class -- diff --git a/bin/spark-class b/bin/spark-class index 77ea40c..65d3b96 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -72,6 +72,8 @@ build_command() { printf "%d\0" $? } +# Turn off posix mode since it does not allow process substitution +set +o posix CMD=() while IFS= read -d '' -r ARG; do CMD+=("$ARG") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20546][DEPLOY] spark-class gets syntax error in posix mode
Repository: spark Updated Branches: refs/heads/branch-2.2 7cb566abc -> dbb54a7b3 [SPARK-20546][DEPLOY] spark-class gets syntax error in posix mode ## What changes were proposed in this pull request? Updated spark-class to turn off posix mode so the process substitution doesn't cause a syntax error. ## How was this patch tested? Existing unit tests, manual spark-shell testing with posix mode on Author: jyu00 Closes #17852 from jyu00/master. (cherry picked from commit 5773ab121d5d7cbefeef17ff4ac6f8af36cc1251) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dbb54a7b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dbb54a7b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dbb54a7b Branch: refs/heads/branch-2.2 Commit: dbb54a7b39568cc9e8046a86113b98c3c69b7d11 Parents: 7cb566a Author: jyu00 Authored: Fri May 5 11:36:51 2017 +0100 Committer: Sean Owen Committed: Fri May 5 11:36:58 2017 +0100 -- bin/spark-class | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dbb54a7b/bin/spark-class -- diff --git a/bin/spark-class b/bin/spark-class index 77ea40c..65d3b96 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -72,6 +72,8 @@ build_command() { printf "%d\0" $? } +# Turn off posix mode since it does not allow process substitution +set +o posix CMD=() while IFS= read -d '' -r ARG; do CMD+=("$ARG") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20546][DEPLOY] spark-class gets syntax error in posix mode
Repository: spark Updated Branches: refs/heads/branch-2.1 d10b0f654 -> 179f5370e [SPARK-20546][DEPLOY] spark-class gets syntax error in posix mode ## What changes were proposed in this pull request? Updated spark-class to turn off posix mode so the process substitution doesn't cause a syntax error. ## How was this patch tested? Existing unit tests, manual spark-shell testing with posix mode on Author: jyu00 Closes #17852 from jyu00/master. (cherry picked from commit 5773ab121d5d7cbefeef17ff4ac6f8af36cc1251) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/179f5370 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/179f5370 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/179f5370 Branch: refs/heads/branch-2.1 Commit: 179f5370e68aa3c1f035f8ac400129c3935e96f8 Parents: d10b0f6 Author: jyu00 Authored: Fri May 5 11:36:51 2017 +0100 Committer: Sean Owen Committed: Fri May 5 11:37:10 2017 +0100 -- bin/spark-class | 2 ++ 1 file changed, 2 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/179f5370/bin/spark-class -- diff --git a/bin/spark-class b/bin/spark-class index 77ea40c..65d3b96 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -72,6 +72,8 @@ build_command() { printf "%d\0" $? } +# Turn off posix mode since it does not allow process substitution +set +o posix CMD=() while IFS= read -d '' -r ARG; do CMD+=("$ARG") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20518][CORE] Supplement the new blockidsuite unit tests
Repository: spark Updated Branches: refs/heads/master 63d90e7da -> 37f963ac1 [SPARK-20518][CORE] Supplement the new blockidsuite unit tests ## What changes were proposed in this pull request? This PR adds the new unit tests to support ShuffleDataBlockId , ShuffleIndexBlockId , TempShuffleBlockId , TempLocalBlockId ## How was this patch tested? The new unit test. Author: caoxuewen Closes #17794 from heary-cao/blockidsuite. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/37f963ac Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/37f963ac Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/37f963ac Branch: refs/heads/master Commit: 37f963ac13ec1bd958c44c7c15b5e8cb6c06cbbc Parents: 63d90e7 Author: caoxuewen Authored: Sun May 7 10:08:06 2017 +0100 Committer: Sean Owen Committed: Sun May 7 10:08:06 2017 +0100 -- .../org/apache/spark/storage/BlockIdSuite.scala | 52 1 file changed, 52 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/37f963ac/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala index 89ed031..f0c521b 100644 --- a/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/BlockIdSuite.scala @@ -17,6 +17,8 @@ package org.apache.spark.storage +import java.util.UUID + import org.apache.spark.SparkFunSuite class BlockIdSuite extends SparkFunSuite { @@ -67,6 +69,32 @@ class BlockIdSuite extends SparkFunSuite { assertSame(id, BlockId(id.toString)) } + test("shuffle data") { +val id = ShuffleDataBlockId(4, 5, 6) +assertSame(id, ShuffleDataBlockId(4, 5, 6)) +assertDifferent(id, ShuffleDataBlockId(6, 5, 6)) +assert(id.name === "shuffle_4_5_6.data") +assert(id.asRDDId === None) +assert(id.shuffleId === 4) +assert(id.mapId === 5) +assert(id.reduceId === 6) +assert(!id.isShuffle) +assertSame(id, BlockId(id.toString)) + } + + test("shuffle index") { +val id = ShuffleIndexBlockId(7, 8, 9) +assertSame(id, ShuffleIndexBlockId(7, 8, 9)) +assertDifferent(id, ShuffleIndexBlockId(9, 8, 9)) +assert(id.name === "shuffle_7_8_9.index") +assert(id.asRDDId === None) +assert(id.shuffleId === 7) +assert(id.mapId === 8) +assert(id.reduceId === 9) +assert(!id.isShuffle) +assertSame(id, BlockId(id.toString)) + } + test("broadcast") { val id = BroadcastBlockId(42) assertSame(id, BroadcastBlockId(42)) @@ -101,6 +129,30 @@ class BlockIdSuite extends SparkFunSuite { assertSame(id, BlockId(id.toString)) } + test("temp local") { +val id = TempLocalBlockId(new UUID(5, 2)) +assertSame(id, TempLocalBlockId(new UUID(5, 2))) +assertDifferent(id, TempLocalBlockId(new UUID(5, 3))) +assert(id.name === "temp_local_--0005--0002") +assert(id.asRDDId === None) +assert(id.isBroadcast === false) +assert(id.id.getMostSignificantBits() === 5) +assert(id.id.getLeastSignificantBits() === 2) +assert(!id.isShuffle) + } + + test("temp shuffle") { +val id = TempShuffleBlockId(new UUID(1, 2)) +assertSame(id, TempShuffleBlockId(new UUID(1, 2))) +assertDifferent(id, TempShuffleBlockId(new UUID(1, 3))) +assert(id.name === "temp_shuffle_--0001--0002") +assert(id.asRDDId === None) +assert(id.isBroadcast === false) +assert(id.id.getMostSignificantBits() === 1) +assert(id.id.getLeastSignificantBits() === 2) +assert(!id.isShuffle) + } + test("test") { val id = TestBlockId("abc") assertSame(id, TestBlockId("abc")) - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
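The tests added above all pin down the same invariant: each `BlockId` subtype encodes its identifiers into a stable string name, and `BlockId(name)` parses that name back into an equal value. A minimal sketch of that round trip using the `ShuffleDataBlockId` naming shown in the diff (a hedged illustration; it assumes the `org.apache.spark.storage` DeveloperApi classes are on the classpath):

```
import org.apache.spark.storage.{BlockId, ShuffleDataBlockId}

// (shuffleId, mapId, reduceId) = (4, 5, 6) serializes to "shuffle_4_5_6.data" ...
val id = ShuffleDataBlockId(4, 5, 6)
assert(id.name == "shuffle_4_5_6.data")

// ... and BlockId's string-based constructor parses the name back to an equal instance,
// which is exactly what assertSame(id, BlockId(id.toString)) checks in the suite.
assert(BlockId(id.name) == id)
```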
spark git commit: [SPARK-20484][MLLIB] Add documentation to ALS code
Repository: spark Updated Branches: refs/heads/master 37f963ac1 -> 88e6d7507 [SPARK-20484][MLLIB] Add documentation to ALS code ## What changes were proposed in this pull request? This PR adds documentation to the ALS code. ## How was this patch tested? Existing tests were used. mengxr srowen This contribution is my original work. I have the license to work on this project under the Spark project's open source license. Author: Daniel Li Closes #17793 from danielyli/spark-20484. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/88e6d750 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/88e6d750 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/88e6d750 Branch: refs/heads/master Commit: 88e6d75072c23fa99d4df00d087d03d8c38e8c69 Parents: 37f963a Author: Daniel Li Authored: Sun May 7 10:09:58 2017 +0100 Committer: Sean Owen Committed: Sun May 7 10:09:58 2017 +0100 -- .../apache/spark/ml/recommendation/ALS.scala| 236 --- 1 file changed, 202 insertions(+), 34 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/88e6d750/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index a20ef72..1562bf1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -774,6 +774,28 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { /** * :: DeveloperApi :: * Implementation of the ALS algorithm. + * + * This implementation of the ALS factorization algorithm partitions the two sets of factors among + * Spark workers so as to reduce network communication by only sending one copy of each factor + * vector to each Spark worker on each iteration, and only if needed. This is achieved by + * precomputing some information about the ratings matrix to determine which users require which + * item factors and vice versa. See the Scaladoc for `InBlock` for a detailed explanation of how + * the precomputation is done. + * + * In addition, since each iteration of calculating the factor matrices depends on the known + * ratings, which are spread across Spark partitions, a naive implementation would incur + * significant network communication overhead between Spark workers, as the ratings RDD would be + * repeatedly shuffled during each iteration. This implementation reduces that overhead by + * performing the shuffling operation up front, precomputing each partition's ratings dependencies + * and duplicating those values to the appropriate workers before starting iterations to solve for + * the factor matrices. See the Scaladoc for `OutBlock` for a detailed explanation of how the + * precomputation is done. + * + * Note that the term "rating block" is a bit of a misnomer, as the ratings are not partitioned by + * contiguous blocks from the ratings matrix but by a hash function on the rating's location in + * the matrix. If it helps you to visualize the partitions, it is easier to think of the term + * "block" as referring to a subset of an RDD containing the ratings rather than a contiguous + * submatrix of the ratings matrix.
*/ @DeveloperApi def train[ID: ClassTag]( // scalastyle:ignore @@ -791,32 +813,43 @@ object ALS extends DefaultParamsReadable[ALS] with Logging { checkpointInterval: Int = 10, seed: Long = 0L)( implicit ord: Ordering[ID]): (RDD[(ID, Array[Float])], RDD[(ID, Array[Float])]) = { + require(!ratings.isEmpty(), s"No ratings available from $ratings") require(intermediateRDDStorageLevel != StorageLevel.NONE, "ALS is not designed to run without persisting intermediate RDDs.") + val sc = ratings.sparkContext + +// Precompute the rating dependencies of each partition val userPart = new ALSPartitioner(numUserBlocks) val itemPart = new ALSPartitioner(numItemBlocks) -val userLocalIndexEncoder = new LocalIndexEncoder(userPart.numPartitions) -val itemLocalIndexEncoder = new LocalIndexEncoder(itemPart.numPartitions) -val solver = if (nonnegative) new NNLSSolver else new CholeskySolver val blockRatings = partitionRatings(ratings, userPart, itemPart) .persist(intermediateRDDStorageLevel) val (userInBlocks, userOutBlocks) = makeBlocks("user", blockRatings, userPart, itemPart, intermediateRDDStorageLevel) -// materialize blockRatings and user blocks -userOutBlocks.count() +userO
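The block structure that the new Scaladoc describes is driven from the public estimator API: the number of user and item blocks determines how the factors and their precomputed in/out blocks are partitioned. A short, hedged sketch of wiring those knobs up — the `ratings` DataFrame and its column names are assumptions for illustration, not part of the change above:

```
import org.apache.spark.ml.recommendation.ALS

val als = new ALS()
  .setMaxIter(10)
  .setRegParam(0.1)
  .setNumUserBlocks(8)   // how many partitions hold the user factors (the "blocks" documented above)
  .setNumItemBlocks(8)   // likewise for the item factors
  .setUserCol("userId")
  .setItemCol("movieId")
  .setRatingCol("rating")

// `ratings` is an assumed DataFrame with (userId, movieId, rating) columns;
// fit() runs the blocked implementation whose internals the Scaladoc explains.
val model = als.fit(ratings)
```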
spark git commit: [SPARK-7481][BUILD] Add spark-hadoop-cloud module to pull in object store access.
Repository: spark Updated Branches: refs/heads/master 88e6d7507 -> 2cf83c478 [SPARK-7481][BUILD] Add spark-hadoop-cloud module to pull in object store access. ## What changes were proposed in this pull request? Add a new `spark-hadoop-cloud` module and maven profile to pull in object store support from `hadoop-openstack`, `hadoop-aws` and `hadoop-azure` (Hadoop 2.7+) JARs, along with their dependencies, fixing up the dependencies so that everything works, in particular Jackson. It restores `s3n://` access to S3, adds its `s3a://` replacement, OpenStack `swift://` and azure `wasb://`. There's a documentation page, `cloud_integration.md`, which covers the basic details of using Spark with object stores, referring the reader to the supplier's own documentation, with specific warnings on security and the possible mismatch between a store's behavior and that of a filesystem. In particular, users are advised be very cautious when trying to use an object store as the destination of data, and to consult the documentation of the storage supplier and the connector. (this is the successor to #12004; I can't re-open it) ## How was this patch tested? Downstream tests exist in [https://github.com/steveloughran/spark-cloud-examples/tree/master/cloud-examples](https://github.com/steveloughran/spark-cloud-examples/tree/master/cloud-examples) Those verify that the dependencies are sufficient to allow downstream applications to work with s3a, azure wasb and swift storage connectors, and perform basic IO & dataframe operations thereon. All seems well. Manually clean build & verify that assembly contains the relevant aws-* hadoop-* artifacts on Hadoop 2.6; azure on a hadoop-2.7 profile. SBT build: `build/sbt -Phadoop-cloud -Phadoop-2.7 package` maven build `mvn install -Phadoop-cloud -Phadoop-2.7` This PR *does not* update `dev/deps/spark-deps-hadoop-2.7` or `dev/deps/spark-deps-hadoop-2.6`, because unless the hadoop-cloud profile is enabled, no extra JARs show up in the dependency list. The dependency check in Jenkins isn't setting the property, so the new JARs aren't visible. Author: Steve Loughran Author: Steve Loughran Closes #17834 from steveloughran/cloud/SPARK-7481-current. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2cf83c47 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2cf83c47 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2cf83c47 Branch: refs/heads/master Commit: 2cf83c47838115f71419ba5b9296c69ec1d746cd Parents: 88e6d75 Author: Steve Loughran Authored: Sun May 7 10:15:31 2017 +0100 Committer: Sean Owen Committed: Sun May 7 10:15:31 2017 +0100 -- assembly/pom.xml| 14 +++ docs/cloud-integration.md | 200 +++ docs/index.md | 1 + docs/rdd-programming-guide.md | 6 +- docs/storage-openstack-swift.md | 38 +++ hadoop-cloud/pom.xml| 185 pom.xml | 7 ++ project/SparkBuild.scala| 4 +- 8 files changed, 424 insertions(+), 31 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2cf83c47/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 742a4a1..464af16 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -226,5 +226,19 @@ provided + + + + hadoop-cloud + + + org.apache.spark + spark-hadoop-cloud_${scala.binary.version} + ${project.version} + + + http://git-wip-us.apache.org/repos/asf/spark/blob/2cf83c47/docs/cloud-integration.md -- diff --git a/docs/cloud-integration.md b/docs/cloud-integration.md new file mode 100644 index 000..751a192 --- /dev/null +++ b/docs/cloud-integration.md @@ -0,0 +1,200 @@ +--- +layout: global +displayTitle: Integration with Cloud Infrastructures +title: Integration with Cloud Infrastructures +description: Introduction to cloud storage support in Apache Spark SPARK_VERSION_SHORT +--- + + +* This will become a table of contents (this text will be scraped). +{:toc} + +## Introduction + + +All major cloud providers offer persistent data storage in *object stores*. +These are not classic "POSIX" file systems. +In order to store hundreds of petabytes of data without any single points of failure, +object stores replace the classic filesystem directory tree +with a simpler model of `object-name => data`. To enable remote access, operations +on objects are usually offered as (slow) HTTP REST operations. + +Spark can read and write data in object stores through filesystem connectors implement
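Once Spark is built with the `hadoop-cloud` profile, the object-store connectors are reached through ordinary filesystem URIs; nothing in the Spark API changes. A hedged sketch of reading and writing via `s3a://` — the bucket names, paths, and the environment variables holding credentials are assumptions for illustration only:

```
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder()
  .appName("object-store-example")
  // The spark.hadoop.* prefix forwards these settings to the Hadoop configuration used by s3a.
  .config("spark.hadoop.fs.s3a.access.key", sys.env("AWS_ACCESS_KEY_ID"))
  .config("spark.hadoop.fs.s3a.secret.key", sys.env("AWS_SECRET_ACCESS_KEY"))
  .getOrCreate()

// Reads and writes go through the connector like any Hadoop filesystem; the new
// cloud-integration.md page covers the consistency and commit caveats on writes.
val logs = spark.read.json("s3a://example-bucket/input/")
logs.write.parquet("s3a://example-bucket/output/")
```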
spark git commit: [SPARK-20519][SQL][CORE] Modify to prevent some possible runtime exceptions
Repository: spark Updated Branches: refs/heads/master 2fdaeb52b -> 0f820e2b6 [SPARK-20519][SQL][CORE] Modify to prevent some possible runtime exceptions Signed-off-by: liuxian ## What changes were proposed in this pull request? When the input parameter is null, may be a runtime exception occurs ## How was this patch tested? Existing unit tests Author: liuxian Closes #17796 from 10110346/wip_lx_0428. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0f820e2b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0f820e2b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0f820e2b Branch: refs/heads/master Commit: 0f820e2b6c507dc4156703862ce65e598ca41cca Parents: 2fdaeb5 Author: liuxian Authored: Mon May 8 10:00:58 2017 +0100 Committer: Sean Owen Committed: Mon May 8 10:00:58 2017 +0100 -- .../main/scala/org/apache/spark/api/python/PythonRDD.scala | 2 +- .../main/scala/org/apache/spark/deploy/DeployMessage.scala | 8 .../main/scala/org/apache/spark/deploy/master/Master.scala | 2 +- .../org/apache/spark/deploy/master/MasterArguments.scala| 4 ++-- .../scala/org/apache/spark/deploy/master/WorkerInfo.scala | 2 +- .../main/scala/org/apache/spark/deploy/worker/Worker.scala | 2 +- .../org/apache/spark/deploy/worker/WorkerArguments.scala| 4 ++-- .../src/main/scala/org/apache/spark/executor/Executor.scala | 2 +- .../scala/org/apache/spark/storage/BlockManagerId.scala | 2 +- core/src/main/scala/org/apache/spark/util/RpcUtils.scala| 2 +- core/src/main/scala/org/apache/spark/util/Utils.scala | 9 + .../deploy/mesos/MesosClusterDispatcherArguments.scala | 2 +- 12 files changed, 21 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0f820e2b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala index b0dd2fc..fb0405b 100644 --- a/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala +++ b/core/src/main/scala/org/apache/spark/api/python/PythonRDD.scala @@ -879,7 +879,7 @@ private[spark] class PythonAccumulatorV2( private val serverPort: Int) extends CollectionAccumulator[Array[Byte]] { - Utils.checkHost(serverHost, "Expected hostname") + Utils.checkHost(serverHost) val bufferSize = SparkEnv.get.conf.getInt("spark.buffer.size", 65536) http://git-wip-us.apache.org/repos/asf/spark/blob/0f820e2b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala index ac09c6c..b5cb3f0 100644 --- a/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/DeployMessage.scala @@ -43,7 +43,7 @@ private[deploy] object DeployMessages { memory: Int, workerWebUiUrl: String) extends DeployMessage { -Utils.checkHost(host, "Required hostname") +Utils.checkHost(host) assert (port > 0) } @@ -131,7 +131,7 @@ private[deploy] object DeployMessages { // TODO(matei): replace hostPort with host case class ExecutorAdded(id: Int, workerId: String, hostPort: String, cores: Int, memory: Int) { -Utils.checkHostPort(hostPort, "Required hostport") +Utils.checkHostPort(hostPort) } case class ExecutorUpdated(id: Int, state: ExecutorState, message: Option[String], @@ -183,7 +183,7 @@ private[deploy] object DeployMessages { completedDrivers: Array[DriverInfo], status: 
MasterState) { -Utils.checkHost(host, "Required hostname") +Utils.checkHost(host) assert (port > 0) def uri: String = "spark://" + host + ":" + port @@ -201,7 +201,7 @@ private[deploy] object DeployMessages { drivers: List[DriverRunner], finishedDrivers: List[DriverRunner], masterUrl: String, cores: Int, memory: Int, coresUsed: Int, memoryUsed: Int, masterWebUiUrl: String) { -Utils.checkHost(host, "Required hostname") +Utils.checkHost(host) assert (port > 0) } http://git-wip-us.apache.org/repos/asf/spark/blob/0f820e2b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala b/core/src/main/scala/org/apache/spark/deploy/master/Master.scala index 816bf37..e061939 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/Master.scala +++ b/core/src/main/scala/org/apache/spark/
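The diff above drops the per-call-site message argument from `Utils.checkHost`/`checkHostPort`, so the validation — including handling of a null input — lives in one place rather than in every caller. The following is not the actual Spark implementation, only a sketch of the kind of null-safe precondition the callers now rely on:

```
// Sketch only: a host string must be non-null and must not carry a port.
def checkHost(host: String): Unit = {
  require(host != null && !host.contains(":"), s"Expected hostname, got: $host")
}

// Call sites then follow the pattern in the diff:
// Utils.checkHost(host)
```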
spark git commit: [SPARK-20615][ML][TEST] SparseVector.argmax throws IndexOutOfBoundsException
Repository: spark Updated Branches: refs/heads/master 10b00abad -> be53a7835 [SPARK-20615][ML][TEST] SparseVector.argmax throws IndexOutOfBoundsException ## What changes were proposed in this pull request? Added a check for for the number of defined values. Previously the argmax function assumed that at least one value was defined if the vector size was greater than zero. ## How was this patch tested? Tests were added to the existing VectorsSuite to cover this case. Author: Jon McLean Closes #17877 from jonmclean/vectorArgmaxIndexBug. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/be53a783 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/be53a783 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/be53a783 Branch: refs/heads/master Commit: be53a78352ae7c70d8a07d0df24574b3e3129b4a Parents: 10b00ab Author: Jon McLean Authored: Tue May 9 09:47:50 2017 +0100 Committer: Sean Owen Committed: Tue May 9 09:47:50 2017 +0100 -- .../src/main/scala/org/apache/spark/ml/linalg/Vectors.scala | 2 ++ .../test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala | 7 +++ .../main/scala/org/apache/spark/mllib/linalg/Vectors.scala| 2 ++ .../scala/org/apache/spark/mllib/linalg/VectorsSuite.scala| 7 +++ 4 files changed, 18 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/be53a783/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala -- diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 8e166ba..3fbc095 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -657,6 +657,8 @@ class SparseVector @Since("2.0.0") ( override def argmax: Int = { if (size == 0) { -1 +} else if (numActives == 0) { + 0 } else { // Find the max active entry. 
var maxIdx = indices(0) http://git-wip-us.apache.org/repos/asf/spark/blob/be53a783/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala -- diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala index dfbdaf1..4cd91af 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala @@ -125,6 +125,13 @@ class VectorsSuite extends SparkMLFunSuite { val vec8 = Vectors.sparse(5, Array(1, 2), Array(0.0, -1.0)) assert(vec8.argmax === 0) + +// Check for case when sparse vector is non-empty but the values are empty +val vec9 = Vectors.sparse(100, Array.empty[Int], Array.empty[Double]).asInstanceOf[SparseVector] +assert(vec9.argmax === 0) + +val vec10 = Vectors.sparse(1, Array.empty[Int], Array.empty[Double]).asInstanceOf[SparseVector] +assert(vec10.argmax === 0) } test("vector equals") { http://git-wip-us.apache.org/repos/asf/spark/blob/be53a783/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 723addc..f063420 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -846,6 +846,8 @@ class SparseVector @Since("1.0.0") ( override def argmax: Int = { if (size == 0) { -1 +} else if (numActives == 0) { + 0 } else { // Find the max active entry. var maxIdx = indices(0) http://git-wip-us.apache.org/repos/asf/spark/blob/be53a783/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index 71a3cea..6172cff 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -122,6 +122,13 @@ class VectorsSuite extends SparkFunSuite with Logging { val vec8 = Vectors.sparse(5, Array(1, 2), Array(0.0, -1.0)) assert(vec8.argmax === 0) + +// Check for case when sparse vector is non-empty but the values are empty +val vec9 = Vectors.sparse(100, Array.empty[Int], Arra
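The failure mode is easy to reproduce from the public API: a `SparseVector` can have a non-zero size while storing no active entries, and `argmax` previously indexed into the empty `indices` array. A small sketch of the behavior the new tests pin down:

```
import org.apache.spark.ml.linalg.Vectors

// Non-empty vector (size 100) with no stored values: every element is an implicit 0.0.
val v = Vectors.sparse(100, Array.empty[Int], Array.empty[Double])

// Previously this threw IndexOutOfBoundsException; with the new numActives == 0
// branch it returns 0, the index of the first (tied) maximum.
assert(v.argmax == 0)
```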
spark git commit: [SPARK-20615][ML][TEST] SparseVector.argmax throws IndexOutOfBoundsException
Repository: spark Updated Branches: refs/heads/branch-2.1 a1112c615 -> f7a91a17e [SPARK-20615][ML][TEST] SparseVector.argmax throws IndexOutOfBoundsException ## What changes were proposed in this pull request? Added a check for for the number of defined values. Previously the argmax function assumed that at least one value was defined if the vector size was greater than zero. ## How was this patch tested? Tests were added to the existing VectorsSuite to cover this case. Author: Jon McLean Closes #17877 from jonmclean/vectorArgmaxIndexBug. (cherry picked from commit be53a78352ae7c70d8a07d0df24574b3e3129b4a) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f7a91a17 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f7a91a17 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f7a91a17 Branch: refs/heads/branch-2.1 Commit: f7a91a17e8e20965b3e634e611690a96f72cec6b Parents: a1112c6 Author: Jon McLean Authored: Tue May 9 09:47:50 2017 +0100 Committer: Sean Owen Committed: Tue May 9 09:48:09 2017 +0100 -- .../src/main/scala/org/apache/spark/ml/linalg/Vectors.scala | 2 ++ .../test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala | 7 +++ .../main/scala/org/apache/spark/mllib/linalg/Vectors.scala| 2 ++ .../scala/org/apache/spark/mllib/linalg/VectorsSuite.scala| 7 +++ 4 files changed, 18 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f7a91a17/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala -- diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 22e4ec6..7bc2cb1 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -657,6 +657,8 @@ class SparseVector @Since("2.0.0") ( override def argmax: Int = { if (size == 0) { -1 +} else if (numActives == 0) { + 0 } else { // Find the max active entry. 
var maxIdx = indices(0) http://git-wip-us.apache.org/repos/asf/spark/blob/f7a91a17/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala -- diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala index ea22c27..bd71656 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala @@ -125,6 +125,13 @@ class VectorsSuite extends SparkMLFunSuite { val vec8 = Vectors.sparse(5, Array(1, 2), Array(0.0, -1.0)) assert(vec8.argmax === 0) + +// Check for case when sparse vector is non-empty but the values are empty +val vec9 = Vectors.sparse(100, Array.empty[Int], Array.empty[Double]).asInstanceOf[SparseVector] +assert(vec9.argmax === 0) + +val vec10 = Vectors.sparse(1, Array.empty[Int], Array.empty[Double]).asInstanceOf[SparseVector] +assert(vec10.argmax === 0) } test("vector equals") { http://git-wip-us.apache.org/repos/asf/spark/blob/f7a91a17/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 63ea9d3..5282849 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -846,6 +846,8 @@ class SparseVector @Since("1.0.0") ( override def argmax: Int = { if (size == 0) { -1 +} else if (numActives == 0) { + 0 } else { // Find the max active entry. var maxIdx = indices(0) http://git-wip-us.apache.org/repos/asf/spark/blob/f7a91a17/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index 71a3cea..6172cff 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -122,6 +122,13 @@ class VectorsSuite extends SparkFunSuite with Logging { val vec8 = Vectors.sparse(5, Array(1, 2), Array(0.0, -1.0)) assert(vec8.argmax === 0) + +// Check for case when sparse
spark git commit: [SPARK-20615][ML][TEST] SparseVector.argmax throws IndexOutOfBoundsException
Repository: spark Updated Branches: refs/heads/branch-2.2 ca3f7edba -> 4bbfad44e [SPARK-20615][ML][TEST] SparseVector.argmax throws IndexOutOfBoundsException ## What changes were proposed in this pull request? Added a check for for the number of defined values. Previously the argmax function assumed that at least one value was defined if the vector size was greater than zero. ## How was this patch tested? Tests were added to the existing VectorsSuite to cover this case. Author: Jon McLean Closes #17877 from jonmclean/vectorArgmaxIndexBug. (cherry picked from commit be53a78352ae7c70d8a07d0df24574b3e3129b4a) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4bbfad44 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4bbfad44 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4bbfad44 Branch: refs/heads/branch-2.2 Commit: 4bbfad44e426365ad9f4941d68c110523b17ea6d Parents: ca3f7ed Author: Jon McLean Authored: Tue May 9 09:47:50 2017 +0100 Committer: Sean Owen Committed: Tue May 9 09:47:58 2017 +0100 -- .../src/main/scala/org/apache/spark/ml/linalg/Vectors.scala | 2 ++ .../test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala | 7 +++ .../main/scala/org/apache/spark/mllib/linalg/Vectors.scala| 2 ++ .../scala/org/apache/spark/mllib/linalg/VectorsSuite.scala| 7 +++ 4 files changed, 18 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4bbfad44/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala -- diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 8e166ba..3fbc095 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -657,6 +657,8 @@ class SparseVector @Since("2.0.0") ( override def argmax: Int = { if (size == 0) { -1 +} else if (numActives == 0) { + 0 } else { // Find the max active entry. 
var maxIdx = indices(0) http://git-wip-us.apache.org/repos/asf/spark/blob/4bbfad44/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala -- diff --git a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala index dfbdaf1..4cd91af 100644 --- a/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala +++ b/mllib-local/src/test/scala/org/apache/spark/ml/linalg/VectorsSuite.scala @@ -125,6 +125,13 @@ class VectorsSuite extends SparkMLFunSuite { val vec8 = Vectors.sparse(5, Array(1, 2), Array(0.0, -1.0)) assert(vec8.argmax === 0) + +// Check for case when sparse vector is non-empty but the values are empty +val vec9 = Vectors.sparse(100, Array.empty[Int], Array.empty[Double]).asInstanceOf[SparseVector] +assert(vec9.argmax === 0) + +val vec10 = Vectors.sparse(1, Array.empty[Int], Array.empty[Double]).asInstanceOf[SparseVector] +assert(vec10.argmax === 0) } test("vector equals") { http://git-wip-us.apache.org/repos/asf/spark/blob/4bbfad44/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 723addc..f063420 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -846,6 +846,8 @@ class SparseVector @Since("1.0.0") ( override def argmax: Int = { if (size == 0) { -1 +} else if (numActives == 0) { + 0 } else { // Find the max active entry. var maxIdx = indices(0) http://git-wip-us.apache.org/repos/asf/spark/blob/4bbfad44/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala index 71a3cea..6172cff 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala @@ -122,6 +122,13 @@ class VectorsSuite extends SparkFunSuite with Logging { val vec8 = Vectors.sparse(5, Array(1, 2), Array(0.0, -1.0)) assert(vec8.argmax === 0) + +// Check for case when sparse
spark-website git commit: Direct 2.1.0, 2.0.1 downloads to archive; use https links for download; Apache Hadoop; remove stale download logic
Repository: spark-website Updated Branches: refs/heads/asf-site 7b32b181f -> b54c4f3fa Direct 2.1.0, 2.0.1 downloads to archive; use https links for download; Apache Hadoop; remove stale download logic Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/b54c4f3f Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/b54c4f3f Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/b54c4f3f Branch: refs/heads/asf-site Commit: b54c4f3faf837a3e772af989eb7df0b64698e557 Parents: 7b32b18 Author: Sean Owen Authored: Tue May 9 10:09:21 2017 +0100 Committer: Sean Owen Committed: Tue May 9 10:09:21 2017 +0100 -- js/downloads.js | 41 - 1 file changed, 16 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/b54c4f3f/js/downloads.js -- diff --git a/js/downloads.js b/js/downloads.js index 81dcbfc..d308389 100644 --- a/js/downloads.js +++ b/js/downloads.js @@ -8,14 +8,14 @@ function addRelease(version, releaseDate, packages, stable) { } var sources = {pretty: "Source Code", tag: "sources"}; -var hadoopFree = {pretty: "Pre-build with user-provided Hadoop [can use with most Hadoop distributions]", tag: "without-hadoop"}; -var hadoop1 = {pretty: "Pre-built for Hadoop 1.X", tag: "hadoop1"}; +var hadoopFree = {pretty: "Pre-build with user-provided Apache Hadoop", tag: "without-hadoop"}; +var hadoop1 = {pretty: "Pre-built for Apache Hadoop 1.X", tag: "hadoop1"}; var cdh4 = {pretty: "Pre-built for CDH 4", tag: "cdh4"}; -var hadoop2 = {pretty: "Pre-built for Hadoop 2.2", tag: "hadoop2"}; -var hadoop2p3 = {pretty: "Pre-built for Hadoop 2.3", tag: "hadoop2.3"}; -var hadoop2p4 = {pretty: "Pre-built for Hadoop 2.4", tag: "hadoop2.4"}; -var hadoop2p6 = {pretty: "Pre-built for Hadoop 2.6", tag: "hadoop2.6"}; -var hadoop2p7 = {pretty: "Pre-built for Hadoop 2.7 and later", tag: "hadoop2.7"}; +var hadoop2 = {pretty: "Pre-built for Apache Hadoop 2.2", tag: "hadoop2"}; +var hadoop2p3 = {pretty: "Pre-built for Apache Hadoop 2.3", tag: "hadoop2.3"}; +var hadoop2p4 = {pretty: "Pre-built for Apache Hadoop 2.4", tag: "hadoop2.4"}; +var hadoop2p6 = {pretty: "Pre-built for Apache Hadoop 2.6", tag: "hadoop2.6"}; +var hadoop2p7 = {pretty: "Pre-built for Apache Hadoop 2.7 and later", tag: "hadoop2.7"}; // 1.4.0+ var packagesV6 = [hadoop2p6, hadoop2p4, hadoop2p3, hadoopFree, hadoop1, cdh4, sources]; @@ -135,7 +135,7 @@ function onVersionSelect() { append(packageSelect, option); } - var href = "http://www.apache.org/dist/spark/spark-"; + version + "/"; + var href = "https://www.apache.org/dist/spark/spark-"; + version + "/"; var link = "" + versionShort(version) + " signatures and checksums"; append(verifyLink, link); @@ -152,13 +152,8 @@ function onPackageSelect() { var pkg = getSelectedValue(packageSelect); - //if (pkg.toLowerCase().indexOf("mapr") > -1) { - // var external = "External Download (MAY CONTAIN INCOMPATIBLE LICENSES)"; - // append(downloadSelect, "" + external + ""); - //} else { -append(downloadSelect, "Direct Download"); -append(downloadSelect, "Select Apache Mirror"); - //} + append(downloadSelect, "Direct Download"); + append(downloadSelect, "Select Apache Mirror"); updateDownloadLink(); } @@ -184,18 +179,14 @@ function updateDownloadLink() { .replace(/\$pkg/g, pkg) .replace(/-bin-sources/, ""); // special case for source packages - var link = "http://d3kbcqa49mib13.cloudfront.net/$artifact";; - if (version < "0.8.0") { -link = "http://spark-project.org/download/$artifact";; - } - 
if (pkg.toLowerCase().indexOf("mapr") > -1) { -link = "http://package.mapr.com/tools/apache-spark/$ver/$artifact"; - } else if (download == "apache") { + var link = "https://d3kbcqa49mib13.cloudfront.net/$artifact";; + if (download == "apache") { if (version < "1.6.3" || -(version >= "2.0.0" && version < "2.0.1")) { - link = "http://archive.apache.org/dist/spark/spark-$ver/$artifact";; +(version >= "2.0.0" && version <= "2.0.1") || +(version >= "2.1.0" && version <= "2.1.0")) { + link = "https://archive.apache.org/dist/spark/spark-$ver/$artifact";; } else { - link = "http://www.apache.org/dyn/closer.lua/spark/spark-$ver/$artifact";; + link = "https://www.apache.org/dyn/closer.lua/spark/spark-$ver/$artifact";; } } link = link - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark-website git commit: Trigger git sync
Repository: spark-website Updated Branches: refs/heads/asf-site b54c4f3fa -> 01e0279a0 Trigger git sync Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/01e0279a Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/01e0279a Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/01e0279a Branch: refs/heads/asf-site Commit: 01e0279a0e9faf317b8b471b8af6520ff65c615e Parents: b54c4f3 Author: Sean Owen Authored: Tue May 9 10:33:06 2017 +0100 Committer: Sean Owen Committed: Tue May 9 10:33:06 2017 +0100 -- -- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20630][WEB UI] Fixed column visibility in Executor Tab
Repository: spark Updated Branches: refs/heads/master 804949c6b -> ca4625e0e [SPARK-20630][WEB UI] Fixed column visibility in Executor Tab ## What changes were proposed in this pull request? #14617 added new columns to the executor table causing the visibility checks for the logs and threadDump columns to toggle the wrong columns since they used hard-coded column numbers. I've updated the checks to use column names instead of numbers so future updates don't accidentally break this again. Note: This will also need to be back ported into 2.2 since #14617 was merged there ## How was this patch tested? Manually tested Author: Alex Bozarth Closes #17904 from ajbozarth/spark20630. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca4625e0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca4625e0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca4625e0 Branch: refs/heads/master Commit: ca4625e0e58df7f02346470d22a9478d9640709d Parents: 804949c Author: Alex Bozarth Authored: Wed May 10 10:20:10 2017 +0100 Committer: Sean Owen Committed: Wed May 10 10:20:10 2017 +0100 -- .../org/apache/spark/ui/static/executorspage.js | 12 1 file changed, 4 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ca4625e0/core/src/main/resources/org/apache/spark/ui/static/executorspage.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index cb9922d..6643a8f 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -492,24 +492,20 @@ $(document).ready(function () { {data: 'totalInputBytes', render: formatBytes}, {data: 'totalShuffleRead', render: formatBytes}, {data: 'totalShuffleWrite', render: formatBytes}, -{data: 'executorLogs', render: formatLogsCells}, +{name: 'executorLogsCol', data: 'executorLogs', render: formatLogsCells}, { +name: 'threadDumpCol', data: 'id', render: function (data, type) { return type === 'display' ? ("Thread Dump" ) : data; } } ], -"columnDefs": [ -{ -"targets": [ 16 ], -"visible": getThreadDumpEnabled() -} -], "order": [[0, "asc"]] }; var dt = $(selector).DataTable(conf); -dt.column(15).visible(logsExist(response)); +dt.column('executorLogsCol:name').visible(logsExist(response)); + dt.column('threadDumpCol:name').visible(getThreadDumpEnabled()); $('#active-executors [data-toggle="tooltip"]').tooltip(); var sumSelector = "#summary-execs-table"; - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20630][WEB UI] Fixed column visibility in Executor Tab
Repository: spark Updated Branches: refs/heads/branch-2.2 3ed2f4d51 -> 7597a522b [SPARK-20630][WEB UI] Fixed column visibility in Executor Tab ## What changes were proposed in this pull request? #14617 added new columns to the executor table causing the visibility checks for the logs and threadDump columns to toggle the wrong columns since they used hard-coded column numbers. I've updated the checks to use column names instead of numbers so future updates don't accidentally break this again. Note: This will also need to be back ported into 2.2 since #14617 was merged there ## How was this patch tested? Manually tested Author: Alex Bozarth Closes #17904 from ajbozarth/spark20630. (cherry picked from commit ca4625e0e58df7f02346470d22a9478d9640709d) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7597a522 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7597a522 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7597a522 Branch: refs/heads/branch-2.2 Commit: 7597a522b7e5be43910e86cd6f805e7e9ee08ced Parents: 3ed2f4d Author: Alex Bozarth Authored: Wed May 10 10:20:10 2017 +0100 Committer: Sean Owen Committed: Wed May 10 10:20:19 2017 +0100 -- .../org/apache/spark/ui/static/executorspage.js | 12 1 file changed, 4 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7597a522/core/src/main/resources/org/apache/spark/ui/static/executorspage.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index cb9922d..6643a8f 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -492,24 +492,20 @@ $(document).ready(function () { {data: 'totalInputBytes', render: formatBytes}, {data: 'totalShuffleRead', render: formatBytes}, {data: 'totalShuffleWrite', render: formatBytes}, -{data: 'executorLogs', render: formatLogsCells}, +{name: 'executorLogsCol', data: 'executorLogs', render: formatLogsCells}, { +name: 'threadDumpCol', data: 'id', render: function (data, type) { return type === 'display' ? ("Thread Dump" ) : data; } } ], -"columnDefs": [ -{ -"targets": [ 16 ], -"visible": getThreadDumpEnabled() -} -], "order": [[0, "asc"]] }; var dt = $(selector).DataTable(conf); -dt.column(15).visible(logsExist(response)); +dt.column('executorLogsCol:name').visible(logsExist(response)); + dt.column('threadDumpCol:name').visible(getThreadDumpEnabled()); $('#active-executors [data-toggle="tooltip"]').tooltip(); var sumSelector = "#summary-execs-table"; - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20637][CORE] Remove mention of old RDD classes from comments
Repository: spark Updated Branches: refs/heads/master ca4625e0e -> a4cbf26bc [SPARK-20637][CORE] Remove mention of old RDD classes from comments ## What changes were proposed in this pull request? A few comments around the code mention RDD classes that do not exist anymore. I'm not sure of the best way to replace these, so I've just removed them here. ## How was this patch tested? Only changes code comments, no testing required Author: Michael Mior Closes #17900 from michaelmior/remove-old-rdds. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a4cbf26b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a4cbf26b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a4cbf26b Branch: refs/heads/master Commit: a4cbf26bca349a63586777ad3e398717eb94473e Parents: ca4625e Author: Michael Mior Authored: Wed May 10 10:21:43 2017 +0100 Committer: Sean Owen Committed: Wed May 10 10:21:43 2017 +0100 -- .../main/scala/org/apache/spark/scheduler/DAGScheduler.scala | 2 +- core/src/test/scala/org/apache/spark/CheckpointSuite.scala | 6 +++--- .../scala/org/apache/spark/scheduler/SparkListenerSuite.scala | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a4cbf26b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala index aab177f..68178c7 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala @@ -58,7 +58,7 @@ import org.apache.spark.util._ * set of map output files, and another to read those files after a barrier). In the end, every * stage will have only shuffle dependencies on other stages, and may compute multiple operations * inside it. The actual pipelining of these operations happens in the RDD.compute() functions of - * various RDDs (MappedRDD, FilteredRDD, etc). + * various RDDs * * In addition to coming up with a DAG of stages, the DAGScheduler also determines the preferred * locations to run each task on, based on the current cache status, and passes these to the http://git-wip-us.apache.org/repos/asf/spark/blob/a4cbf26b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala index ee70a33..48408cc 100644 --- a/core/src/test/scala/org/apache/spark/CheckpointSuite.scala +++ b/core/src/test/scala/org/apache/spark/CheckpointSuite.scala @@ -114,7 +114,7 @@ trait RDDCheckpointTester { self: SparkFunSuite => * RDDs partitions. So even if the parent RDD is checkpointed and its partitions changed, * the generated RDD will remember the partitions and therefore potentially the whole lineage. * This function should be called only those RDD whose partitions refer to parent RDD's - * partitions (i.e., do not call it on simple RDD like MappedRDD). + * partitions (i.e., do not call it on simple RDDs). * * @param op an operation to run on the RDD * @param reliableCheckpoint if true, use reliable checkpoints, otherwise use local checkpoints @@ -388,7 +388,7 @@ class CheckpointSuite extends SparkFunSuite with RDDCheckpointTester with LocalS // the parent RDD has been checkpointed and parent partitions have been changed. 
// Note that this test is very specific to the current implementation of CartesianRDD. val ones = sc.makeRDD(1 to 100, 10).map(x => x) -checkpoint(ones, reliableCheckpoint) // checkpoint that MappedRDD +checkpoint(ones, reliableCheckpoint) val cartesian = new CartesianRDD(sc, ones, ones) val splitBeforeCheckpoint = serializeDeserialize(cartesian.partitions.head.asInstanceOf[CartesianPartition]) @@ -411,7 +411,7 @@ class CheckpointSuite extends SparkFunSuite with RDDCheckpointTester with LocalS // Note that this test is very specific to the current implementation of // CoalescedRDDPartitions. val ones = sc.makeRDD(1 to 100, 10).map(x => x) -checkpoint(ones, reliableCheckpoint) // checkpoint that MappedRDD +checkpoint(ones, reliableCheckpoint) val coalesced = new CoalescedRDD(ones, 2) val splitBeforeCheckpoint = serializeDeserialize(coalesced.partitions.head.asInstanceOf[CoalescedRDDPartition]) http://git-wip-us.apache.org/repos/asf/spark/blob/a4cbf26b/core/src/test/scala/org/apache/spark/scheduler/SparkListenerSuite.scala -
spark git commit: [SPARK-20393][WEBU UI] Strengthen Spark to prevent XSS vulnerabilities
Repository: spark Updated Branches: refs/heads/master a4cbf26bc -> b512233a4 [SPARK-20393][WEBU UI] Strengthen Spark to prevent XSS vulnerabilities ## What changes were proposed in this pull request? Add stripXSS and stripXSSMap to Spark Core's UIUtils. Calling these functions at any point that getParameter is called against a HttpServletRequest. ## How was this patch tested? Unit tests, IBM Security AppScan Standard no longer showing vulnerabilities, manual verification of WebUI pages. Author: NICHOLAS T. MARION Closes #17686 from n-marion/xss-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b512233a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b512233a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b512233a Branch: refs/heads/master Commit: b512233a457092b0e2a39d0b42cb021abc69d375 Parents: a4cbf26 Author: NICHOLAS T. MARION Authored: Wed May 10 10:59:57 2017 +0100 Committer: Sean Owen Committed: Wed May 10 10:59:57 2017 +0100 -- .../spark/deploy/history/HistoryPage.scala | 3 +- .../deploy/master/ui/ApplicationPage.scala | 3 +- .../spark/deploy/master/ui/MasterPage.scala | 6 ++- .../apache/spark/deploy/worker/ui/LogPage.scala | 30 +-- .../scala/org/apache/spark/ui/UIUtils.scala | 21 +++ .../spark/ui/exec/ExecutorThreadDumpPage.scala | 4 +- .../org/apache/spark/ui/jobs/AllJobsPage.scala | 14 --- .../org/apache/spark/ui/jobs/JobPage.scala | 3 +- .../org/apache/spark/ui/jobs/JobsTab.scala | 5 ++- .../org/apache/spark/ui/jobs/PoolPage.scala | 3 +- .../org/apache/spark/ui/jobs/StagePage.scala| 15 .../org/apache/spark/ui/jobs/StageTable.scala | 15 .../org/apache/spark/ui/jobs/StagesTab.scala| 5 ++- .../org/apache/spark/ui/storage/RDDPage.scala | 13 --- .../org/apache/spark/ui/UIUtilsSuite.scala | 39 .../spark/deploy/mesos/ui/DriverPage.scala | 3 +- .../spark/sql/execution/ui/ExecutionPage.scala | 3 +- .../ui/ThriftServerSessionPage.scala| 4 +- .../apache/spark/streaming/ui/BatchPage.scala | 5 ++- 19 files changed, 140 insertions(+), 54 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b512233a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala index 0e7a6c2..af14717 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala @@ -26,8 +26,9 @@ import org.apache.spark.ui.{UIUtils, WebUIPage} private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { def render(request: HttpServletRequest): Seq[Node] = { +// stripXSS is called first to remove suspicious characters used in XSS attacks val requestedIncomplete = - Option(request.getParameter("showIncomplete")).getOrElse("false").toBoolean + Option(UIUtils.stripXSS(request.getParameter("showIncomplete"))).getOrElse("false").toBoolean val allAppsSize = parent.getApplicationList().count(_.completed != requestedIncomplete) val eventLogsUnderProcessCount = parent.getEventLogsUnderProcess() http://git-wip-us.apache.org/repos/asf/spark/blob/b512233a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index a8d721f..94ff81c 100644 --- 
a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -33,7 +33,8 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") /** Executor details for a particular application */ def render(request: HttpServletRequest): Seq[Node] = { -val appId = request.getParameter("appId") +// stripXSS is called first to remove suspicious characters used in XSS attacks +val appId = UIUtils.stripXSS(request.getParameter("appId")) val state = master.askSync[MasterStateResponse](RequestMasterState) val app = state.activeApps.find(_.id == appId) .getOrElse(state.completedApps.find(_.id == appId).orNull) http://git-wip-us.apache.org/repos/asf/spark/blob/b512233a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala -
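The visible part of the diff only shows call sites wrapping `request.getParameter` in `UIUtils.stripXSS`; the helper itself is not shown here. As a rough illustration of the idea — not Spark's actual implementation — a sanitizer of this shape strips markup-significant characters before a request parameter is echoed into rendered HTML:

```
// Assumption: a simple character blacklist, shown only to illustrate the pattern.
def stripXSS(requestParameter: String): String = {
  if (requestParameter == null) null
  else requestParameter.replaceAll("[<>\"'%;()&+]", "")
}

// Call sites then follow the pattern in the diff:
// val appId = stripXSS(request.getParameter("appId"))
```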
spark git commit: [MINOR][BUILD] Fix lint-java breaks.
Repository: spark Updated Branches: refs/heads/master 76e4a5566 -> fcb88f921 [MINOR][BUILD] Fix lint-java breaks. ## What changes were proposed in this pull request? This PR proposes to fix the lint-breaks as below: ``` [ERROR] src/main/java/org/apache/spark/unsafe/Platform.java:[51] (regexp) RegexpSingleline: No trailing whitespace allowed. [ERROR] src/main/scala/org/apache/spark/sql/streaming/Trigger.java:[45,25] (naming) MethodName: Method name 'ProcessingTime' must match pattern '^[a-z][a-z0-9][a-zA-Z0-9_]*$'. [ERROR] src/main/scala/org/apache/spark/sql/streaming/Trigger.java:[62,25] (naming) MethodName: Method name 'ProcessingTime' must match pattern '^[a-z][a-z0-9][a-zA-Z0-9_]*$'. [ERROR] src/main/scala/org/apache/spark/sql/streaming/Trigger.java:[78,25] (naming) MethodName: Method name 'ProcessingTime' must match pattern '^[a-z][a-z0-9][a-zA-Z0-9_]*$'. [ERROR] src/main/scala/org/apache/spark/sql/streaming/Trigger.java:[92,25] (naming) MethodName: Method name 'ProcessingTime' must match pattern '^[a-z][a-z0-9][a-zA-Z0-9_]*$'. [ERROR] src/main/scala/org/apache/spark/sql/streaming/Trigger.java:[102,25] (naming) MethodName: Method name 'Once' must match pattern '^[a-z][a-z0-9][a-zA-Z0-9_]*$'. [ERROR] src/test/java/org/apache/spark/streaming/kinesis/JavaKinesisInputDStreamBuilderSuite.java:[28,8] (imports) UnusedImports: Unused import - org.apache.spark.streaming.api.java.JavaDStream. ``` after: ``` dev/lint-java Checkstyle checks passed. ``` [Test Result](https://travis-ci.org/ConeyLiu/spark/jobs/229666169) ## How was this patch tested? Travis CI Author: Xianyang Liu Closes #17890 from ConeyLiu/codestyle. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fcb88f92 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fcb88f92 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fcb88f92 Branch: refs/heads/master Commit: fcb88f9211e39c705073db5300c96ceeb3f227d7 Parents: 76e4a55 Author: Xianyang Liu Authored: Wed May 10 13:56:34 2017 +0100 Committer: Sean Owen Committed: Wed May 10 13:56:34 2017 +0100 -- common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java | 2 +- core/src/main/scala/org/apache/spark/storage/BlockManager.scala | 3 --- dev/checkstyle-suppressions.xml | 2 +- .../streaming/kinesis/JavaKinesisInputDStreamBuilderSuite.java| 1 - 4 files changed, 2 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fcb88f92/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java -- diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java index 4ab5b68..aca6fca 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java @@ -48,7 +48,7 @@ public final class Platform { boolean _unaligned; String arch = System.getProperty("os.arch", ""); if (arch.equals("ppc64le") || arch.equals("ppc64")) { - // Since java.nio.Bits.unaligned() doesn't return true on ppc (See JDK-8165231), but + // Since java.nio.Bits.unaligned() doesn't return true on ppc (See JDK-8165231), but // ppc64 and ppc64le support it _unaligned = true; } else { http://git-wip-us.apache.org/repos/asf/spark/blob/fcb88f92/core/src/main/scala/org/apache/spark/storage/BlockManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 
33ce30c..b3e4584 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -29,8 +29,6 @@ import scala.reflect.ClassTag import scala.util.Random import scala.util.control.NonFatal -import com.google.common.io.ByteStreams - import org.apache.spark._ import org.apache.spark.executor.{DataReadMethod, ShuffleWriteMetrics} import org.apache.spark.internal.Logging @@ -41,7 +39,6 @@ import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.ExternalShuffleClient import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo import org.apache.spark.rpc.RpcEnv -import org.apache.spark.security.CryptoStreamUtils import org.apache.spark.serializer.{SerializerInstance, SerializerManager} import org.apache.spark.shuffle.ShuffleManager import org.apache.spark.storage.memory._ http://git-wip-us.apache.org/repos/asf/spark/blob/fcb88f92/dev/checkstyle-suppressions.xml -
spark git commit: [MINOR][BUILD] Fix lint-java breaks.
Repository: spark Updated Branches: refs/heads/branch-2.2 5f6029c75 -> 358516dcb [MINOR][BUILD] Fix lint-java breaks. ## What changes were proposed in this pull request? This PR proposes to fix the lint-breaks as below: ``` [ERROR] src/main/java/org/apache/spark/unsafe/Platform.java:[51] (regexp) RegexpSingleline: No trailing whitespace allowed. [ERROR] src/main/scala/org/apache/spark/sql/streaming/Trigger.java:[45,25] (naming) MethodName: Method name 'ProcessingTime' must match pattern '^[a-z][a-z0-9][a-zA-Z0-9_]*$'. [ERROR] src/main/scala/org/apache/spark/sql/streaming/Trigger.java:[62,25] (naming) MethodName: Method name 'ProcessingTime' must match pattern '^[a-z][a-z0-9][a-zA-Z0-9_]*$'. [ERROR] src/main/scala/org/apache/spark/sql/streaming/Trigger.java:[78,25] (naming) MethodName: Method name 'ProcessingTime' must match pattern '^[a-z][a-z0-9][a-zA-Z0-9_]*$'. [ERROR] src/main/scala/org/apache/spark/sql/streaming/Trigger.java:[92,25] (naming) MethodName: Method name 'ProcessingTime' must match pattern '^[a-z][a-z0-9][a-zA-Z0-9_]*$'. [ERROR] src/main/scala/org/apache/spark/sql/streaming/Trigger.java:[102,25] (naming) MethodName: Method name 'Once' must match pattern '^[a-z][a-z0-9][a-zA-Z0-9_]*$'. [ERROR] src/test/java/org/apache/spark/streaming/kinesis/JavaKinesisInputDStreamBuilderSuite.java:[28,8] (imports) UnusedImports: Unused import - org.apache.spark.streaming.api.java.JavaDStream. ``` after: ``` dev/lint-java Checkstyle checks passed. ``` [Test Result](https://travis-ci.org/ConeyLiu/spark/jobs/229666169) ## How was this patch tested? Travis CI Author: Xianyang Liu Closes #17890 from ConeyLiu/codestyle. (cherry picked from commit fcb88f9211e39c705073db5300c96ceeb3f227d7) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/358516dc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/358516dc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/358516dc Branch: refs/heads/branch-2.2 Commit: 358516dcbef5178cdc6cb4387d7f6837359946ba Parents: 5f6029c Author: Xianyang Liu Authored: Wed May 10 13:56:34 2017 +0100 Committer: Sean Owen Committed: Wed May 10 13:56:42 2017 +0100 -- common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java | 2 +- core/src/main/scala/org/apache/spark/storage/BlockManager.scala | 3 --- dev/checkstyle-suppressions.xml | 2 +- .../streaming/kinesis/JavaKinesisInputDStreamBuilderSuite.java| 1 - 4 files changed, 2 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/358516dc/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java -- diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java index 4ab5b68..aca6fca 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/Platform.java @@ -48,7 +48,7 @@ public final class Platform { boolean _unaligned; String arch = System.getProperty("os.arch", ""); if (arch.equals("ppc64le") || arch.equals("ppc64")) { - // Since java.nio.Bits.unaligned() doesn't return true on ppc (See JDK-8165231), but + // Since java.nio.Bits.unaligned() doesn't return true on ppc (See JDK-8165231), but // ppc64 and ppc64le support it _unaligned = true; } else { http://git-wip-us.apache.org/repos/asf/spark/blob/358516dc/core/src/main/scala/org/apache/spark/storage/BlockManager.scala -- diff --git 
a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 3219969..ad0dc3c 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -29,8 +29,6 @@ import scala.reflect.ClassTag import scala.util.Random import scala.util.control.NonFatal -import com.google.common.io.ByteStreams - import org.apache.spark._ import org.apache.spark.executor.{DataReadMethod, ShuffleWriteMetrics} import org.apache.spark.internal.Logging @@ -41,7 +39,6 @@ import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.ExternalShuffleClient import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo import org.apache.spark.rpc.RpcEnv -import org.apache.spark.security.CryptoStreamUtils import org.apache.spark.serializer.{SerializerInstance, SerializerManager} import org.apache.spark.shuffle.ShuffleManager import org.apache.spark.storage.memory._ http://git-wip-us.apache
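For context on the `MethodName` violations listed above: `ProcessingTime` and `Once` are Java static factory methods on the structured-streaming `Trigger` class, and their capitalization is deliberate so that call sites read like case-class constructors; presumably that is why the fix goes through `dev/checkstyle-suppressions.xml` rather than renaming public API. A minimal Scala usage sketch (illustrative only, assuming a local SparkSession and the built-in `rate` test source):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.streaming.Trigger

object TriggerNamingSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("trigger-naming-sketch")
      .getOrCreate()

    // The capitalized factories read like constructors at the call site,
    // e.g. Trigger.ProcessingTime("10 seconds") or Trigger.Once().
    val query = spark.readStream
      .format("rate")          // test source that emits (timestamp, value) rows
      .load()
      .writeStream
      .format("console")
      .trigger(Trigger.ProcessingTime("10 seconds"))
      .start()

    query.awaitTermination(30 * 1000)  // let it run for ~30 seconds
    spark.stop()
  }
}
```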
spark-website git commit: Trigger git sync
Repository: spark-website Updated Branches: refs/heads/asf-site 01e0279a0 -> c2c0905b4 Trigger git sync Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/c2c0905b Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/c2c0905b Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/c2c0905b Branch: refs/heads/asf-site Commit: c2c0905b446b7272dec0147cabb593814d486efa Parents: 01e0279 Author: Sean Owen Authored: Wed May 10 18:47:55 2017 +0100 Committer: Sean Owen Committed: Wed May 10 18:47:55 2017 +0100 -- -- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/2] spark-website git commit: Replace most http links with https as a best practice, where possible
Replace most http links with https as a best practice, where possible Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/62cf4a16 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/62cf4a16 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/62cf4a16 Branch: refs/heads/asf-site Commit: 62cf4a16daae3cf1b68745b8f676dbb29c167af2 Parents: c2c0905 Author: Sean Owen Authored: Wed May 10 10:56:35 2017 +0100 Committer: Sean Owen Committed: Wed May 10 19:02:39 2017 +0100 -- _config.yml| 2 +- community.md | 8 +- contributing.md| 10 +- developer-tools.md | 8 +- documentation.md | 40 ++--- downloads.md | 4 +- examples.md| 10 +- faq.md | 6 +- index.md | 12 +- mllib/index.md | 4 +- powered-by.md | 12 +- release-process.md | 6 +- robots.txt | 2 +- site/community.html| 8 +- site/contributing.html | 10 +- site/developer-tools.html | 8 +- site/documentation.html| 40 ++--- site/downloads.html| 4 +- site/examples.html | 10 +- site/faq.html | 6 +- site/index.html| 12 +- site/mailing-lists.html| 2 +- site/mllib/index.html | 4 +- site/powered-by.html | 15 +- site/release-process.html | 6 +- site/robots.txt| 2 +- site/sitemap.xml | 332 ++-- site/streaming/index.html | 8 +- site/third-party-projects.html | 8 +- site/trademarks.html | 2 +- sitemap.xml| 52 +++--- streaming/index.md | 8 +- third-party-projects.md| 8 +- trademarks.md | 2 +- 34 files changed, 332 insertions(+), 339 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/_config.yml -- diff --git a/_config.yml b/_config.yml index 18ba30f..9a3934e 100644 --- a/_config.yml +++ b/_config.yml @@ -6,4 +6,4 @@ permalink: none destination: site exclude: ['README.md','content'] keep_files: ['docs'] -url: http://spark.apache.org \ No newline at end of file +url: https://spark.apache.org \ No newline at end of file http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/community.md -- diff --git a/community.md b/community.md index 9fcb2b5..9fc6136 100644 --- a/community.md +++ b/community.md @@ -15,18 +15,18 @@ navigation: StackOverflow For usage questions and help (e.g. how to use this Spark API), it is recommended you use the -StackOverflow tag http://stackoverflow.com/questions/tagged/apache-spark";>`apache-spark` +StackOverflow tag https://stackoverflow.com/questions/tagged/apache-spark";>`apache-spark` as it is an active forum for Spark users' questions and answers. Some quick tips when using StackOverflow: - Prior to asking submitting questions, please: - Search StackOverflow's - http://stackoverflow.com/questions/tagged/apache-spark";>`apache-spark` tag to see if + https://stackoverflow.com/questions/tagged/apache-spark";>`apache-spark` tag to see if your question has already been answered - Search the nabble archive for http://apache-spark-user-list.1001560.n3.nabble.com/";>us...@spark.apache.org -- Please follow the StackOverflow http://stackoverflow.com/help/how-to-ask";>code of conduct +- Please follow the StackOverflow https://stackoverflow.com/help/how-to-ask";>code of conduct - Always use the `apache-spark` tag when asking questions - Please also use a secondary tag to specify components so subject matter experts can more easily find them. 
Examples include: `pyspark`, `spark-dataframe`, `spark-streaming`, `spark-r`, `spark-mllib`, @@ -58,7 +58,7 @@ project, and scenarios, it is recommended you use the u...@spark.apache.org mail Some quick tips when using email: - Prior to asking submitting questions, please: - - Search StackOverflow at http://stackoverflow.com/questions/tagged/apache-spark";>`apache-spark` + - Search StackOverflow at https://stackoverflow.com/questions/tagged/apache-spark";>`apache-spark` to see if your question has already been answered - Search the nabble archive for http://apache-spark-user-list.1001560.n3.nabble.com/";>us...@spark.apache.org http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/contributing.md -
[1/2] spark-website git commit: Replace most http links with https as a best practice, where possible
Repository: spark-website Updated Branches: refs/heads/asf-site c2c0905b4 -> 62cf4a16d http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/site/sitemap.xml -- diff --git a/site/sitemap.xml b/site/sitemap.xml index bc93fb7..eb4e705 100644 --- a/site/sitemap.xml +++ b/site/sitemap.xml @@ -6,698 +6,698 @@ http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd";> - http://spark.apache.org/ + https://spark.apache.org/ daily 1.0 - http://spark.apache.org/docs/latest/index.html + https://spark.apache.org/docs/latest/index.html daily 1.0 - http://spark.apache.org/docs/latest/quick-start.html + https://spark.apache.org/docs/latest/quick-start.html daily 1.0 - http://spark.apache.org/docs/latest/programming-guide.html + https://spark.apache.org/docs/latest/programming-guide.html daily 1.0 - http://spark.apache.org/docs/latest/streaming-programming-guide.html + https://spark.apache.org/docs/latest/streaming-programming-guide.html daily 1.0 - http://spark.apache.org/docs/latest/sql-programming-guide.html + https://spark.apache.org/docs/latest/sql-programming-guide.html daily 1.0 - http://spark.apache.org/docs/latest/structured-streaming-programming-guide.html + https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html daily 1.0 - http://spark.apache.org/docs/latest/ml-guide.html + https://spark.apache.org/docs/latest/ml-guide.html daily 1.0 - http://spark.apache.org/docs/latest/graphx-programming-guide.html + https://spark.apache.org/docs/latest/graphx-programming-guide.html daily 1.0 - http://spark.apache.org/docs/latest/sparkr.html + https://spark.apache.org/docs/latest/sparkr.html daily 1.0 - http://spark.apache.org/docs/latest/cluster-overview.html + https://spark.apache.org/docs/latest/cluster-overview.html daily 1.0 - http://spark.apache.org/docs/latest/submitting-applications.html + https://spark.apache.org/docs/latest/submitting-applications.html daily 1.0 - http://spark.apache.org/docs/latest/spark-standalone.html + https://spark.apache.org/docs/latest/spark-standalone.html daily 1.0 - http://spark.apache.org/docs/latest/running-on-mesos.html + https://spark.apache.org/docs/latest/running-on-mesos.html daily 1.0 - http://spark.apache.org/docs/latest/running-on-yarn.html + https://spark.apache.org/docs/latest/running-on-yarn.html daily 1.0 - http://spark.apache.org/docs/latest/configuration.html + https://spark.apache.org/docs/latest/configuration.html daily 1.0 - http://spark.apache.org/docs/latest/monitoring.html + https://spark.apache.org/docs/latest/monitoring.html daily 1.0 - http://spark.apache.org/docs/latest/tuning.html + https://spark.apache.org/docs/latest/tuning.html daily 1.0 - http://spark.apache.org/docs/latest/job-scheduling.html + https://spark.apache.org/docs/latest/job-scheduling.html daily 1.0 - http://spark.apache.org/docs/latest/security.html + https://spark.apache.org/docs/latest/security.html daily 1.0 - http://spark.apache.org/docs/latest/hardware-provisioning.html + https://spark.apache.org/docs/latest/hardware-provisioning.html daily 1.0 - http://spark.apache.org/docs/latest/building-spark.html + https://spark.apache.org/docs/latest/building-spark.html daily 1.0 - http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package + https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package daily 1.0 - http://spark.apache.org/docs/latest/api/java/index.html + https://spark.apache.org/docs/latest/api/java/index.html weekly 1.0 - http://spark.apache.org/docs/latest/api/python/index.html + 
https://spark.apache.org/docs/latest/api/python/index.html weekly 1.0 - http://spark.apache.org/docs/latest/api/R/index.html + https://spark.apache.org/docs/latest/api/R/index.html weekly 1.0 - http://spark.apache.org/releases/spark-release-2-1-1.html + https://spark.apache.org/releases/spark-release-2-1-1.html weekly - http://spark.apache.org/news/spark-2-1-1-released.html + https://spark.apache.org/news/spark-2-1-1-released.html weekly - http://spark.apache.org/news/spark-summit-june-2017-agenda-posted.html + https://spark.apache.org/news/spark-summit-june-2017-agenda-posted.html weekly - http://spark.apache.org/news/spark-summit-east-2017-agenda-posted.html + https://spark.apache.org/news/spark-summit-east-2017-agenda-posted.html weekly - http://spark.apache.org/releases/spark-release-2-1-0.html + https://spark.apache.org/releases/spark-release-2-1-0.html weekly - http://spark.apache.org/news/spark-2-1-0-released.html + https://spark.apache.org/news/spark-2-1-0-release
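The sweep above is a mechanical rewrite of `http://` links to `https://` for hosts known to serve HTTPS (note that some third-party hosts, such as the nabble archive, are left on plain HTTP). The commit does not say how the replacement was produced; a hypothetical Scala sketch of such a sweep, restricted to known-good hosts, might look like this:

```scala
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path, Paths}
import scala.collection.JavaConverters._

object HttpsLinkSweep {
  def main(args: Array[String]): Unit = {
    val root = Paths.get(args.headOption.getOrElse("."))
    // Only rewrite hosts that are known to serve HTTPS; everything else is left alone.
    val rewrites = Seq(
      "http://spark.apache.org" -> "https://spark.apache.org",
      "http://stackoverflow.com" -> "https://stackoverflow.com"
    )
    val files: List[Path] = Files.walk(root).iterator().asScala
      .filter(p => Files.isRegularFile(p))
      .filter(p => p.toString.endsWith(".md") || p.toString.endsWith(".html") || p.toString.endsWith(".xml"))
      .toList
    for (path <- files) {
      val original = new String(Files.readAllBytes(path), StandardCharsets.UTF_8)
      val updated = rewrites.foldLeft(original) { case (text, (from, to)) => text.replace(from, to) }
      if (updated != original) Files.write(path, updated.getBytes(StandardCharsets.UTF_8))
    }
  }
}
```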
spark-website git commit: More dead link fixing
Repository: spark-website Updated Branches: refs/heads/asf-site 62cf4a16d -> 5ed41c8d8 More dead link fixing Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/5ed41c8d Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/5ed41c8d Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/5ed41c8d Branch: refs/heads/asf-site Commit: 5ed41c8d8a6bbf03ce84f987ee9c57f6292e1aa6 Parents: 62cf4a1 Author: Sean Owen Authored: Thu May 11 11:02:31 2017 +0100 Committer: Sean Owen Committed: Thu May 11 11:02:31 2017 +0100 -- faq.md | 2 +- ...6-21-spark-accepted-into-apache-incubator.md | 2 +- news/_posts/2016-05-26-spark-2.0.0-preview.md | 2 +- powered-by.md | 9 ++--- site/faq.html | 2 +- site/js/downloads.js| 41 site/news/index.html| 4 +- site/news/spark-2-1-1-released.html | 1 + site/news/spark-2.0.0-preview.html | 2 +- .../spark-accepted-into-apache-incubator.html | 2 +- site/powered-by.html| 9 ++--- 11 files changed, 31 insertions(+), 45 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/5ed41c8d/faq.md -- diff --git a/faq.md b/faq.md index 281d7ca..614664c 100644 --- a/faq.md +++ b/faq.md @@ -71,4 +71,4 @@ Please also refer to our Where can I get more help? -Please post on StackOverflow's https://stackoverflow.com/questions/tagged/apache-spark";>apache-spark tag or https://apache-spark-user-list.1001560.n3.nabble.com";>Spark Users mailing list. For more information, please refer to https://spark.apache.org/community.html#have-questions";>Have Questions?. We'll be glad to help! +Please post on StackOverflow's https://stackoverflow.com/questions/tagged/apache-spark";>apache-spark tag or http://apache-spark-user-list.1001560.n3.nabble.com";>Spark Users mailing list. For more information, please refer to https://spark.apache.org/community.html#have-questions";>Have Questions?. We'll be glad to help! http://git-wip-us.apache.org/repos/asf/spark-website/blob/5ed41c8d/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md -- diff --git a/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md b/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md index 1a6ca6d..a0ff02a 100644 --- a/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md +++ b/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md @@ -11,4 +11,4 @@ meta: _edit_last: '4' _wpas_done_all: '1' --- -Spark was recently http://mail-archives.apache.org/mod_mbox/incubator-general/201306.mbox/%3CCDE7B773.E9A48%25chris.a.mattmann%40jpl.nasa.gov%3E";>accepted into the http://incubator.apache.org";>Apache Incubator, which will serve as the long-term home for the project. While moving the source code and issue tracking to Apache will take some time, we are excited to be joining the community at Apache. Stay tuned on this site for updates on how the project hosting will change. +Spark was recently http://mail-archives.apache.org/mod_mbox/incubator-general/201306.mbox/%3ccde7b773.e9a48%25chris.a.mattm...@jpl.nasa.gov%3E";>accepted into the http://incubator.apache.org";>Apache Incubator, which will serve as the long-term home for the project. While moving the source code and issue tracking to Apache will take some time, we are excited to be joining the community at Apache. Stay tuned on this site for updates on how the project hosting will change. 
http://git-wip-us.apache.org/repos/asf/spark-website/blob/5ed41c8d/news/_posts/2016-05-26-spark-2.0.0-preview.md -- diff --git a/news/_posts/2016-05-26-spark-2.0.0-preview.md b/news/_posts/2016-05-26-spark-2.0.0-preview.md index 053beb6..fb642f7 100644 --- a/news/_posts/2016-05-26-spark-2.0.0-preview.md +++ b/news/_posts/2016-05-26-spark-2.0.0-preview.md @@ -11,6 +11,6 @@ meta: _edit_last: '4' _wpas_done_all: '1' --- -To enable wide-scale community testing of the upcoming Spark 2.0 release, the Apache Spark team has posted a https://dist.apache.org/repos/dist/release/spark/spark-2.0.0-preview/";>preview release of Spark 2.0. This preview is not a stable release in terms of either API or functionality, but it is meant to give the community early access to try the code that will become Spark 2.0. If you would like to test the release, simply download it, and send feedback using either the https://spark.apache.org/commu
svn commit: r19618 - in /release/spark: spark-1.6.2/ spark-2.0.1/ spark-2.1.0/
Author: srowen Date: Thu May 11 10:08:00 2017 New Revision: 19618 Log: Delete non-current Spark releases Removed: release/spark/spark-1.6.2/ release/spark/spark-2.0.1/ release/spark/spark-2.1.0/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20554][BUILD] Remove usage of scala.language.reflectiveCalls
Repository: spark Updated Branches: refs/heads/master 720708ccd -> fc8a2b6ee [SPARK-20554][BUILD] Remove usage of scala.language.reflectiveCalls ## What changes were proposed in this pull request? Remove uses of scala.language.reflectiveCalls that are either unnecessary or probably resulting in more complex code. This turned out to be less significant than I thought, but, still worth a touch-up. ## How was this patch tested? Existing tests. Author: Sean Owen Closes #17949 from srowen/SPARK-20554. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc8a2b6e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc8a2b6e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc8a2b6e Branch: refs/heads/master Commit: fc8a2b6ee6f1041345f9ce9701fac496c3c3b1e6 Parents: 720708c Author: Sean Owen Authored: Fri May 12 09:55:04 2017 +0100 Committer: Sean Owen Committed: Fri May 12 09:55:04 2017 +0100 -- .../spark/storage/DiskBlockManagerSuite.scala | 2 -- .../spark/util/random/XORShiftRandomSuite.scala | 16 -- .../spark/examples/ml/DecisionTreeExample.scala | 1 - .../apache/spark/examples/ml/GBTExample.scala | 1 - .../examples/ml/LinearRegressionExample.scala | 2 -- .../examples/ml/LogisticRegressionExample.scala | 1 - .../spark/examples/ml/RandomForestExample.scala | 1 - .../sql/execution/QueryExecutionSuite.scala | 31 +--- .../spark/sql/hive/client/HiveClientImpl.scala | 1 - .../sql/hive/client/IsolatedClientLoader.scala | 1 - .../receiver/BlockGeneratorSuite.scala | 14 - 11 files changed, 25 insertions(+), 46 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fc8a2b6e/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala index bbfd6df..7859b0b 100644 --- a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.storage import java.io.{File, FileWriter} -import scala.language.reflectiveCalls - import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} import org.apache.spark.{SparkConf, SparkFunSuite} http://git-wip-us.apache.org/repos/asf/spark/blob/fc8a2b6e/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala index 83eba36..df34838 100644 --- a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.util.random -import scala.language.reflectiveCalls - import org.apache.commons.math3.stat.inference.ChiSquareTest import org.scalatest.Matchers @@ -27,26 +25,22 @@ import org.apache.spark.util.Utils.times class XORShiftRandomSuite extends SparkFunSuite with Matchers { - private def fixture = new { -val seed = 1L -val xorRand = new XORShiftRandom(seed) -val hundMil = 1e8.toInt - } - /* * This test is based on a chi-squared test for randomness. 
*/ test ("XORShift generates valid random numbers") { -val f = fixture +val xorRand = new XORShiftRandom(1L) val numBins = 10 // create 10 bins val numRows = 5 // create 5 rows val bins = Array.ofDim[Long](numRows, numBins) // populate bins based on modulus of the random number for each row -for (r <- 0 to numRows-1) { - times(f.hundMil) {bins(r)(math.abs(f.xorRand.nextInt) % numBins) += 1} +for (r <- 0 until numRows) { + times(1) { +bins(r)(math.abs(xorRand.nextInt) % numBins) += 1 + } } /* http://git-wip-us.apache.org/repos/asf/spark/blob/fc8a2b6e/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala index f736cee..b03701e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala @@ -21,7 +21,6 @@ package org.apache.spark.examples.ml impo
spark git commit: [SPARK-20554][BUILD] Remove usage of scala.language.reflectiveCalls
Repository: spark Updated Branches: refs/heads/branch-2.2 c1e5ac267 -> 21764f8d1 [SPARK-20554][BUILD] Remove usage of scala.language.reflectiveCalls ## What changes were proposed in this pull request? Remove uses of scala.language.reflectiveCalls that are either unnecessary or probably resulting in more complex code. This turned out to be less significant than I thought, but, still worth a touch-up. ## How was this patch tested? Existing tests. Author: Sean Owen Closes #17949 from srowen/SPARK-20554. (cherry picked from commit fc8a2b6ee6f1041345f9ce9701fac496c3c3b1e6) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/21764f8d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/21764f8d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/21764f8d Branch: refs/heads/branch-2.2 Commit: 21764f8d1ccb903c89a15cb9b7ee4703ae055e14 Parents: c1e5ac2 Author: Sean Owen Authored: Fri May 12 09:55:04 2017 +0100 Committer: Sean Owen Committed: Fri May 12 09:55:14 2017 +0100 -- .../spark/storage/DiskBlockManagerSuite.scala | 2 -- .../spark/util/random/XORShiftRandomSuite.scala | 16 -- .../spark/examples/ml/DecisionTreeExample.scala | 1 - .../apache/spark/examples/ml/GBTExample.scala | 1 - .../examples/ml/LinearRegressionExample.scala | 2 -- .../examples/ml/LogisticRegressionExample.scala | 1 - .../spark/examples/ml/RandomForestExample.scala | 1 - .../sql/execution/QueryExecutionSuite.scala | 31 +--- .../spark/sql/hive/client/HiveClientImpl.scala | 1 - .../sql/hive/client/IsolatedClientLoader.scala | 1 - .../receiver/BlockGeneratorSuite.scala | 14 - 11 files changed, 25 insertions(+), 46 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/21764f8d/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala index bbfd6df..7859b0b 100644 --- a/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala +++ b/core/src/test/scala/org/apache/spark/storage/DiskBlockManagerSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.storage import java.io.{File, FileWriter} -import scala.language.reflectiveCalls - import org.scalatest.{BeforeAndAfterAll, BeforeAndAfterEach} import org.apache.spark.{SparkConf, SparkFunSuite} http://git-wip-us.apache.org/repos/asf/spark/blob/21764f8d/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala index 83eba36..df34838 100644 --- a/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/random/XORShiftRandomSuite.scala @@ -17,8 +17,6 @@ package org.apache.spark.util.random -import scala.language.reflectiveCalls - import org.apache.commons.math3.stat.inference.ChiSquareTest import org.scalatest.Matchers @@ -27,26 +25,22 @@ import org.apache.spark.util.Utils.times class XORShiftRandomSuite extends SparkFunSuite with Matchers { - private def fixture = new { -val seed = 1L -val xorRand = new XORShiftRandom(seed) -val hundMil = 1e8.toInt - } - /* * This test is based on a chi-squared test for randomness. 
*/ test ("XORShift generates valid random numbers") { -val f = fixture +val xorRand = new XORShiftRandom(1L) val numBins = 10 // create 10 bins val numRows = 5 // create 5 rows val bins = Array.ofDim[Long](numRows, numBins) // populate bins based on modulus of the random number for each row -for (r <- 0 to numRows-1) { - times(f.hundMil) {bins(r)(math.abs(f.xorRand.nextInt) % numBins) += 1} +for (r <- 0 until numRows) { + times(1) { +bins(r)(math.abs(xorRand.nextInt) % numBins) += 1 + } } /* http://git-wip-us.apache.org/repos/asf/spark/blob/21764f8d/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala index f736cee..b03701e 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ml/DecisionTreeExample.scala +++ b/examples/src/main/scala/org/apach
spark git commit: [SPARK-20705][WEB-UI] The sort function cannot be used in the master page when you use Firefox or Google Chrome.
Repository: spark Updated Branches: refs/heads/branch-2.2 5842eeca5 -> cb64064dc [SPARK-20705][WEB-UI] The sort function cannot be used in the master page when you use Firefox or Google Chrome. ## What changes were proposed in this pull request? When you open the master page in Firefox or Google Chrome, the browser console reports an error; IE is not affected. e.g. ![error](https://cloud.githubusercontent.com/assets/26266482/25946143/74467a5c-367c-11e7-8f9f-d3585b1aea88.png) My Firefox version is 48.0.2. My Google Chrome version is 49.0.2623.75 m. ## How was this patch tested? manual tests Please review http://spark.apache.org/contributing.html before opening a pull request. Author: guoxiaolong Author: 郭小龙 10207633 Author: guoxiaolongzte Closes #17952 from guoxiaolongzte/SPARK-20705. (cherry picked from commit 99d5799927301e7dfceb9405e2829af3433f104b) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cb64064d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cb64064d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cb64064d Branch: refs/heads/branch-2.2 Commit: cb64064dc61ed657ed2071a3a442e384ad3f995e Parents: 5842eec Author: guoxiaolong Authored: Mon May 15 07:51:50 2017 +0100 Committer: Sean Owen Committed: Mon May 15 07:51:58 2017 +0100 -- core/src/main/resources/org/apache/spark/ui/static/sorttable.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cb64064d/core/src/main/resources/org/apache/spark/ui/static/sorttable.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js index ff24147..9960d5c 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js +++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js @@ -207,8 +207,8 @@ sorttable = { hasInputs = (typeof node.getElementsByTagName == 'function') && node.getElementsByTagName('input').length; - -if (node.getAttribute("sorttable_customkey") != null) { + -if (node.nodeType == 1 && node.getAttribute("sorttable_customkey") != null) { return node.getAttribute("sorttable_customkey"); } else if (typeof node.textContent != 'undefined' && !hasInputs) { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20705][WEB-UI] The sort function cannot be used in the master page when you use Firefox or Google Chrome.
Repository: spark Updated Branches: refs/heads/master 5a799fd8c -> 99d579992 [SPARK-20705][WEB-UI] The sort function cannot be used in the master page when you use Firefox or Google Chrome. ## What changes were proposed in this pull request? When you open the master page in Firefox or Google Chrome, the browser console reports an error; IE is not affected. e.g. ![error](https://cloud.githubusercontent.com/assets/26266482/25946143/74467a5c-367c-11e7-8f9f-d3585b1aea88.png) My Firefox version is 48.0.2. My Google Chrome version is 49.0.2623.75 m. ## How was this patch tested? manual tests Please review http://spark.apache.org/contributing.html before opening a pull request. Author: guoxiaolong Author: 郭小龙 10207633 Author: guoxiaolongzte Closes #17952 from guoxiaolongzte/SPARK-20705. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/99d57999 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/99d57999 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/99d57999 Branch: refs/heads/master Commit: 99d5799927301e7dfceb9405e2829af3433f104b Parents: 5a799fd Author: guoxiaolong Authored: Mon May 15 07:51:50 2017 +0100 Committer: Sean Owen Committed: Mon May 15 07:51:50 2017 +0100 -- core/src/main/resources/org/apache/spark/ui/static/sorttable.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/99d57999/core/src/main/resources/org/apache/spark/ui/static/sorttable.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js index ff24147..9960d5c 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js +++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js @@ -207,8 +207,8 @@ sorttable = { hasInputs = (typeof node.getElementsByTagName == 'function') && node.getElementsByTagName('input').length; - -if (node.getAttribute("sorttable_customkey") != null) { + -if (node.nodeType == 1 && node.getAttribute("sorttable_customkey") != null) { return node.getAttribute("sorttable_customkey"); } else if (typeof node.textContent != 'undefined' && !hasInputs) { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20705][WEB-UI] The sort function cannot be used in the master page when you use Firefox or Google Chrome.
Repository: spark Updated Branches: refs/heads/branch-2.1 95de4672e -> 62969e9be [SPARK-20705][WEB-UI] The sort function cannot be used in the master page when you use Firefox or Google Chrome. ## What changes were proposed in this pull request? When you open the master page in Firefox or Google Chrome, the browser console reports an error; IE is not affected. e.g. ![error](https://cloud.githubusercontent.com/assets/26266482/25946143/74467a5c-367c-11e7-8f9f-d3585b1aea88.png) My Firefox version is 48.0.2. My Google Chrome version is 49.0.2623.75 m. ## How was this patch tested? manual tests Please review http://spark.apache.org/contributing.html before opening a pull request. Author: guoxiaolong Author: 郭小龙 10207633 Author: guoxiaolongzte Closes #17952 from guoxiaolongzte/SPARK-20705. (cherry picked from commit 99d5799927301e7dfceb9405e2829af3433f104b) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/62969e9b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/62969e9b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/62969e9b Branch: refs/heads/branch-2.1 Commit: 62969e9be01b55e1cf5ab2c9b273a9e08d6bc2c3 Parents: 95de467 Author: guoxiaolong Authored: Mon May 15 07:51:50 2017 +0100 Committer: Sean Owen Committed: Mon May 15 07:52:09 2017 +0100 -- core/src/main/resources/org/apache/spark/ui/static/sorttable.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/62969e9b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js index ff24147..9960d5c 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/sorttable.js +++ b/core/src/main/resources/org/apache/spark/ui/static/sorttable.js @@ -207,8 +207,8 @@ sorttable = { hasInputs = (typeof node.getElementsByTagName == 'function') && node.getElementsByTagName('input').length; - -if (node.getAttribute("sorttable_customkey") != null) { + -if (node.nodeType == 1 && node.getAttribute("sorttable_customkey") != null) { return node.getAttribute("sorttable_customkey"); } else if (typeof node.textContent != 'undefined' && !hasInputs) { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20720][WEB-UI] 'Executor Summary' should show the exact number, 'Removed Executors' should display the specific number, in the Application Page
Repository: spark Updated Branches: refs/heads/master 99d579992 -> 8da6e8b1f [SPARK-20720][WEB-UI] 'Executor Summary' should show the exact number, 'Removed Executors' should display the specific number, in the Application Page ## What changes were proposed in this pull request? When the number of Spark worker executors is large, displaying the exact counts makes the web UI much easier to analyze and observe. Although this is a small improvement, it is genuinely valuable. After fix: ![executor1](https://cloud.githubusercontent.com/assets/26266482/25986597/2d8e4386-3723-11e7-9c24-e5bff17c26e2.png) ## How was this patch tested? manual tests Please review http://spark.apache.org/contributing.html before opening a pull request. Author: guoxiaolong Author: 郭小龙 10207633 Author: guoxiaolongzte Closes #17961 from guoxiaolongzte/SPARK-20720. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8da6e8b1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8da6e8b1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8da6e8b1 Branch: refs/heads/master Commit: 8da6e8b1f3e8f40cbacd323904d73fa0cb962689 Parents: 99d5799 Author: guoxiaolong Authored: Mon May 15 07:53:57 2017 +0100 Committer: Sean Owen Committed: Mon May 15 07:53:57 2017 +0100 -- .../apache/spark/deploy/master/ui/ApplicationPage.scala | 4 ++-- .../org/apache/spark/deploy/master/ui/MasterPage.scala| 10 +- 2 files changed, 7 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8da6e8b1/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index 94ff81c..f408964 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -100,11 +100,11 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") - Executor Summary + Executor Summary ({allExecutors.length}) {executorsTable} { if (removedExecutors.nonEmpty) { - Removed Executors ++ + Removed Executors ({removedExecutors.length}) ++ removedExecutorsTable } } http://git-wip-us.apache.org/repos/asf/spark/blob/8da6e8b1/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala index ce71300..bc0bf6a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala @@ -128,14 +128,14 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { - Workers + Workers ({workers.length}) {workerTable} - Running Applications + Running Applications ({activeApps.length}) {activeAppsTable} @@ -144,7 +144,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { {if (hasDrivers) { - Running Drivers + Running Drivers ({activeDrivers.length}) {activeDriversTable} @@ -154,7 +154,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { - Completed Applications + Completed Applications ({completedApps.length}) {completedAppsTable} @@ -164,7 +164,7 @@ private[ui] class MasterPage(parent: MasterWebUI) extends WebUIPage("") { if (hasDrivers) { - Completed 
Drivers + Completed Drivers ({completedDrivers.length}) {completedDriversTable} - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
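In the ApplicationPage and MasterPage sources these headings are Scala XML literals (the surrounding markup is stripped in the mail rendering above); an expression wrapped in braces is evaluated and spliced into the element, which is how the pages now embed the collection sizes. A standalone sketch, assuming the scala-xml module is on the classpath as in a Spark 2.x build:

```scala
object XmlLiteralSketch {
  def main(args: Array[String]): Unit = {
    val workers = Seq("worker-1", "worker-2", "worker-3")

    // Braces inside an XML literal evaluate the enclosed Scala expression
    // and splice the result into the element's text.
    val heading = <h4>Workers ({workers.length})</h4>

    println(heading)  // prints: <h4>Workers (3)</h4>
  }
}
```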
spark git commit: [SPARK-20769][DOC] Incorrect documentation for using Jupyter notebook
Repository: spark Updated Branches: refs/heads/master 30e0557db -> 199541769 [SPARK-20769][DOC] Incorrect documentation for using Jupyter notebook ## What changes were proposed in this pull request? SPARK-13973 incorrectly removed the required PYSPARK_DRIVER_PYTHON_OPTS=notebook from documentation to use pyspark with Jupyter notebook. This patch corrects the documentation error. ## How was this patch tested? Tested invocation locally with ```bash PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS=notebook ./bin/pyspark ``` Author: Andrew Ray Closes #18001 from aray/patch-1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/19954176 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/19954176 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/19954176 Branch: refs/heads/master Commit: 1995417696a028f8a4fa7f706a77537c7182528d Parents: 30e0557 Author: Andrew Ray Authored: Wed May 17 10:06:01 2017 +0100 Committer: Sean Owen Committed: Wed May 17 10:06:01 2017 +0100 -- docs/rdd-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/19954176/docs/rdd-programming-guide.md -- diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md index 52e59df..483acd3 100644 --- a/docs/rdd-programming-guide.md +++ b/docs/rdd-programming-guide.md @@ -247,7 +247,7 @@ $ PYSPARK_DRIVER_PYTHON=ipython ./bin/pyspark To use the Jupyter notebook (previously known as the IPython notebook), {% highlight bash %} -$ PYSPARK_DRIVER_PYTHON=jupyter ./bin/pyspark +$ PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS=notebook ./bin/pyspark {% endhighlight %} You can customize the `ipython` or `jupyter` commands by setting `PYSPARK_DRIVER_PYTHON_OPTS`. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20769][DOC] Incorrect documentation for using Jupyter notebook
Repository: spark Updated Branches: refs/heads/branch-2.2 d42c67a1f -> dac0b50b6 [SPARK-20769][DOC] Incorrect documentation for using Jupyter notebook ## What changes were proposed in this pull request? SPARK-13973 incorrectly removed the required PYSPARK_DRIVER_PYTHON_OPTS=notebook from documentation to use pyspark with Jupyter notebook. This patch corrects the documentation error. ## How was this patch tested? Tested invocation locally with ```bash PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS=notebook ./bin/pyspark ``` Author: Andrew Ray Closes #18001 from aray/patch-1. (cherry picked from commit 1995417696a028f8a4fa7f706a77537c7182528d) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dac0b50b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dac0b50b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dac0b50b Branch: refs/heads/branch-2.2 Commit: dac0b50b68d18c95a9968bc90a013396a42cc526 Parents: d42c67a Author: Andrew Ray Authored: Wed May 17 10:06:01 2017 +0100 Committer: Sean Owen Committed: Wed May 17 10:06:08 2017 +0100 -- docs/rdd-programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dac0b50b/docs/rdd-programming-guide.md -- diff --git a/docs/rdd-programming-guide.md b/docs/rdd-programming-guide.md index e2bf2d7..f7cfd5f 100644 --- a/docs/rdd-programming-guide.md +++ b/docs/rdd-programming-guide.md @@ -247,7 +247,7 @@ $ PYSPARK_DRIVER_PYTHON=ipython ./bin/pyspark To use the Jupyter notebook (previously known as the IPython notebook), {% highlight bash %} -$ PYSPARK_DRIVER_PYTHON=jupyter ./bin/pyspark +$ PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS=notebook ./bin/pyspark {% endhighlight %} You can customize the `ipython` or `jupyter` commands by setting `PYSPARK_DRIVER_PYTHON_OPTS`. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20769][DOC] Incorrect documentation for using Jupyter notebook
Repository: spark Updated Branches: refs/heads/branch-2.1 14b6a9d34 -> ba35c6bd3 [SPARK-20769][DOC] Incorrect documentation for using Jupyter notebook ## What changes were proposed in this pull request? SPARK-13973 incorrectly removed the required PYSPARK_DRIVER_PYTHON_OPTS=notebook from documentation to use pyspark with Jupyter notebook. This patch corrects the documentation error. ## How was this patch tested? Tested invocation locally with ```bash PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS=notebook ./bin/pyspark ``` Author: Andrew Ray Closes #18001 from aray/patch-1. (cherry picked from commit 1995417696a028f8a4fa7f706a77537c7182528d) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba35c6bd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba35c6bd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba35c6bd Branch: refs/heads/branch-2.1 Commit: ba35c6bd320cda6fbb8ff92a6779986cdc464267 Parents: 14b6a9d Author: Andrew Ray Authored: Wed May 17 10:06:01 2017 +0100 Committer: Sean Owen Committed: Wed May 17 10:06:19 2017 +0100 -- docs/programming-guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ba35c6bd/docs/programming-guide.md -- diff --git a/docs/programming-guide.md b/docs/programming-guide.md index 353730c..b9c7a95 100644 --- a/docs/programming-guide.md +++ b/docs/programming-guide.md @@ -247,7 +247,7 @@ $ PYSPARK_DRIVER_PYTHON=ipython ./bin/pyspark To use the Jupyter notebook (previously known as the IPython notebook), {% highlight bash %} -$ PYSPARK_DRIVER_PYTHON=jupyter ./bin/pyspark +$ PYSPARK_DRIVER_PYTHON=jupyter PYSPARK_DRIVER_PYTHON_OPTS=notebook ./bin/pyspark {% endhighlight %} You can customize the `ipython` or `jupyter` commands by setting `PYSPARK_DRIVER_PYTHON_OPTS`. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [INFRA] Close stale PRs
Repository: spark Updated Branches: refs/heads/master b7aac15d5 -> 5d2750aa2 [INFRA] Close stale PRs ## What changes were proposed in this pull request? This PR proposes to close PRs ... - inactive to the review comments more than a month - WIP and inactive more than a month - with Jenkins build failure but inactive more than a month - suggested to be closed and no comment against that - obviously looking inappropriate (e.g., Branch 0.5) To make sure, I left a comment for each PR about a week ago and I could not have a response back from the author in these PRs below: Closes #11129 Closes #12085 Closes #12162 Closes #12419 Closes #12420 Closes #12491 Closes #13762 Closes #13837 Closes #13851 Closes #13881 Closes #13891 Closes #13959 Closes #14091 Closes #14481 Closes #14547 Closes #14557 Closes #14686 Closes #15594 Closes #15652 Closes #15850 Closes #15914 Closes #15918 Closes #16285 Closes #16389 Closes #16652 Closes #16743 Closes #16893 Closes #16975 Closes #17001 Closes #17088 Closes #17119 Closes #17272 Closes #17971 Added: Closes #17778 Closes #17303 Closes #17872 ## How was this patch tested? N/A Author: hyukjinkwon Closes #18017 from HyukjinKwon/close-inactive-prs. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5d2750aa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5d2750aa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5d2750aa Branch: refs/heads/master Commit: 5d2750aa2d5407589ee825950df96f08cfed73c1 Parents: b7aac15 Author: hyukjinkwon Authored: Thu May 18 08:58:23 2017 +0100 Committer: Sean Owen Committed: Thu May 18 08:58:23 2017 +0100 -- -- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20779][EXAMPLES] The ASF header placed in an incorrect location in some files.
Repository: spark Updated Branches: refs/heads/master 5d2750aa2 -> 4779b86b5 [SPARK-20779][EXAMPLES] The ASF header placed in an incorrect location in some files. ## What changes were proposed in this pull request? The license is not at the top in some files. and it will be best if we update these places of the ASF header to be consistent with other files. ## How was this patch tested? manual tests Author: zuotingbing Closes #18012 from zuotingbing/spark-license. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4779b86b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4779b86b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4779b86b Branch: refs/heads/master Commit: 4779b86b5a83beab98cdef191d4174bea2de34fe Parents: 5d2750a Author: zuotingbing Authored: Thu May 18 17:28:14 2017 +0100 Committer: Sean Owen Committed: Thu May 18 17:28:14 2017 +0100 -- examples/src/main/python/parquet_inputformat.py | 3 ++- examples/src/main/python/pi.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4779b86b/examples/src/main/python/parquet_inputformat.py -- diff --git a/examples/src/main/python/parquet_inputformat.py b/examples/src/main/python/parquet_inputformat.py index 29a1ac2..52e9662 100644 --- a/examples/src/main/python/parquet_inputformat.py +++ b/examples/src/main/python/parquet_inputformat.py @@ -1,4 +1,3 @@ -from __future__ import print_function # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with @@ -16,6 +15,8 @@ from __future__ import print_function # limitations under the License. # +from __future__ import print_function + import sys from pyspark.sql import SparkSession http://git-wip-us.apache.org/repos/asf/spark/blob/4779b86b/examples/src/main/python/pi.py -- diff --git a/examples/src/main/python/pi.py b/examples/src/main/python/pi.py index 37029b7..5839cc2 100755 --- a/examples/src/main/python/pi.py +++ b/examples/src/main/python/pi.py @@ -1,4 +1,3 @@ -from __future__ import print_function # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with @@ -16,6 +15,8 @@ from __future__ import print_function # limitations under the License. # +from __future__ import print_function + import sys from random import random from operator import add - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20796] the location of start-master.sh in spark-standalone.md is wrong
Repository: spark Updated Branches: refs/heads/branch-2.2 c708b1480 -> db821fe55 [SPARK-20796] the location of start-master.sh in spark-standalone.md is wrong [https://issues.apache.org/jira/browse/SPARK-20796](https://issues.apache.org/jira/browse/SPARK-20796) the location of start-master.sh in spark-standalone.md should be "sbin/start-master.sh" rather than "bin/start-master.sh". Author: liuzhaokun Closes #18027 from liu-zhaokun/sbin. (cherry picked from commit 99452df44fb98c2721d427da4c97f549793615fe) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/db821fe5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/db821fe5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/db821fe5 Branch: refs/heads/branch-2.2 Commit: db821fe55c99e29dc246c2c3156a1fff3a7ec2a5 Parents: c708b14 Author: liuzhaokun Authored: Thu May 18 17:44:40 2017 +0100 Committer: Sean Owen Committed: Thu May 18 17:44:48 2017 +0100 -- docs/spark-standalone.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/db821fe5/docs/spark-standalone.md -- diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 34ced9e..edefbef 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -83,7 +83,7 @@ Once you've set up this file, you can launch or stop your cluster with the follo - `sbin/start-slaves.sh` - Starts a slave instance on each machine specified in the `conf/slaves` file. - `sbin/start-slave.sh` - Starts a slave instance on the machine the script is executed on. - `sbin/start-all.sh` - Starts both a master and a number of slaves as described above. -- `sbin/stop-master.sh` - Stops the master that was started via the `bin/start-master.sh` script. +- `sbin/stop-master.sh` - Stops the master that was started via the `sbin/start-master.sh` script. - `sbin/stop-slaves.sh` - Stops all slave instances on the machines specified in the `conf/slaves` file. - `sbin/stop-all.sh` - Stops both the master and the slaves as described above. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20796] the location of start-master.sh in spark-standalone.md is wrong
Repository: spark Updated Branches: refs/heads/master 4779b86b5 -> 99452df44 [SPARK-20796] the location of start-master.sh in spark-standalone.md is wrong [https://issues.apache.org/jira/browse/SPARK-20796](https://issues.apache.org/jira/browse/SPARK-20796) the location of start-master.sh in spark-standalone.md should be "sbin/start-master.sh" rather than "bin/start-master.sh". Author: liuzhaokun Closes #18027 from liu-zhaokun/sbin. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/99452df4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/99452df4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/99452df4 Branch: refs/heads/master Commit: 99452df44fb98c2721d427da4c97f549793615fe Parents: 4779b86 Author: liuzhaokun Authored: Thu May 18 17:44:40 2017 +0100 Committer: Sean Owen Committed: Thu May 18 17:44:40 2017 +0100 -- docs/spark-standalone.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/99452df4/docs/spark-standalone.md -- diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 34ced9e..edefbef 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -83,7 +83,7 @@ Once you've set up this file, you can launch or stop your cluster with the follo - `sbin/start-slaves.sh` - Starts a slave instance on each machine specified in the `conf/slaves` file. - `sbin/start-slave.sh` - Starts a slave instance on the machine the script is executed on. - `sbin/start-all.sh` - Starts both a master and a number of slaves as described above. -- `sbin/stop-master.sh` - Stops the master that was started via the `bin/start-master.sh` script. +- `sbin/stop-master.sh` - Stops the master that was started via the `sbin/start-master.sh` script. - `sbin/stop-slaves.sh` - Stops all slave instances on the machines specified in the `conf/slaves` file. - `sbin/stop-all.sh` - Stops both the master and the slaves as described above. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20796] the location of start-master.sh in spark-standalone.md is wrong
Repository: spark Updated Branches: refs/heads/branch-2.1 ba35c6bd3 -> e06d9367f [SPARK-20796] the location of start-master.sh in spark-standalone.md is wrong [https://issues.apache.org/jira/browse/SPARK-20796](https://issues.apache.org/jira/browse/SPARK-20796) the location of start-master.sh in spark-standalone.md should be "sbin/start-master.sh" rather than "bin/start-master.sh". Author: liuzhaokun Closes #18027 from liu-zhaokun/sbin. (cherry picked from commit 99452df44fb98c2721d427da4c97f549793615fe) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e06d9367 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e06d9367 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e06d9367 Branch: refs/heads/branch-2.1 Commit: e06d9367f619c1b66c8778af61725a069ba7eb71 Parents: ba35c6b Author: liuzhaokun Authored: Thu May 18 17:44:40 2017 +0100 Committer: Sean Owen Committed: Thu May 18 17:44:58 2017 +0100 -- docs/spark-standalone.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e06d9367/docs/spark-standalone.md -- diff --git a/docs/spark-standalone.md b/docs/spark-standalone.md index 1c0b60f..084bd70 100644 --- a/docs/spark-standalone.md +++ b/docs/spark-standalone.md @@ -83,7 +83,7 @@ Once you've set up this file, you can launch or stop your cluster with the follo - `sbin/start-slaves.sh` - Starts a slave instance on each machine specified in the `conf/slaves` file. - `sbin/start-slave.sh` - Starts a slave instance on the machine the script is executed on. - `sbin/start-all.sh` - Starts both a master and a number of slaves as described above. -- `sbin/stop-master.sh` - Stops the master that was started via the `bin/start-master.sh` script. +- `sbin/stop-master.sh` - Stops the master that was started via the `sbin/start-master.sh` script. - `sbin/stop-slaves.sh` - Stops all slave instances on the machines specified in the `conf/slaves` file. - `sbin/stop-all.sh` - Stops both the master and the slaves as described above. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20607][CORE] Add new unit tests to ShuffleSuite
Repository: spark Updated Branches: refs/heads/master 3f2cd51ee -> f398640da [SPARK-20607][CORE] Add new unit tests to ShuffleSuite ## What changes were proposed in this pull request? This PR makes two updates: 1. Adds a new unit test verifying that when no shuffle stage has been executed, the shuffle generates neither the data file nor the index file. 2. Modifies the '[SPARK-4085] rerun map stage if reduce stage cannot find its local shuffle file' unit test so that parallelize uses 1 partition rather than 2, and so that the index file is also checked and deleted. ## How was this patch tested? The new unit test. Author: caoxuewen Closes #17868 from heary-cao/ShuffleSuite. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f398640d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f398640d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f398640d Branch: refs/heads/master Commit: f398640daa2ba8033f9a31c8f71cad39924e5eac Parents: 3f2cd51 Author: caoxuewen Authored: Fri May 19 15:25:03 2017 +0100 Committer: Sean Owen Committed: Fri May 19 15:25:03 2017 +0100 -- .../scala/org/apache/spark/ShuffleSuite.scala | 30 ++-- 1 file changed, 28 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f398640d/core/src/test/scala/org/apache/spark/ShuffleSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala index 58b8659..622f798 100644 --- a/core/src/test/scala/org/apache/spark/ShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/ShuffleSuite.scala @@ -28,7 +28,7 @@ import org.apache.spark.rdd.{CoGroupedRDD, OrderedRDDFunctions, RDD, ShuffledRDD import org.apache.spark.scheduler.{MapStatus, MyRDD, SparkListener, SparkListenerTaskEnd} import org.apache.spark.serializer.KryoSerializer import org.apache.spark.shuffle.ShuffleWriter -import org.apache.spark.storage.{ShuffleBlockId, ShuffleDataBlockId} +import org.apache.spark.storage.{ShuffleBlockId, ShuffleDataBlockId, ShuffleIndexBlockId} import org.apache.spark.util.{MutablePair, Utils} abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkContext { @@ -277,7 +277,8 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkC // Delete one of the local shuffle blocks. val hashFile = sc.env.blockManager.diskBlockManager.getFile(new ShuffleBlockId(0, 0, 0)) val sortFile = sc.env.blockManager.diskBlockManager.getFile(new ShuffleDataBlockId(0, 0, 0)) -assert(hashFile.exists() || sortFile.exists()) +val indexFile = sc.env.blockManager.diskBlockManager.getFile(new ShuffleIndexBlockId(0, 0, 0)) +assert(hashFile.exists() || (sortFile.exists() && indexFile.exists())) if (hashFile.exists()) { hashFile.delete() @@ -285,11 +286,36 @@ abstract class ShuffleSuite extends SparkFunSuite with Matchers with LocalSparkC if (sortFile.exists()) { sortFile.delete() } +if (indexFile.exists()) { + indexFile.delete() +} // This count should retry the execution of the previous stage and rerun shuffle. rdd.count() } + test("cannot find its local shuffle file if no execution of the stage and rerun shuffle") { +sc = new SparkContext("local", "test", conf.clone()) +val rdd = sc.parallelize(1 to 10, 1).map((_, 1)).reduceByKey(_ + _) + +// Cannot find one of the local shuffle blocks. 
+val hashFile = sc.env.blockManager.diskBlockManager.getFile(new ShuffleBlockId(0, 0, 0)) +val sortFile = sc.env.blockManager.diskBlockManager.getFile(new ShuffleDataBlockId(0, 0, 0)) +val indexFile = sc.env.blockManager.diskBlockManager.getFile(new ShuffleIndexBlockId(0, 0, 0)) +assert(!hashFile.exists() && !sortFile.exists() && !indexFile.exists()) + +rdd.count() + +// Can find one of the local shuffle blocks. +val hashExistsFile = sc.env.blockManager.diskBlockManager + .getFile(new ShuffleBlockId(0, 0, 0)) +val sortExistsFile = sc.env.blockManager.diskBlockManager + .getFile(new ShuffleDataBlockId(0, 0, 0)) +val indexExistsFile = sc.env.blockManager.diskBlockManager + .getFile(new ShuffleIndexBlockId(0, 0, 0)) +assert(hashExistsFile.exists() || (sortExistsFile.exists() && indexExistsFile.exists())) + } + test("metrics for shuffle without aggregation") { sc = new SparkContext("local", "test", conf.clone()) val numRecords = 1 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
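The new tests lean on the sort shuffle layout: each map task writes one data file plus one index file, and only once a shuffle stage has actually run. Below is a minimal sketch of the same check outside the suite, assuming the code sits in the org.apache.spark package (SparkEnv and the disk block manager are private[spark], which is why the suite itself can reach them) and that the single-partition job produces shuffle blocks with id (0, 0, 0):

```
package org.apache.spark

import org.apache.spark.storage.{ShuffleDataBlockId, ShuffleIndexBlockId}

object ShuffleFileSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext("local", "shuffle-file-sketch")
    val dbm = sc.env.blockManager.diskBlockManager
    val dataFile = dbm.getFile(ShuffleDataBlockId(0, 0, 0))
    val indexFile = dbm.getFile(ShuffleIndexBlockId(0, 0, 0))

    // No shuffle stage has run yet, so neither file should exist on disk.
    println(s"before: data=${dataFile.exists()} index=${indexFile.exists()}")

    // One map partition feeding reduceByKey: running the job materializes both files.
    sc.parallelize(1 to 10, 1).map((_, 1)).reduceByKey(_ + _).count()
    println(s"after:  data=${dataFile.exists()} index=${indexFile.exists()}")

    sc.stop()
  }
}
```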
spark git commit: [SPARK-20759] SCALA_VERSION in _config.yml should be consistent with pom.xml
Repository: spark Updated Branches: refs/heads/master f398640da -> dba2ca2c1 [SPARK-20759] SCALA_VERSION in _config.yml should be consistent with pom.xml [https://issues.apache.org/jira/browse/SPARK-20759](https://issues.apache.org/jira/browse/SPARK-20759) SCALA_VERSION in _config.yml is 2.11.7, but 2.11.8 in pom.xml. So I think SCALA_VERSION in _config.yml should be consistent with pom.xml. Author: liuzhaokun Closes #17992 from liu-zhaokun/new. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dba2ca2c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dba2ca2c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dba2ca2c Branch: refs/heads/master Commit: dba2ca2c129b6d2597f1707e0315d4e238c40ed6 Parents: f398640 Author: liuzhaokun Authored: Fri May 19 15:26:39 2017 +0100 Committer: Sean Owen Committed: Fri May 19 15:26:39 2017 +0100 -- LICENSE| 10 +- docs/_config.yml | 2 +- external/docker/spark-test/base/Dockerfile | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dba2ca2c/LICENSE -- diff --git a/LICENSE b/LICENSE index c21032a..66a2e8f 100644 --- a/LICENSE +++ b/LICENSE @@ -249,11 +249,11 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (Interpreter classes (all .scala files in repl/src/main/scala except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala), and for SerializableMapWrapper in JavaUtils.scala) - (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scalap (org.scala-lang:scalap:2.11.7 - http://www.scala-lang.org/) + (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scalap (org.scala-lang:scalap:2.11.8 - http://www.scala-lang.org/) (BSD-style) scalacheck (org.scalacheck:scalacheck_2.11:1.10.0 - http://www.scalacheck.org) (BSD-style) spire (org.spire-math:spire_2.11:0.7.1 - http://spire-math.org) (BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org) http://git-wip-us.apache.org/repos/asf/spark/blob/dba2ca2c/docs/_config.yml -- diff --git a/docs/_config.yml b/docs/_config.yml index 21255ef..dcc2112 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -17,7 +17,7 @@ include: SPARK_VERSION: 2.3.0-SNAPSHOT SPARK_VERSION_SHORT: 2.3.0 SCALA_BINARY_VERSION: "2.11" -SCALA_VERSION: "2.11.7" +SCALA_VERSION: "2.11.8" MESOS_VERSION: 1.0.0 SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK SPARK_GITHUB_URL: https://github.com/apache/spark http://git-wip-us.apache.org/repos/asf/spark/blob/dba2ca2c/external/docker/spark-test/base/Dockerfile -- diff --git a/external/docker/spark-test/base/Dockerfile b/external/docker/spark-test/base/Dockerfile index 76f550f..5a95a93 100644 --- a/external/docker/spark-test/base/Dockerfile +++ b/external/docker/spark-test/base/Dockerfile @@ -25,7 +25,7 
@@ RUN apt-get update && \ apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server && \ rm -rf /var/lib/apt/lists/* -ENV SCALA_VERSION 2.11.7 +ENV SCALA_VERSION 2.11.8 ENV CDH_VERSION cdh4 ENV SCALA_HOME /opt/scala-$SCALA_VERSION ENV SPARK_HOME /opt/spark - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20759] SCALA_VERSION in _config.yml should be consistent with pom.xml
Repository: spark Updated Branches: refs/heads/branch-2.2 939b9536f -> 001b82c18 [SPARK-20759] SCALA_VERSION in _config.yml should be consistent with pom.xml [https://issues.apache.org/jira/browse/SPARK-20759](https://issues.apache.org/jira/browse/SPARK-20759) SCALA_VERSION in _config.yml is 2.11.7, but 2.11.8 in pom.xml. So I think SCALA_VERSION in _config.yml should be consistent with pom.xml. Author: liuzhaokun Closes #17992 from liu-zhaokun/new. (cherry picked from commit dba2ca2c129b6d2597f1707e0315d4e238c40ed6) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/001b82c1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/001b82c1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/001b82c1 Branch: refs/heads/branch-2.2 Commit: 001b82c18cd6518e9e6ae2e6f6d0de3dbc639943 Parents: 939b953 Author: liuzhaokun Authored: Fri May 19 15:26:39 2017 +0100 Committer: Sean Owen Committed: Fri May 19 15:26:47 2017 +0100 -- LICENSE| 10 +- docs/_config.yml | 2 +- external/docker/spark-test/base/Dockerfile | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/001b82c1/LICENSE -- diff --git a/LICENSE b/LICENSE index c21032a..66a2e8f 100644 --- a/LICENSE +++ b/LICENSE @@ -249,11 +249,11 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (Interpreter classes (all .scala files in repl/src/main/scala except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala), and for SerializableMapWrapper in JavaUtils.scala) - (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scalap (org.scala-lang:scalap:2.11.7 - http://www.scala-lang.org/) + (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scalap (org.scala-lang:scalap:2.11.8 - http://www.scala-lang.org/) (BSD-style) scalacheck (org.scalacheck:scalacheck_2.11:1.10.0 - http://www.scalacheck.org) (BSD-style) spire (org.spire-math:spire_2.11:0.7.1 - http://spire-math.org) (BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org) http://git-wip-us.apache.org/repos/asf/spark/blob/001b82c1/docs/_config.yml -- diff --git a/docs/_config.yml b/docs/_config.yml index 4b35605..b61455e 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -17,7 +17,7 @@ include: SPARK_VERSION: 2.2.1-SNAPSHOT SPARK_VERSION_SHORT: 2.2.1 SCALA_BINARY_VERSION: "2.11" -SCALA_VERSION: "2.11.7" +SCALA_VERSION: "2.11.8" MESOS_VERSION: 1.0.0 SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK SPARK_GITHUB_URL: https://github.com/apache/spark http://git-wip-us.apache.org/repos/asf/spark/blob/001b82c1/external/docker/spark-test/base/Dockerfile -- diff --git a/external/docker/spark-test/base/Dockerfile b/external/docker/spark-test/base/Dockerfile index 76f550f..5a95a93 100644 --- 
a/external/docker/spark-test/base/Dockerfile +++ b/external/docker/spark-test/base/Dockerfile @@ -25,7 +25,7 @@ RUN apt-get update && \ apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server && \ rm -rf /var/lib/apt/lists/* -ENV SCALA_VERSION 2.11.7 +ENV SCALA_VERSION 2.11.8 ENV CDH_VERSION cdh4 ENV SCALA_HOME /opt/scala-$SCALA_VERSION ENV SPARK_HOME /opt/spark - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20759] SCALA_VERSION in _config.yml should be consistent with pom.xml
Repository: spark Updated Branches: refs/heads/branch-2.1 e326de485 -> c53fe793d [SPARK-20759] SCALA_VERSION in _config.yml should be consistent with pom.xml [https://issues.apache.org/jira/browse/SPARK-20759](https://issues.apache.org/jira/browse/SPARK-20759) SCALA_VERSION in _config.yml is 2.11.7, but 2.11.8 in pom.xml. So I think SCALA_VERSION in _config.yml should be consistent with pom.xml. Author: liuzhaokun Closes #17992 from liu-zhaokun/new. (cherry picked from commit dba2ca2c129b6d2597f1707e0315d4e238c40ed6) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c53fe793 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c53fe793 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c53fe793 Branch: refs/heads/branch-2.1 Commit: c53fe793dbe17a3598b0466ec644130cc251c30f Parents: e326de4 Author: liuzhaokun Authored: Fri May 19 15:26:39 2017 +0100 Committer: Sean Owen Committed: Fri May 19 15:27:48 2017 +0100 -- LICENSE| 10 +- docs/_config.yml | 2 +- external/docker/spark-test/base/Dockerfile | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c53fe793/LICENSE -- diff --git a/LICENSE b/LICENSE index 7950dd6..119ecbe 100644 --- a/LICENSE +++ b/LICENSE @@ -249,11 +249,11 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (Interpreter classes (all .scala files in repl/src/main/scala except for Main.Scala, SparkHelper.scala and ExecutorClassLoader.scala), and for SerializableMapWrapper in JavaUtils.scala) - (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.7 - http://www.scala-lang.org/) - (BSD-like) Scalap (org.scala-lang:scalap:2.11.7 - http://www.scala-lang.org/) + (BSD-like) Scala Actors library (org.scala-lang:scala-actors:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Compiler (org.scala-lang:scala-compiler:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Compiler (org.scala-lang:scala-reflect:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scala Library (org.scala-lang:scala-library:2.11.8 - http://www.scala-lang.org/) + (BSD-like) Scalap (org.scala-lang:scalap:2.11.8 - http://www.scala-lang.org/) (BSD-style) scalacheck (org.scalacheck:scalacheck_2.11:1.10.0 - http://www.scalacheck.org) (BSD-style) spire (org.spire-math:spire_2.11:0.7.1 - http://spire-math.org) (BSD-style) spire-macros (org.spire-math:spire-macros_2.11:0.7.1 - http://spire-math.org) http://git-wip-us.apache.org/repos/asf/spark/blob/c53fe793/docs/_config.yml -- diff --git a/docs/_config.yml b/docs/_config.yml index e21d011..ee38a4c 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -17,7 +17,7 @@ include: SPARK_VERSION: 2.1.2-SNAPSHOT SPARK_VERSION_SHORT: 2.1.2 SCALA_BINARY_VERSION: "2.11" -SCALA_VERSION: "2.11.7" +SCALA_VERSION: "2.11.8" MESOS_VERSION: 1.0.0 SPARK_ISSUE_TRACKER_URL: https://issues.apache.org/jira/browse/SPARK SPARK_GITHUB_URL: https://github.com/apache/spark http://git-wip-us.apache.org/repos/asf/spark/blob/c53fe793/external/docker/spark-test/base/Dockerfile -- diff --git a/external/docker/spark-test/base/Dockerfile b/external/docker/spark-test/base/Dockerfile index 76f550f..5a95a93 100644 --- 
a/external/docker/spark-test/base/Dockerfile +++ b/external/docker/spark-test/base/Dockerfile @@ -25,7 +25,7 @@ RUN apt-get update && \ apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server && \ rm -rf /var/lib/apt/lists/* -ENV SCALA_VERSION 2.11.7 +ENV SCALA_VERSION 2.11.8 ENV CDH_VERSION cdh4 ENV SCALA_HOME /opt/scala-$SCALA_VERSION ENV SPARK_HOME /opt/spark - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20781] the location of Dockerfile in docker.properties.template is wrong
Repository: spark Updated Branches: refs/heads/branch-2.1 c53fe793d -> e9804b3d4 [SPARK-20781] the location of Dockerfile in docker.properties.templat is wrong [https://issues.apache.org/jira/browse/SPARK-20781](https://issues.apache.org/jira/browse/SPARK-20781) the location of Dockerfile in docker.properties.template should be "../external/docker/spark-mesos/Dockerfile" Author: liuzhaokun Closes #18013 from liu-zhaokun/dockerfile_location. (cherry picked from commit 749418d285461958a0f22ed355edafd87f1ee913) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e9804b3d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e9804b3d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e9804b3d Branch: refs/heads/branch-2.1 Commit: e9804b3d4442e4c353af4a171e8f3c1915104bd0 Parents: c53fe79 Author: liuzhaokun Authored: Fri May 19 20:47:30 2017 +0100 Committer: Sean Owen Committed: Fri May 19 20:47:48 2017 +0100 -- conf/docker.properties.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e9804b3d/conf/docker.properties.template -- diff --git a/conf/docker.properties.template b/conf/docker.properties.template index 55cb094..2ecb4f1 100644 --- a/conf/docker.properties.template +++ b/conf/docker.properties.template @@ -15,6 +15,6 @@ # limitations under the License. # -spark.mesos.executor.docker.image: +spark.mesos.executor.docker.image: spark.mesos.executor.docker.volumes: /usr/local/lib:/host/usr/local/lib:ro spark.mesos.executor.home: /opt/spark - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20781] the location of Dockerfile in docker.properties.template is wrong
Repository: spark Updated Branches: refs/heads/master b5d8d9ba1 -> 749418d28 [SPARK-20781] the location of Dockerfile in docker.properties.templat is wrong [https://issues.apache.org/jira/browse/SPARK-20781](https://issues.apache.org/jira/browse/SPARK-20781) the location of Dockerfile in docker.properties.template should be "../external/docker/spark-mesos/Dockerfile" Author: liuzhaokun Closes #18013 from liu-zhaokun/dockerfile_location. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/749418d2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/749418d2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/749418d2 Branch: refs/heads/master Commit: 749418d285461958a0f22ed355edafd87f1ee913 Parents: b5d8d9b Author: liuzhaokun Authored: Fri May 19 20:47:30 2017 +0100 Committer: Sean Owen Committed: Fri May 19 20:47:30 2017 +0100 -- conf/docker.properties.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/749418d2/conf/docker.properties.template -- diff --git a/conf/docker.properties.template b/conf/docker.properties.template index 55cb094..2ecb4f1 100644 --- a/conf/docker.properties.template +++ b/conf/docker.properties.template @@ -15,6 +15,6 @@ # limitations under the License. # -spark.mesos.executor.docker.image: +spark.mesos.executor.docker.image: spark.mesos.executor.docker.volumes: /usr/local/lib:/host/usr/local/lib:ro spark.mesos.executor.home: /opt/spark - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20781] the location of Dockerfile in docker.properties.template is wrong
Repository: spark Updated Branches: refs/heads/branch-2.2 4fcd52b48 -> 3aad5982a [SPARK-20781] the location of Dockerfile in docker.properties.templat is wrong [https://issues.apache.org/jira/browse/SPARK-20781](https://issues.apache.org/jira/browse/SPARK-20781) the location of Dockerfile in docker.properties.template should be "../external/docker/spark-mesos/Dockerfile" Author: liuzhaokun Closes #18013 from liu-zhaokun/dockerfile_location. (cherry picked from commit 749418d285461958a0f22ed355edafd87f1ee913) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3aad5982 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3aad5982 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3aad5982 Branch: refs/heads/branch-2.2 Commit: 3aad5982a80c300a6c86b876340da85c64cd6ac6 Parents: 4fcd52b Author: liuzhaokun Authored: Fri May 19 20:47:30 2017 +0100 Committer: Sean Owen Committed: Fri May 19 20:47:37 2017 +0100 -- conf/docker.properties.template | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3aad5982/conf/docker.properties.template -- diff --git a/conf/docker.properties.template b/conf/docker.properties.template index 55cb094..2ecb4f1 100644 --- a/conf/docker.properties.template +++ b/conf/docker.properties.template @@ -15,6 +15,6 @@ # limitations under the License. # -spark.mesos.executor.docker.image: +spark.mesos.executor.docker.image: spark.mesos.executor.docker.volumes: /usr/local/lib:/host/usr/local/lib:ro spark.mesos.executor.home: /opt/spark - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20806][DEPLOY] Launcher: redundant check for Spark lib dir
Repository: spark Updated Branches: refs/heads/master 749418d28 -> bbd8d7def [SPARK-20806][DEPLOY] Launcher: redundant check for Spark lib dir ## What changes were proposed in this pull request? Remove redundant check for libdir in CommandBuilderUtils ## How was this patch tested? Existing tests Author: Sean Owen Closes #18032 from srowen/SPARK-20806. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bbd8d7de Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bbd8d7de Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bbd8d7de Branch: refs/heads/master Commit: bbd8d7def149e787a33ed0decf332d33c593e93c Parents: 749418d Author: Sean Owen Authored: Sat May 20 15:27:13 2017 +0100 Committer: Sean Owen Committed: Sat May 20 15:27:13 2017 +0100 -- .../org/apache/spark/launcher/CommandBuilderUtils.java | 13 - 1 file changed, 4 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bbd8d7de/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java -- diff --git a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java index e14c8aa..47d2f8e 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java +++ b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java @@ -335,22 +335,17 @@ class CommandBuilderUtils { */ static String findJarsDir(String sparkHome, String scalaVersion, boolean failIfNotFound) { // TODO: change to the correct directory once the assembly build is changed. -File libdir; -if (new File(sparkHome, "jars").isDirectory()) { - libdir = new File(sparkHome, "jars"); - checkState(!failIfNotFound || libdir.isDirectory(), -"Library directory '%s' does not exist.", -libdir.getAbsolutePath()); -} else { +File libdir = new File(sparkHome, "jars"); +if (!libdir.isDirectory()) { libdir = new File(sparkHome, String.format("assembly/target/scala-%s/jars", scalaVersion)); if (!libdir.isDirectory()) { checkState(!failIfNotFound, "Library directory '%s' does not exist; make sure Spark is built.", libdir.getAbsolutePath()); -libdir = null; +return null; } } -return libdir != null ? libdir.getAbsolutePath() : null; +return libdir.getAbsolutePath(); } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20687][MLLIB] mllib.Matrices.fromBreeze may crash when converting from Breeze sparse matrix
Repository: spark Updated Branches: refs/heads/master a2b3b6762 -> 06dda1d58 [SPARK-20687][MLLIB] mllib.Matrices.fromBreeze may crash when converting from Breeze sparse matrix ## What changes were proposed in this pull request? When two Breeze SparseMatrices are operated, the result matrix may contain provisional 0 values extra in rowIndices and data arrays. This causes an incoherence with the colPtrs data, but Breeze get away with this incoherence by keeping a counter of the valid data. In spark, when this matrices are converted to SparseMatrices, Sparks relies solely on rowIndices, data, and colPtrs, but these might be incorrect because of breeze internal hacks. Therefore, we need to slice both rowIndices and data, using their counter of active data This method is at least called by BlockMatrix when performing distributed block operations, causing exceptions on valid operations. See http://stackoverflow.com/questions/33528555/error-thrown-when-using-blockmatrix-add ## How was this patch tested? Added a test to MatricesSuite that verifies that the conversions are valid and that code doesn't crash. Originally the same code would crash on Spark. Bugfix for https://issues.apache.org/jira/browse/SPARK-20687 Author: Ignacio Bermudez Author: Ignacio Bermudez Corrales Closes #17940 from ghoto/bug-fix/SPARK-20687. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/06dda1d5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/06dda1d5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/06dda1d5 Branch: refs/heads/master Commit: 06dda1d58f8670e996921e935d5f5402d664699e Parents: a2b3b67 Author: Ignacio Bermudez Authored: Mon May 22 10:27:28 2017 +0100 Committer: Sean Owen Committed: Mon May 22 10:27:28 2017 +0100 -- .../apache/spark/mllib/linalg/Matrices.scala| 11 ++- .../spark/mllib/linalg/MatricesSuite.scala | 20 2 files changed, 30 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/06dda1d5/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 6c39fe5..2b2b5fe 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -992,7 +992,16 @@ object Matrices { new DenseMatrix(dm.rows, dm.cols, dm.data, dm.isTranspose) case sm: BSM[Double] => // There is no isTranspose flag for sparse matrices in Breeze -new SparseMatrix(sm.rows, sm.cols, sm.colPtrs, sm.rowIndices, sm.data) +val nsm = if (sm.rowIndices.length > sm.activeSize) { + // This sparse matrix has trailing zeros. + // Remove them by compacting the matrix. 
+ val csm = sm.copy + csm.compact() + csm +} else { + sm +} +new SparseMatrix(nsm.rows, nsm.cols, nsm.colPtrs, nsm.rowIndices, nsm.data) case _ => throw new UnsupportedOperationException( s"Do not support conversion from type ${breeze.getClass.getName}.") http://git-wip-us.apache.org/repos/asf/spark/blob/06dda1d5/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index 5637569..93c00d8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -513,6 +513,26 @@ class MatricesSuite extends SparkFunSuite { Matrices.fromBreeze(sum) } + test("Test FromBreeze when Breeze.CSCMatrix.rowIndices has trailing zeros. - SPARK-20687") { +// (2, 0, 0) +// (2, 0, 0) +val mat1Brz = Matrices.sparse(2, 3, Array(0, 2, 2, 2), Array(0, 1), Array(2, 2)).asBreeze +// (2, 1E-15, 1E-15) +// (2, 1E-15, 1E-15) +val mat2Brz = Matrices.sparse(2, 3, + Array(0, 2, 4, 6), + Array(0, 0, 0, 1, 1, 1), + Array(2, 1E-15, 1E-15, 2, 1E-15, 1E-15)).asBreeze +val t1Brz = mat1Brz - mat2Brz +val t2Brz = mat2Brz - mat1Brz +// The following operations raise exceptions on un-patch Matrices.fromBreeze +val t1 = Matrices.fromBreeze(t1Brz) +val t2 = Matrices.fromBreeze(t2Brz) +// t1 == t1Brz && t2 == t2Brz +assert((t1.asBreeze - t1Brz).iterator.map((x) => math.abs(x._2)).sum < 1E-15) +assert((t2.asBreeze - t2Brz).iterator.map((x) => math.abs(x._2
spark git commit: [SPARK-20687][MLLIB] mllib.Matrices.fromBreeze may crash when converting from Breeze sparse matrix
Repository: spark Updated Branches: refs/heads/branch-2.2 41d8d2165 -> af1ff8b00 [SPARK-20687][MLLIB] mllib.Matrices.fromBreeze may crash when converting from Breeze sparse matrix ## What changes were proposed in this pull request? When two Breeze SparseMatrices are operated, the result matrix may contain provisional 0 values extra in rowIndices and data arrays. This causes an incoherence with the colPtrs data, but Breeze get away with this incoherence by keeping a counter of the valid data. In spark, when this matrices are converted to SparseMatrices, Sparks relies solely on rowIndices, data, and colPtrs, but these might be incorrect because of breeze internal hacks. Therefore, we need to slice both rowIndices and data, using their counter of active data This method is at least called by BlockMatrix when performing distributed block operations, causing exceptions on valid operations. See http://stackoverflow.com/questions/33528555/error-thrown-when-using-blockmatrix-add ## How was this patch tested? Added a test to MatricesSuite that verifies that the conversions are valid and that code doesn't crash. Originally the same code would crash on Spark. Bugfix for https://issues.apache.org/jira/browse/SPARK-20687 Author: Ignacio Bermudez Author: Ignacio Bermudez Corrales Closes #17940 from ghoto/bug-fix/SPARK-20687. (cherry picked from commit 06dda1d58f8670e996921e935d5f5402d664699e) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/af1ff8b0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/af1ff8b0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/af1ff8b0 Branch: refs/heads/branch-2.2 Commit: af1ff8b00ac7271ddf4cef87013e591e46de79e9 Parents: 41d8d21 Author: Ignacio Bermudez Authored: Mon May 22 10:27:28 2017 +0100 Committer: Sean Owen Committed: Mon May 22 10:27:37 2017 +0100 -- .../apache/spark/mllib/linalg/Matrices.scala| 11 ++- .../spark/mllib/linalg/MatricesSuite.scala | 20 2 files changed, 30 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/af1ff8b0/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 6c39fe5..2b2b5fe 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -992,7 +992,16 @@ object Matrices { new DenseMatrix(dm.rows, dm.cols, dm.data, dm.isTranspose) case sm: BSM[Double] => // There is no isTranspose flag for sparse matrices in Breeze -new SparseMatrix(sm.rows, sm.cols, sm.colPtrs, sm.rowIndices, sm.data) +val nsm = if (sm.rowIndices.length > sm.activeSize) { + // This sparse matrix has trailing zeros. + // Remove them by compacting the matrix. 
+ val csm = sm.copy + csm.compact() + csm +} else { + sm +} +new SparseMatrix(nsm.rows, nsm.cols, nsm.colPtrs, nsm.rowIndices, nsm.data) case _ => throw new UnsupportedOperationException( s"Do not support conversion from type ${breeze.getClass.getName}.") http://git-wip-us.apache.org/repos/asf/spark/blob/af1ff8b0/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index 5637569..93c00d8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -513,6 +513,26 @@ class MatricesSuite extends SparkFunSuite { Matrices.fromBreeze(sum) } + test("Test FromBreeze when Breeze.CSCMatrix.rowIndices has trailing zeros. - SPARK-20687") { +// (2, 0, 0) +// (2, 0, 0) +val mat1Brz = Matrices.sparse(2, 3, Array(0, 2, 2, 2), Array(0, 1), Array(2, 2)).asBreeze +// (2, 1E-15, 1E-15) +// (2, 1E-15, 1E-15) +val mat2Brz = Matrices.sparse(2, 3, + Array(0, 2, 4, 6), + Array(0, 0, 0, 1, 1, 1), + Array(2, 1E-15, 1E-15, 2, 1E-15, 1E-15)).asBreeze +val t1Brz = mat1Brz - mat2Brz +val t2Brz = mat2Brz - mat1Brz +// The following operations raise exceptions on un-patch Matrices.fromBreeze +val t1 = Matrices.fromBreeze(t1Brz) +val t2 = Matrices.fromBreeze(t2Brz) +// t1 == t1Brz && t2 == t2Brz +assert((t1.asBreeze - t1Brz).iterator.map(
spark git commit: [SPARK-20687][MLLIB] mllib.Matrices.fromBreeze may crash when converting from Breeze sparse matrix
Repository: spark Updated Branches: refs/heads/branch-2.1 e9804b3d4 -> c3a986b19 [SPARK-20687][MLLIB] mllib.Matrices.fromBreeze may crash when converting from Breeze sparse matrix ## What changes were proposed in this pull request? When two Breeze SparseMatrices are operated, the result matrix may contain provisional 0 values extra in rowIndices and data arrays. This causes an incoherence with the colPtrs data, but Breeze get away with this incoherence by keeping a counter of the valid data. In spark, when this matrices are converted to SparseMatrices, Sparks relies solely on rowIndices, data, and colPtrs, but these might be incorrect because of breeze internal hacks. Therefore, we need to slice both rowIndices and data, using their counter of active data This method is at least called by BlockMatrix when performing distributed block operations, causing exceptions on valid operations. See http://stackoverflow.com/questions/33528555/error-thrown-when-using-blockmatrix-add ## How was this patch tested? Added a test to MatricesSuite that verifies that the conversions are valid and that code doesn't crash. Originally the same code would crash on Spark. Bugfix for https://issues.apache.org/jira/browse/SPARK-20687 Author: Ignacio Bermudez Author: Ignacio Bermudez Corrales Closes #17940 from ghoto/bug-fix/SPARK-20687. (cherry picked from commit 06dda1d58f8670e996921e935d5f5402d664699e) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c3a986b1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c3a986b1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c3a986b1 Branch: refs/heads/branch-2.1 Commit: c3a986b19616217b4b0d8e10e52fcc87fb5356ba Parents: e9804b3 Author: Ignacio Bermudez Authored: Mon May 22 10:27:28 2017 +0100 Committer: Sean Owen Committed: Mon May 22 10:27:48 2017 +0100 -- .../apache/spark/mllib/linalg/Matrices.scala| 11 ++- .../spark/mllib/linalg/MatricesSuite.scala | 20 2 files changed, 30 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c3a986b1/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index 6c39fe5..2b2b5fe 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -992,7 +992,16 @@ object Matrices { new DenseMatrix(dm.rows, dm.cols, dm.data, dm.isTranspose) case sm: BSM[Double] => // There is no isTranspose flag for sparse matrices in Breeze -new SparseMatrix(sm.rows, sm.cols, sm.colPtrs, sm.rowIndices, sm.data) +val nsm = if (sm.rowIndices.length > sm.activeSize) { + // This sparse matrix has trailing zeros. + // Remove them by compacting the matrix. 
+ val csm = sm.copy + csm.compact() + csm +} else { + sm +} +new SparseMatrix(nsm.rows, nsm.cols, nsm.colPtrs, nsm.rowIndices, nsm.data) case _ => throw new UnsupportedOperationException( s"Do not support conversion from type ${breeze.getClass.getName}.") http://git-wip-us.apache.org/repos/asf/spark/blob/c3a986b1/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala index 5637569..93c00d8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala @@ -513,6 +513,26 @@ class MatricesSuite extends SparkFunSuite { Matrices.fromBreeze(sum) } + test("Test FromBreeze when Breeze.CSCMatrix.rowIndices has trailing zeros. - SPARK-20687") { +// (2, 0, 0) +// (2, 0, 0) +val mat1Brz = Matrices.sparse(2, 3, Array(0, 2, 2, 2), Array(0, 1), Array(2, 2)).asBreeze +// (2, 1E-15, 1E-15) +// (2, 1E-15, 1E-15) +val mat2Brz = Matrices.sparse(2, 3, + Array(0, 2, 4, 6), + Array(0, 0, 0, 1, 1, 1), + Array(2, 1E-15, 1E-15, 2, 1E-15, 1E-15)).asBreeze +val t1Brz = mat1Brz - mat2Brz +val t2Brz = mat2Brz - mat1Brz +// The following operations raise exceptions on un-patch Matrices.fromBreeze +val t1 = Matrices.fromBreeze(t1Brz) +val t2 = Matrices.fromBreeze(t2Brz) +// t1 == t1Brz && t2 == t2Brz +assert((t1.asBreeze - t1Brz).iterator.map(
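All three branches pick up the same compaction step because the underlying issue lives in Breeze: after arithmetic on two CSCMatrix instances, the result can keep provisional entries in rowIndices and data beyond what activeSize says is valid, while Spark's SparseMatrix constructor reads those raw arrays together with colPtrs. A minimal standalone sketch of that mismatch and of the compact() call the fix relies on, assuming the Breeze version bundled with Spark; the values mirror the new test and the small builder helper is only for brevity:

```
import breeze.linalg._

object BreezeCompactSketch {
  // Tiny helper: build a CSCMatrix from (row, col, value) triples.
  private def sparse(rows: Int, cols: Int)(entries: (Int, Int, Double)*): CSCMatrix[Double] = {
    val builder = new CSCMatrix.Builder[Double](rows, cols)
    entries.foreach { case (i, j, v) => builder.add(i, j, v) }
    builder.result()
  }

  def main(args: Array[String]): Unit = {
    // Same shapes as the new test: m1 = [[2,0,0],[2,0,0]], m2 = [[2,1e-15,1e-15],[2,1e-15,1e-15]]
    val m1 = sparse(2, 3)((0, 0, 2.0), (1, 0, 2.0))
    val m2 = sparse(2, 3)((0, 0, 2.0), (1, 0, 2.0),
      (0, 1, 1e-15), (0, 2, 1e-15), (1, 1, 1e-15), (1, 2, 1e-15))

    // Subtraction may leave provisional zero entries beyond activeSize in the result arrays.
    val diff = m2 - m1
    println(s"activeSize=${diff.activeSize} rowIndices.length=${diff.rowIndices.length}")

    // What the patched Matrices.fromBreeze now does before reading the raw arrays:
    val compacted = diff.copy
    compacted.compact()
    assert(compacted.rowIndices.length == compacted.activeSize)
  }
}
```

compact() trims the backing arrays down to activeSize, so rowIndices, data and colPtrs agree again by the time SparseMatrix validates them.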
spark git commit: [SPARK-20591][WEB UI] Succeeded tasks num not equal in all jobs page and job detail page on spark web ui when speculative task(s) exist.
Repository: spark Updated Branches: refs/heads/master be846db48 -> 190d8b0b6 [SPARK-20591][WEB UI] Succeeded tasks num not equal in all jobs page and job detail page on spark web ui when speculative task(s) exist. ## What changes were proposed in this pull request? Modified succeeded num in job detail page from "completed = stageData.completedIndices.size" to "completed = stageData.numCompleteTasks",which making succeeded tasks num in all jobs page and job detail page look more consistent, and more easily to find which stages the speculative task(s) were in. ## How was this patch tested? manual tests Author: fjh100456 Closes #17923 from fjh100456/master. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/190d8b0b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/190d8b0b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/190d8b0b Branch: refs/heads/master Commit: 190d8b0b6393290349ab834457f9e916fdaef530 Parents: be846db Author: fjh100456 Authored: Mon May 22 13:58:42 2017 +0100 Committer: Sean Owen Committed: Mon May 22 13:58:42 2017 +0100 -- .../src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala | 3 ++- .../scala/org/apache/spark/ui/jobs/JobProgressListener.scala | 1 + core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala | 1 + core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala | 7 +-- 4 files changed, 5 insertions(+), 7 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/190d8b0b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala index a0fd29c..cce7a76 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/AllJobsPage.scala @@ -631,7 +631,8 @@ private[ui] class JobPagedTable( {if (job.numSkippedStages > 0) s"(${job.numSkippedStages} skipped)"} -{UIUtils.makeProgressBar(started = job.numActiveTasks, completed = job.numCompletedTasks, +{UIUtils.makeProgressBar(started = job.numActiveTasks, +completed = job.completedIndices.size, failed = job.numFailedTasks, skipped = job.numSkippedTasks, reasonToNumKilled = job.reasonToNumKilled, total = job.numTasks - job.numSkippedTasks)} http://git-wip-us.apache.org/repos/asf/spark/blob/190d8b0b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala index 7370f9f..1b10feb 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/JobProgressListener.scala @@ -423,6 +423,7 @@ class JobProgressListener(conf: SparkConf) extends SparkListener with Logging { jobData.numActiveTasks -= 1 taskEnd.reason match { case Success => +jobData.completedIndices.add((taskEnd.stageId, info.index)) jobData.numCompletedTasks += 1 case kill: TaskKilled => jobData.reasonToNumKilled = jobData.reasonToNumKilled.updated( http://git-wip-us.apache.org/repos/asf/spark/blob/190d8b0b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala index 8d280bc..048c4ad 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala +++ 
b/core/src/main/scala/org/apache/spark/ui/jobs/UIData.scala @@ -62,6 +62,7 @@ private[spark] object UIData { var numTasks: Int = 0, var numActiveTasks: Int = 0, var numCompletedTasks: Int = 0, +var completedIndices: OpenHashSet[(Int, Int)] = new OpenHashSet[(Int, Int)](), var numSkippedTasks: Int = 0, var numFailedTasks: Int = 0, var reasonToNumKilled: Map[String, Int] = Map.empty, http://git-wip-us.apache.org/repos/asf/spark/blob/190d8b0b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala index bdd1488..267c8dc 100644 --- a/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/UISeleniumSuite.scala @@ -320,12 +320,7
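The switch from the raw counter to completedIndices matters exactly when speculation is on: a task index can report Success twice (the original attempt plus the speculative copy), and only a set keyed by (stageId, task index) collapses the two into one unit of progress. A minimal sketch with plain Scala collections standing in for Spark's internal OpenHashSet:

```
import scala.collection.mutable

object SucceededTasksSketch {
  def main(args: Array[String]): Unit = {
    // (stageId, taskIndex) of successful task-end events; index 3 succeeded twice
    // because a speculative copy also finished.
    val successes = Seq((0, 0), (0, 1), (0, 2), (0, 3), (0, 3))

    var numCompletedTasks = 0                                  // what the job page used before
    val completedIndices = mutable.HashSet.empty[(Int, Int)]   // what it uses after the fix

    successes.foreach { s =>
      numCompletedTasks += 1
      completedIndices += s
    }

    println(s"counter: $numCompletedTasks, distinct task indices: ${completedIndices.size}")
    // counter: 5, distinct task indices: 4 -- the progress bar now shows 4 of 4
  }
}
```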
spark git commit: [SPARK-20609][CORE] Run the SortShuffleSuite unit tests have residual spark_* system directory
Repository: spark Updated Branches: refs/heads/master 190d8b0b6 -> f1ffc6e71 [SPARK-20609][CORE] Run the SortShuffleSuite unit tests have residual spark_* system directory ## What changes were proposed in this pull request? This PR solution to run the SortShuffleSuite unit tests have residual spark_* system directory For example: OS:Windows 7 After the running SortShuffleSuite unit tests, the system of TMP directory have '..\AppData\Local\Temp\spark-f64121f9-11b4-4ffd-a4f0-cfca66643503' not deleted ## How was this patch tested? Run SortShuffleSuite unit test. Author: caoxuewen Closes #17869 from heary-cao/SortShuffleSuite. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f1ffc6e7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f1ffc6e7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f1ffc6e7 Branch: refs/heads/master Commit: f1ffc6e71f5429464d4e98b00351427495c91e05 Parents: 190d8b0 Author: caoxuewen Authored: Mon May 22 14:23:23 2017 +0100 Committer: Sean Owen Committed: Mon May 22 14:23:23 2017 +0100 -- core/src/test/scala/org/apache/spark/SortShuffleSuite.scala | 5 + 1 file changed, 5 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f1ffc6e7/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala b/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala index 7a897c2..c0126e4 100644 --- a/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala +++ b/core/src/test/scala/org/apache/spark/SortShuffleSuite.scala @@ -38,6 +38,10 @@ class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { override def beforeAll() { super.beforeAll() +// Once 'spark.local.dir' is set, it is cached. Unless this is manually cleared +// before/after a test, it could return the same directory even if this property +// is configured. +Utils.clearLocalRootDirs() conf.set("spark.shuffle.manager", "sort") } @@ -50,6 +54,7 @@ class SortShuffleSuite extends ShuffleSuite with BeforeAndAfterAll { override def afterEach(): Unit = { try { Utils.deleteRecursively(tempDir) + Utils.clearLocalRootDirs() } finally { super.afterEach() } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
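The residual spark-* directories come from per-suite state that outlives a test: the resolved spark.local.dir roots are cached in Utils, so a later suite can keep writing into a directory an earlier suite thought it had cleaned up. A minimal sketch of the cleanup discipline the patch applies, using plain ScalaTest and java.io (the suite itself uses Spark's private Utils.deleteRecursively and Utils.clearLocalRootDirs):

```
import java.io.File
import java.nio.file.Files

import org.scalatest.{BeforeAndAfterEach, FunSuite}

// Anything a test materializes on disk (or caches in a singleton, the way Spark caches
// the resolved local dirs) is reset in afterEach, wrapped so the reset still happens
// when an assertion fails.
class TempDirHygieneSuite extends FunSuite with BeforeAndAfterEach {
  private var tempDir: File = _

  private def deleteRecursively(f: File): Unit = {
    Option(f.listFiles()).getOrElse(Array.empty[File]).foreach(deleteRecursively)
    f.delete()
  }

  override def beforeEach(): Unit = {
    super.beforeEach()
    tempDir = Files.createTempDirectory("suite-scratch").toFile
  }

  override def afterEach(): Unit = {
    try {
      deleteRecursively(tempDir)
      // SortShuffleSuite additionally calls Utils.clearLocalRootDirs() here so the
      // cached spark.local.dir resolution cannot leak into the next test or suite.
    } finally {
      super.afterEach()
    }
  }

  test("scratch space is per-test and removed afterwards") {
    assert(tempDir.isDirectory)
  }
}
```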
spark git commit: [SPARK-20813][WEB UI] Fixed Web UI executor page tab search by status not working
Repository: spark Updated Branches: refs/heads/master f1ffc6e71 -> aea73be1b [SPARK-20813][WEB UI] Fixed Web UI executor page tab search by status not working ## What changes were proposed in this pull request? On status column of the table, I removed the condition that forced only the display value to take on values Active, Blacklisted and Dead. Before the removal, values used for sort and filter for that particular column was True and False. ## How was this patch tested? Tested with Active, Blacklisted and Dead present as current status. Author: John Lee Closes #18036 from yoonlee95/SPARK-20813. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/aea73be1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/aea73be1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/aea73be1 Branch: refs/heads/master Commit: aea73be1b436f5812dacc05c002c653f29e344de Parents: f1ffc6e Author: John Lee Authored: Mon May 22 14:24:49 2017 +0100 Committer: Sean Owen Committed: Mon May 22 14:24:49 2017 +0100 -- .../src/main/resources/org/apache/spark/ui/static/executorspage.js | 2 -- 1 file changed, 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/aea73be1/core/src/main/resources/org/apache/spark/ui/static/executorspage.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index 6643a8f..d430d8c 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -26,7 +26,6 @@ function getThreadDumpEnabled() { } function formatStatus(status, type) { -if (type !== 'display') return status; if (status) { return "Active" } else { @@ -417,7 +416,6 @@ $(document).ready(function () { }, {data: 'hostPort'}, {data: 'isActive', render: function (data, type, row) { -if (type !== 'display') return data; if (row.isBlacklisted) return "Blacklisted"; else return formatStatus (data, type); } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20813][WEB UI] Fixed Web UI executor page tab search by status not working
Repository: spark Updated Branches: refs/heads/branch-2.2 50dba3053 -> c4b16dcca [SPARK-20813][WEB UI] Fixed Web UI executor page tab search by status not working ## What changes were proposed in this pull request? On status column of the table, I removed the condition that forced only the display value to take on values Active, Blacklisted and Dead. Before the removal, values used for sort and filter for that particular column was True and False. ## How was this patch tested? Tested with Active, Blacklisted and Dead present as current status. Author: John Lee Closes #18036 from yoonlee95/SPARK-20813. (cherry picked from commit aea73be1b436f5812dacc05c002c653f29e344de) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c4b16dcc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c4b16dcc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c4b16dcc Branch: refs/heads/branch-2.2 Commit: c4b16dcca95bd2f15bc5fd3e1048f8e5444bac90 Parents: 50dba30 Author: John Lee Authored: Mon May 22 14:24:49 2017 +0100 Committer: Sean Owen Committed: Mon May 22 14:24:57 2017 +0100 -- .../src/main/resources/org/apache/spark/ui/static/executorspage.js | 2 -- 1 file changed, 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c4b16dcc/core/src/main/resources/org/apache/spark/ui/static/executorspage.js -- diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index 6643a8f..d430d8c 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -26,7 +26,6 @@ function getThreadDumpEnabled() { } function formatStatus(status, type) { -if (type !== 'display') return status; if (status) { return "Active" } else { @@ -417,7 +416,6 @@ $(document).ready(function () { }, {data: 'hostPort'}, {data: 'isActive', render: function (data, type, row) { -if (type !== 'display') return data; if (row.isBlacklisted) return "Blacklisted"; else return formatStatus (data, type); } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20741][SPARK SUBMIT] Added cleanup of JARs archive generated by SparkSubmit
Repository: spark Updated Branches: refs/heads/master 139da116f -> 7306d5569 [SPARK-20741][SPARK SUBMIT] Added cleanup of JARs archive generated by SparkSubmit ## What changes were proposed in this pull request? Deleted generated JARs archive after distribution to HDFS ## How was this patch tested? Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Lior Regev Closes #17986 from liorregev/master. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7306d556 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7306d556 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7306d556 Branch: refs/heads/master Commit: 7306d556903c832984c7f34f1e8fe738a4b2343c Parents: 139da11 Author: Lior Regev Authored: Thu May 25 17:08:19 2017 +0100 Committer: Sean Owen Committed: Thu May 25 17:08:19 2017 +0100 -- .../yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7306d556/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index b817570..9956071 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -545,6 +545,7 @@ private[spark] class Client( distribute(jarsArchive.toURI.getPath, resType = LocalResourceType.ARCHIVE, destName = Some(LOCALIZED_LIB_DIR)) + jarsArchive.delete() } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20741][SPARK SUBMIT] Added cleanup of JARs archive generated by SparkSubmit
Repository: spark Updated Branches: refs/heads/branch-2.2 e01f1f222 -> 022a4957d [SPARK-20741][SPARK SUBMIT] Added cleanup of JARs archive generated by SparkSubmit ## What changes were proposed in this pull request? Deleted generated JARs archive after distribution to HDFS ## How was this patch tested? Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Lior Regev Closes #17986 from liorregev/master. (cherry picked from commit 7306d556903c832984c7f34f1e8fe738a4b2343c) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/022a4957 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/022a4957 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/022a4957 Branch: refs/heads/branch-2.2 Commit: 022a4957d8dc8d6049e0a8c9191fcfd1bd95a4a4 Parents: e01f1f2 Author: Lior Regev Authored: Thu May 25 17:08:19 2017 +0100 Committer: Sean Owen Committed: Thu May 25 17:08:41 2017 +0100 -- .../yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/022a4957/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala -- diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index b817570..9956071 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -545,6 +545,7 @@ private[spark] class Client( distribute(jarsArchive.toURI.getPath, resType = LocalResourceType.ARCHIVE, destName = Some(LOCALIZED_LIB_DIR)) + jarsArchive.delete() } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
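The one-line fix closes a small disk leak: every YARN submission builds a __spark_libs__*.zip under the local temp directory, and before this change that file stayed behind after it had been shipped to the distributed cache. A minimal sketch of the general pattern, with hypothetical paths rather than the actual Client code:

```
import java.io.File
import java.nio.file.{Files, StandardCopyOption}

object StageAndCleanUpSketch {
  def main(args: Array[String]): Unit = {
    // Hypothetical destination standing in for the remote staging directory.
    val destination = new File(sys.props("java.io.tmpdir"), "staging/__spark_libs__.zip")
    destination.getParentFile.mkdirs()

    // The locally built archive is only a staging artifact.
    val archive = File.createTempFile("__spark_libs__", ".zip")
    try {
      // ... fill the archive with the jars to ship, then hand it off ...
      Files.copy(archive.toPath, destination.toPath, StandardCopyOption.REPLACE_EXISTING)
    } finally {
      // ... and delete the local copy, the equivalent of the added jarsArchive.delete().
      archive.delete()
    }
  }
}
```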
spark git commit: [SPARK-19707][SPARK-18922][TESTS][SQL][CORE] Fix test failures/the invalid path check for sc.addJar on Windows
Repository: spark Updated Branches: refs/heads/branch-2.2 022a4957d -> 5ae1c6521 [SPARK-19707][SPARK-18922][TESTS][SQL][CORE] Fix test failures/the invalid path check for sc.addJar on Windows ## What changes were proposed in this pull request? This PR proposes two things: - A follow up for SPARK-19707 (Improving the invalid path check for sc.addJar on Windows as well). ``` org.apache.spark.SparkContextSuite: - add jar with invalid path *** FAILED *** (32 milliseconds) 2 was not equal to 1 (SparkContextSuite.scala:309) ... ``` - Fix path vs URI related test failures on Windows. ``` org.apache.spark.storage.LocalDirsSuite: - SPARK_LOCAL_DIRS override also affects driver *** FAILED *** (0 milliseconds) new java.io.File("/NONEXISTENT_PATH").exists() was true (LocalDirsSuite.scala:50) ... - Utils.getLocalDir() throws an exception if any temporary directory cannot be retrieved *** FAILED *** (15 milliseconds) Expected exception java.io.IOException to be thrown, but no exception was thrown. (LocalDirsSuite.scala:64) ... ``` ``` org.apache.spark.sql.hive.HiveSchemaInferenceSuite: - orc: schema should be inferred and saved when INFER_AND_SAVE is specified *** FAILED *** (203 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-dae61ab3-a851-4dd3-bf4e-be97c501f254 ... - parquet: schema should be inferred and saved when INFER_AND_SAVE is specified *** FAILED *** (203 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-fa3aff89-a66e-4376-9a37-2a9b87596939 ... - orc: schema should be inferred but not stored when INFER_ONLY is specified *** FAILED *** (141 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-fb464e59-b049-481b-9c75-f53295c9fc2c ... - parquet: schema should be inferred but not stored when INFER_ONLY is specified *** FAILED *** (125 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-9487568e-80a4-42b3-b0a5-d95314c4ccbc ... - orc: schema should not be inferred when NEVER_INFER is specified *** FAILED *** (156 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-0d2dfa45-1b0f-4958-a8be-1074ed0135a ... - parquet: schema should not be inferred when NEVER_INFER is specified *** FAILED *** (547 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-6d95d64e-613e-4a59-a0f6-d198c5aa51ee ... ``` ``` org.apache.spark.sql.execution.command.DDLSuite: - create temporary view using *** FAILED *** (15 milliseconds) org.apache.spark.sql.AnalysisException: Path does not exist: file:/C:projectsspark arget mpspark-3881d9ca-561b-488d-90b9-97587472b853 mp; ... - insert data to a data source table which has a non-existing location should succeed *** FAILED *** (109 milliseconds) file:/C:projectsspark%09arget%09mpspark-4cad3d19-6085-4b75-b407-fe5e9d21df54 did not equal file:///C:/projects/spark/target/tmp/spark-4cad3d19-6085-4b75-b407-fe5e9d21df54 (DDLSuite.scala:1869) ... - insert into a data source table with a non-existing partition location should succeed *** FAILED *** (94 milliseconds) file:/C:projectsspark%09arget%09mpspark-4b52e7de-e3aa-42fd-95d4-6d4d58d1d95d did not equal file:///C:/projects/spark/target/tmp/spark-4b52e7de-e3aa-42fd-95d4-6d4d58d1d95d (DDLSuite.scala:1910) ... 
- read data from a data source table which has a non-existing location should succeed *** FAILED *** (93 milliseconds) file:/C:projectsspark%09arget%09mpspark-f8c281e2-08c2-4f73-abbf-f3865b702c34 did not equal file:///C:/projects/spark/target/tmp/spark-f8c281e2-08c2-4f73-abbf-f3865b702c34 (DDLSuite.scala:1937) ... - read data from a data source table with non-existing partition location should succeed *** FAILED *** (110 milliseconds) java.lang.IllegalArgumentException: Can not create a Path from an empty string ... - create datasource table with a non-existing location *** FAILED *** (94 milliseconds) file:/C:projectsspark%09arget%09mpspark-387316ae-070c-4e78-9b78-19ebf7b29ec8 did not equal file:///C:/projects/spark/target/tmp/spark-387316ae-070c-4e78-9b78-19ebf7b29ec8 (DDLSuite.scala:1982) ... - CTAS for external data source table with a non-existing location *** FAILED *** (16 milliseconds) java.lang.IllegalArgumentException: Can not create a Path from an empty string ... - CTAS for external data source table with a existed location *** FAILED *** (15 milliseconds) java.lang.IllegalArgumentException: Can not create a Path from an empty string ... - data source table:partition column name containing a b *** FAILED *** (125 milliseconds) j
spark git commit: [SPARK-19707][SPARK-18922][TESTS][SQL][CORE] Fix test failures/the invalid path check for sc.addJar on Windows
Repository: spark Updated Branches: refs/heads/master 7306d5569 -> e9f983df2 [SPARK-19707][SPARK-18922][TESTS][SQL][CORE] Fix test failures/the invalid path check for sc.addJar on Windows ## What changes were proposed in this pull request? This PR proposes two things: - A follow up for SPARK-19707 (Improving the invalid path check for sc.addJar on Windows as well). ``` org.apache.spark.SparkContextSuite: - add jar with invalid path *** FAILED *** (32 milliseconds) 2 was not equal to 1 (SparkContextSuite.scala:309) ... ``` - Fix path vs URI related test failures on Windows. ``` org.apache.spark.storage.LocalDirsSuite: - SPARK_LOCAL_DIRS override also affects driver *** FAILED *** (0 milliseconds) new java.io.File("/NONEXISTENT_PATH").exists() was true (LocalDirsSuite.scala:50) ... - Utils.getLocalDir() throws an exception if any temporary directory cannot be retrieved *** FAILED *** (15 milliseconds) Expected exception java.io.IOException to be thrown, but no exception was thrown. (LocalDirsSuite.scala:64) ... ``` ``` org.apache.spark.sql.hive.HiveSchemaInferenceSuite: - orc: schema should be inferred and saved when INFER_AND_SAVE is specified *** FAILED *** (203 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-dae61ab3-a851-4dd3-bf4e-be97c501f254 ... - parquet: schema should be inferred and saved when INFER_AND_SAVE is specified *** FAILED *** (203 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-fa3aff89-a66e-4376-9a37-2a9b87596939 ... - orc: schema should be inferred but not stored when INFER_ONLY is specified *** FAILED *** (141 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-fb464e59-b049-481b-9c75-f53295c9fc2c ... - parquet: schema should be inferred but not stored when INFER_ONLY is specified *** FAILED *** (125 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-9487568e-80a4-42b3-b0a5-d95314c4ccbc ... - orc: schema should not be inferred when NEVER_INFER is specified *** FAILED *** (156 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-0d2dfa45-1b0f-4958-a8be-1074ed0135a ... - parquet: schema should not be inferred when NEVER_INFER is specified *** FAILED *** (547 milliseconds) java.net.URISyntaxException: Illegal character in opaque part at index 2: C:\projects\spark\target\tmp\spark-6d95d64e-613e-4a59-a0f6-d198c5aa51ee ... ``` ``` org.apache.spark.sql.execution.command.DDLSuite: - create temporary view using *** FAILED *** (15 milliseconds) org.apache.spark.sql.AnalysisException: Path does not exist: file:/C:projectsspark arget mpspark-3881d9ca-561b-488d-90b9-97587472b853 mp; ... - insert data to a data source table which has a non-existing location should succeed *** FAILED *** (109 milliseconds) file:/C:projectsspark%09arget%09mpspark-4cad3d19-6085-4b75-b407-fe5e9d21df54 did not equal file:///C:/projects/spark/target/tmp/spark-4cad3d19-6085-4b75-b407-fe5e9d21df54 (DDLSuite.scala:1869) ... - insert into a data source table with a non-existing partition location should succeed *** FAILED *** (94 milliseconds) file:/C:projectsspark%09arget%09mpspark-4b52e7de-e3aa-42fd-95d4-6d4d58d1d95d did not equal file:///C:/projects/spark/target/tmp/spark-4b52e7de-e3aa-42fd-95d4-6d4d58d1d95d (DDLSuite.scala:1910) ... 
- read data from a data source table which has a non-existing location should succeed *** FAILED *** (93 milliseconds) file:/C:projectsspark%09arget%09mpspark-f8c281e2-08c2-4f73-abbf-f3865b702c34 did not equal file:///C:/projects/spark/target/tmp/spark-f8c281e2-08c2-4f73-abbf-f3865b702c34 (DDLSuite.scala:1937) ... - read data from a data source table with non-existing partition location should succeed *** FAILED *** (110 milliseconds) java.lang.IllegalArgumentException: Can not create a Path from an empty string ... - create datasource table with a non-existing location *** FAILED *** (94 milliseconds) file:/C:projectsspark%09arget%09mpspark-387316ae-070c-4e78-9b78-19ebf7b29ec8 did not equal file:///C:/projects/spark/target/tmp/spark-387316ae-070c-4e78-9b78-19ebf7b29ec8 (DDLSuite.scala:1982) ... - CTAS for external data source table with a non-existing location *** FAILED *** (16 milliseconds) java.lang.IllegalArgumentException: Can not create a Path from an empty string ... - CTAS for external data source table with a existed location *** FAILED *** (15 milliseconds) java.lang.IllegalArgumentException: Can not create a Path from an empty string ... - data source table:partition column name containing a b *** FAILED *** (125 milliseconds) java.
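The root cause of most of these failures is that a raw Windows path such as `C:\projects\spark\target\tmp\...` is not itself a valid URI, so constructing a `java.net.URI` from it fails with `URISyntaxException: Illegal character in opaque part`. A minimal sketch of the safe pattern, using only the JDK (illustrative only, not the PR's actual code):

```scala
import java.io.File
import java.net.URI

// Illustrative only: build a proper file: URI from a local filesystem path
// instead of treating the raw path string as a URI.
object WindowsPathToUri {
  def toFileUri(path: String): URI = new File(path).toURI

  def main(args: Array[String]): Unit = {
    val raw = "C:\\projects\\spark\\target\\tmp\\spark-1234"
    // new URI(raw) would fail with "Illegal character in opaque part at index 2"
    // for a Windows-style path; on Windows, File#toURI yields a well-formed
    // file:/C:/projects/... URI instead.
    println(toFileUri(raw))
  }
}
```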
spark git commit: [MINOR] document edge case of updateFunc usage
Repository: spark Updated Branches: refs/heads/master d9ad78908 -> b6f2017a6 [MINOR] document edge case of updateFunc usage ## What changes were proposed in this pull request? Include documentation of the fact that the updateFunc is sometimes called with no new values. This is documented in the main documentation here: https://spark.apache.org/docs/latest/streaming-programming-guide.html#updatestatebykey-operation however from the docs included with the code it is not clear that this is the case. ## How was this patch tested? PR only changes comments. Confirmed code still builds. Author: Wil Selwood Closes #18088 from wselwood/note-edge-case-in-docs. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b6f2017a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b6f2017a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b6f2017a Branch: refs/heads/master Commit: b6f2017a6a5da5ce5aea85934b9df6bc6dcb32e1 Parents: d9ad789 Author: Wil Selwood Authored: Fri May 26 11:29:52 2017 +0100 Committer: Sean Owen Committed: Fri May 26 11:29:52 2017 +0100 -- .../apache/spark/streaming/dstream/PairDStreamFunctions.scala | 7 +++ 1 file changed, 7 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b6f2017a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala index f38c1e7..dcb51d7 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala @@ -389,6 +389,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)]) /** * Return a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of each key. + * In every batch the updateFunc will be called for each state even if there are no new values. * Hash partitioning is used to generate the RDDs with Spark's default number of partitions. * @param updateFunc State update function. If `this` function returns None, then * corresponding state key-value pair will be eliminated. @@ -403,6 +404,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)]) /** * Return a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of each key. + * In every batch the updateFunc will be called for each state even if there are no new values. * Hash partitioning is used to generate the RDDs with `numPartitions` partitions. * @param updateFunc State update function. If `this` function returns None, then * corresponding state key-value pair will be eliminated. @@ -419,6 +421,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)]) /** * Return a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of the key. + * In every batch the updateFunc will be called for each state even if there are no new values. * [[org.apache.spark.Partitioner]] is used to control the partitioning of each RDD. * @param updateFunc State update function. If `this` function returns None, then * corresponding state key-value pair will be eliminated. 
@@ -440,6 +443,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)]) /** * Return a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of each key. + * In every batch the updateFunc will be called for each state even if there are no new values. * [[org.apache.spark.Partitioner]] is used to control the partitioning of each RDD. * @param updateFunc State update function. Note, that this function may generate a different * tuple with a different key than the input key. Therefore keys may be removed @@ -464,6 +468,7 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)]) /** * Return a new "state" DStream where the state for each key is updated by applying * the given function on the previous state of the key and the new values of the key. + * In every batch the updateFunc will be called for each state even if there are no new values. * org.apache.spark.Partitioner is used to control the partitioning of each RDD. * @param upd
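To make the documented edge case concrete, here is a hedged sketch (not taken from the PR) of an update function written to handle being invoked with an empty `Seq` of new values, for example by dropping state for idle keys. It assumes a `DStream[(String, Int)]` and a `StreamingContext` with checkpointing enabled:

```scala
import org.apache.spark.streaming.dstream.DStream

// Sketch only: a per-key running count whose update function tolerates being
// called with no new values, as the added scaladoc notes happens every batch.
object StatefulCountSketch {
  val updateFunc: (Seq[Int], Option[Int]) => Option[Int] =
    (newValues, state) =>
      if (newValues.isEmpty) None // key had no new data this batch: drop its state
      else Some(state.getOrElse(0) + newValues.sum)

  // Requires ssc.checkpoint(...) to have been set on the owning StreamingContext.
  def runningCounts(pairs: DStream[(String, Int)]): DStream[(String, Int)] =
    pairs.updateStateByKey(updateFunc)
}
```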
spark git commit: [SPARK-20835][CORE] It should exit directly when the --total-executor-cores parameter is set to less than 0 when submitting an application
Repository: spark Updated Branches: refs/heads/master 629f38e17 -> 0fd84b05d [SPARK-20835][CORE] It should exit directly when the --total-executor-cores parameter is set to less than 0 when submitting an application ## What changes were proposed in this pull request? In my test, the submitted app ran without an error when --total-executor-cores was less than 0, and only produced the warning: "2017-05-22 17:19:36,319 WARN org.apache.spark.scheduler.TaskSchedulerImpl: Initial job has not accepted any resources; check your cluster UI to ensure that workers are registered and have sufficient resources". spark-submit should instead exit directly when the --total-executor-cores parameter is set to less than 0. ## How was this patch tested? Ran the existing unit tests. Author: 10129659 Closes #18060 from eatoncys/totalcores. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0fd84b05 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0fd84b05 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0fd84b05 Branch: refs/heads/master Commit: 0fd84b05dc9ac3de240791e2d4200d8bdffbb01a Parents: 629f38e Author: 10129659 Authored: Fri May 26 18:03:23 2017 +0100 Committer: Sean Owen Committed: Fri May 26 18:03:23 2017 +0100 -- .../spark/deploy/SparkSubmitArguments.scala | 20 1 file changed, 20 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0fd84b05/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 0144fd1..5100a17 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -27,11 +27,14 @@ import java.util.jar.JarFile import scala.collection.JavaConverters._ import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.io.Source +import scala.util.Try import org.apache.spark.deploy.SparkSubmitAction._ import org.apache.spark.launcher.SparkSubmitArgumentsParser +import org.apache.spark.network.util.JavaUtils import org.apache.spark.util.Utils + /** * Parses and encapsulates arguments from the spark-submit script. * The env argument is used for testing.
@@ -253,6 +256,23 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S if (mainClass == null && SparkSubmit.isUserJar(primaryResource)) { SparkSubmit.printErrorAndExit("No main class set in JAR; please specify one with --class") } +if (driverMemory != null +&& Try(JavaUtils.byteStringAsBytes(driverMemory)).getOrElse(-1L) <= 0) { + SparkSubmit.printErrorAndExit("Driver Memory must be a positive number") +} +if (executorMemory != null +&& Try(JavaUtils.byteStringAsBytes(executorMemory)).getOrElse(-1L) <= 0) { + SparkSubmit.printErrorAndExit("Executor Memory cores must be a positive number") +} +if (executorCores != null && Try(executorCores.toInt).getOrElse(-1) <= 0) { + SparkSubmit.printErrorAndExit("Executor cores must be a positive number") +} +if (totalExecutorCores != null && Try(totalExecutorCores.toInt).getOrElse(-1) <= 0) { + SparkSubmit.printErrorAndExit("Total executor cores must be a positive number") +} +if (numExecutors != null && Try(numExecutors.toInt).getOrElse(-1) <= 0) { + SparkSubmit.printErrorAndExit("Number of executors must be a positive number") +} if (pyFiles != null && !isPython) { SparkSubmit.printErrorAndExit("--py-files given but primary resource is not a Python script") } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
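The validation idiom the patch adds is worth calling out: wrapping the parse in `Try` lets a malformed value and a non-positive value fall through to the same error branch. A standalone sketch of that idiom (hypothetical helper names, not the actual `SparkSubmitArguments` code):

```scala
import scala.util.Try

import org.apache.spark.network.util.JavaUtils

// Sketch of the Try-based validation pattern used in the patch.
object SubmitArgChecks {
  // e.g. requirePositiveMemory("Driver memory", "4g") passes;
  //      requirePositiveMemory("Driver memory", "abc") or "-1g" fails.
  def requirePositiveMemory(name: String, value: String): Unit =
    if (value != null && Try(JavaUtils.byteStringAsBytes(value)).getOrElse(-1L) <= 0) {
      // SparkSubmit.printErrorAndExit in the real code; just fail loudly here.
      sys.error(s"$name must be a positive number (got '$value')")
    }

  def requirePositiveInt(name: String, value: String): Unit =
    if (value != null && Try(value.toInt).getOrElse(-1) <= 0) {
      sys.error(s"$name must be a positive number (got '$value')")
    }
}
```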
spark-website git commit: Add message directing security issues to priv...@spark.apache.org
Repository: spark-website Updated Branches: refs/heads/asf-site 5ed41c8d8 -> 80f50ecca Add message directing security issues to priv...@spark.apache.org Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/80f50ecc Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/80f50ecc Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/80f50ecc Branch: refs/heads/asf-site Commit: 80f50ecca9d8c958e6bbe65eca706abe42cc430a Parents: 5ed41c8 Author: Sean Owen Authored: Fri May 26 18:59:41 2017 +0100 Committer: Sean Owen Committed: Fri May 26 18:59:41 2017 +0100 -- community.md| 8 site/community.html | 8 2 files changed, 16 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/80f50ecc/community.md -- diff --git a/community.md b/community.md index 9fc6136..630bba6 100644 --- a/community.md +++ b/community.md @@ -72,6 +72,14 @@ Some quick tips when using email: and include only a few lines of the pertinent code / log within the email. - No jobs, sales, or solicitation is permitted on the Apache Spark mailing lists. +Reporting Security issues + +If you need to report a possible security vulnerability, please email `priv...@spark.apache.org`. This is a +non-public list that will reach the Spark PMC. Messages to `secur...@apache.org` will also reach the PMC. + +Please review the https://www.apache.org/security/";>Apache Security Team's guidance on reporting +vulnerabilities. + Chat Rooms http://git-wip-us.apache.org/repos/asf/spark-website/blob/80f50ecc/site/community.html -- diff --git a/site/community.html b/site/community.html index e6ec06a..1feee6a 100644 --- a/site/community.html +++ b/site/community.html @@ -271,6 +271,14 @@ and include only a few lines of the pertinent code / log within the email. No jobs, sales, or solicitation is permitted on the Apache Spark mailing lists. +Reporting Security issues + +If you need to report a possible security vulnerability, please email priv...@spark.apache.org. This is a +non-public list that will reach the Spark PMC. Messages to secur...@apache.org will also reach the PMC. + +Please review the https://www.apache.org/security/";>Apache Security Team’s guidance on reporting +vulnerabilities. + Chat Rooms - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20393][WEB UI] Strengthen Spark to prevent XSS vulnerabilities
Repository: spark Updated Branches: refs/heads/branch-2.2 fafe28327 -> f99456b5f [SPARK-20393][WEBU UI] Strengthen Spark to prevent XSS vulnerabilities ## What changes were proposed in this pull request? Add stripXSS and stripXSSMap to Spark Core's UIUtils. Calling these functions at any point that getParameter is called against a HttpServletRequest. ## How was this patch tested? Unit tests, IBM Security AppScan Standard no longer showing vulnerabilities, manual verification of WebUI pages. Author: NICHOLAS T. MARION Closes #17686 from n-marion/xss-fix. (cherry picked from commit b512233a457092b0e2a39d0b42cb021abc69d375) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f99456b5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f99456b5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f99456b5 Branch: refs/heads/branch-2.2 Commit: f99456b5f6225a534ce52cf2b817285eb8853926 Parents: fafe283 Author: NICHOLAS T. MARION Authored: Wed May 10 10:59:57 2017 +0100 Committer: Sean Owen Committed: Fri May 26 19:11:18 2017 +0100 -- .../spark/deploy/history/HistoryPage.scala | 3 +- .../deploy/master/ui/ApplicationPage.scala | 3 +- .../spark/deploy/master/ui/MasterPage.scala | 6 ++- .../apache/spark/deploy/worker/ui/LogPage.scala | 30 +-- .../scala/org/apache/spark/ui/UIUtils.scala | 21 +++ .../spark/ui/exec/ExecutorThreadDumpPage.scala | 4 +- .../org/apache/spark/ui/jobs/AllJobsPage.scala | 14 --- .../org/apache/spark/ui/jobs/JobPage.scala | 3 +- .../org/apache/spark/ui/jobs/JobsTab.scala | 5 ++- .../org/apache/spark/ui/jobs/PoolPage.scala | 3 +- .../org/apache/spark/ui/jobs/StagePage.scala| 15 .../org/apache/spark/ui/jobs/StageTable.scala | 15 .../org/apache/spark/ui/jobs/StagesTab.scala| 5 ++- .../org/apache/spark/ui/storage/RDDPage.scala | 13 --- .../org/apache/spark/ui/UIUtilsSuite.scala | 39 .../spark/deploy/mesos/ui/DriverPage.scala | 3 +- .../spark/sql/execution/ui/ExecutionPage.scala | 3 +- .../ui/ThriftServerSessionPage.scala| 4 +- .../apache/spark/streaming/ui/BatchPage.scala | 5 ++- 19 files changed, 140 insertions(+), 54 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f99456b5/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala index 0e7a6c2..af14717 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala @@ -26,8 +26,9 @@ import org.apache.spark.ui.{UIUtils, WebUIPage} private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { def render(request: HttpServletRequest): Seq[Node] = { +// stripXSS is called first to remove suspicious characters used in XSS attacks val requestedIncomplete = - Option(request.getParameter("showIncomplete")).getOrElse("false").toBoolean + Option(UIUtils.stripXSS(request.getParameter("showIncomplete"))).getOrElse("false").toBoolean val allAppsSize = parent.getApplicationList().count(_.completed != requestedIncomplete) val eventLogsUnderProcessCount = parent.getEventLogsUnderProcess() http://git-wip-us.apache.org/repos/asf/spark/blob/f99456b5/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala 
b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index a8d721f..94ff81c 100644 --- a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -33,7 +33,8 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") /** Executor details for a particular application */ def render(request: HttpServletRequest): Seq[Node] = { -val appId = request.getParameter("appId") +// stripXSS is called first to remove suspicious characters used in XSS attacks +val appId = UIUtils.stripXSS(request.getParameter("appId")) val state = master.askSync[MasterStateResponse](RequestMasterState) val app = state.activeApps.find(_.id == appId) .getOrElse(state.completedApps.find(_.id == appId).orNull) http://git-wip-us.apache.org/repos/asf/spark/blob/f99456b5/core/src/main/scala/org/apache/spark
spark git commit: [SPARK-20393][WEB UI] Strengthen Spark to prevent XSS vulnerabilities
Repository: spark Updated Branches: refs/heads/branch-2.1 ebd72f453 -> 38f37c557 [SPARK-20393][WEBU UI] Strengthen Spark to prevent XSS vulnerabilities Add stripXSS and stripXSSMap to Spark Core's UIUtils. Calling these functions at any point that getParameter is called against a HttpServletRequest. Unit tests, IBM Security AppScan Standard no longer showing vulnerabilities, manual verification of WebUI pages. Author: NICHOLAS T. MARION Closes #17686 from n-marion/xss-fix. (cherry picked from commit b512233a457092b0e2a39d0b42cb021abc69d375) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/38f37c55 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/38f37c55 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/38f37c55 Branch: refs/heads/branch-2.1 Commit: 38f37c557367e474000d7d7ae659ab8a8df69097 Parents: ebd72f4 Author: NICHOLAS T. MARION Authored: Wed May 10 10:59:57 2017 +0100 Committer: Sean Owen Committed: Sat May 27 12:06:23 2017 +0100 -- .../spark/deploy/history/HistoryPage.scala | 3 +- .../deploy/master/ui/ApplicationPage.scala | 3 +- .../spark/deploy/master/ui/MasterPage.scala | 6 ++- .../apache/spark/deploy/worker/ui/LogPage.scala | 30 +-- .../scala/org/apache/spark/ui/UIUtils.scala | 21 +++ .../spark/ui/exec/ExecutorThreadDumpPage.scala | 4 +- .../org/apache/spark/ui/jobs/AllJobsPage.scala | 14 --- .../org/apache/spark/ui/jobs/JobPage.scala | 3 +- .../org/apache/spark/ui/jobs/JobsTab.scala | 5 ++- .../org/apache/spark/ui/jobs/PoolPage.scala | 3 +- .../org/apache/spark/ui/jobs/StagePage.scala| 15 .../org/apache/spark/ui/jobs/StageTable.scala | 15 .../org/apache/spark/ui/jobs/StagesTab.scala| 5 ++- .../org/apache/spark/ui/storage/RDDPage.scala | 13 --- .../org/apache/spark/ui/UIUtilsSuite.scala | 39 .../spark/deploy/mesos/ui/DriverPage.scala | 3 +- .../spark/sql/execution/ui/ExecutionPage.scala | 3 +- .../ui/ThriftServerSessionPage.scala| 4 +- .../apache/spark/streaming/ui/BatchPage.scala | 5 ++- 19 files changed, 140 insertions(+), 54 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/38f37c55/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala index 0e7a6c2..af14717 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryPage.scala @@ -26,8 +26,9 @@ import org.apache.spark.ui.{UIUtils, WebUIPage} private[history] class HistoryPage(parent: HistoryServer) extends WebUIPage("") { def render(request: HttpServletRequest): Seq[Node] = { +// stripXSS is called first to remove suspicious characters used in XSS attacks val requestedIncomplete = - Option(request.getParameter("showIncomplete")).getOrElse("false").toBoolean + Option(UIUtils.stripXSS(request.getParameter("showIncomplete"))).getOrElse("false").toBoolean val allAppsSize = parent.getApplicationList().count(_.completed != requestedIncomplete) val eventLogsUnderProcessCount = parent.getEventLogsUnderProcess() http://git-wip-us.apache.org/repos/asf/spark/blob/38f37c55/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala index 18cff31..5777870 100644 --- 
a/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala +++ b/core/src/main/scala/org/apache/spark/deploy/master/ui/ApplicationPage.scala @@ -33,7 +33,8 @@ private[ui] class ApplicationPage(parent: MasterWebUI) extends WebUIPage("app") /** Executor details for a particular application */ def render(request: HttpServletRequest): Seq[Node] = { -val appId = request.getParameter("appId") +// stripXSS is called first to remove suspicious characters used in XSS attacks +val appId = UIUtils.stripXSS(request.getParameter("appId")) val state = master.askWithRetry[MasterStateResponse](RequestMasterState) val app = state.activeApps.find(_.id == appId) .getOrElse(state.completedApps.find(_.id == appId).orNull) http://git-wip-us.apache.org/repos/asf/spark/blob/38f37c55/core/src/main/scala/org/apache/spark/deploy/master/ui/MasterPage.scala
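The excerpts above show the call sites but not the body of `UIUtils.stripXSS`. As a rough illustration of the idea (strip characters commonly used in reflected-XSS payloads and HTML-escape the rest before the value reaches the page), here is a hedged sketch that is not Spark's actual implementation:

```scala
import javax.servlet.http.HttpServletRequest

import org.apache.commons.lang3.StringEscapeUtils

// Illustrative sanitizer only -- the real UIUtils.stripXSS may differ.
object XssSafeParams {
  def stripXSS(value: String): String =
    if (value == null) null
    else StringEscapeUtils.escapeHtml4(value.replaceAll("[\\r\\n'\"<>]", ""))

  // Sanitized replacement for request.getParameter at WebUI call sites.
  def safeParameter(request: HttpServletRequest, name: String): Option[String] =
    Option(stripXSS(request.getParameter(name)))
}
```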
spark git commit: [SPARK-20875] Spark should print the log when the directory has been deleted
Repository: spark Updated Branches: refs/heads/master 6c1dbd6fc -> 8faffc416 [SPARK-20875] Spark should print the log when the directory has been deleted [https://issues.apache.org/jira/browse/SPARK-20875](https://issues.apache.org/jira/browse/SPARK-20875) When the "deleteRecursively" method is invoked, Spark doesn't log anything when the path is actually deleted. For example, Spark only logs "Removing directory" when the worker begins cleaning spark.work.dir, but never logs that the path has been deleted, so the worker's log file cannot be used to confirm that the deletion succeeded if something goes wrong on the machine. Author: liuzhaokun Closes #18102 from liu-zhaokun/master_log. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8faffc41 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8faffc41 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8faffc41 Branch: refs/heads/master Commit: 8faffc41679cf545c0aea96b05d84f23da1b5eda Parents: 6c1dbd6 Author: liuzhaokun Authored: Sat May 27 13:26:01 2017 +0100 Committer: Sean Owen Committed: Sat May 27 13:26:01 2017 +0100 -- core/src/main/scala/org/apache/spark/util/Utils.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8faffc41/core/src/main/scala/org/apache/spark/util/Utils.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index ad39c74..bbb7999 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -1026,7 +1026,9 @@ private[spark] object Utils extends Logging { ShutdownHookManager.removeShutdownDeleteDir(file) } } finally { -if (!file.delete()) { +if (file.delete()) { + logTrace(s"${file.getAbsolutePath} has been deleted") +} else { // Delete can also fail if the file simply did not exist if (file.exists()) { throw new IOException("Failed to delete: " + file.getAbsolutePath) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
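For readers outside the Spark code base, the same log-on-successful-delete pattern can be sketched as a standalone helper (simplified; Spark's `Utils.deleteRecursively` also handles symlinks and shutdown-hook bookkeeping):

```scala
import java.io.{File, IOException}

import org.slf4j.LoggerFactory

// Simplified sketch of the pattern the patch adds: log when a path really was
// removed, and only treat a failed delete as an error if the path still exists.
object DeleteWithLogging {
  private val log = LoggerFactory.getLogger(getClass)

  def deleteRecursively(file: File): Unit = {
    if (file.isDirectory) {
      Option(file.listFiles()).getOrElse(Array.empty[File]).foreach(deleteRecursively)
    }
    if (file.delete()) {
      log.trace(s"${file.getAbsolutePath} has been deleted")
    } else if (file.exists()) {
      // delete() also returns false when the file never existed
      throw new IOException("Failed to delete: " + file.getAbsolutePath)
    }
  }
}
```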
spark git commit: [MINOR] Fix some indent issues.
Repository: spark Updated Branches: refs/heads/master d797ed0ef -> 80fb24b85 [MINOR] Fix some indent issues. ## What changes were proposed in this pull request? Fix some indent issues. ## How was this patch tested? existing tests. Author: Yuming Wang Closes #18133 from wangyum/IndentIssues. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/80fb24b8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/80fb24b8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/80fb24b8 Branch: refs/heads/master Commit: 80fb24b85ddcea768c5261e82449d673993e39af Parents: d797ed0 Author: Yuming Wang Authored: Tue May 30 12:15:54 2017 +0100 Committer: Sean Owen Committed: Tue May 30 12:15:54 2017 +0100 -- .../org/apache/spark/sql/catalyst/expressions/hash.scala | 2 +- .../spark/sql/catalyst/expressions/nullExpressions.scala | 6 +++--- .../spark/sql/catalyst/expressions/regexpExpressions.scala | 4 ++-- .../spark/sql/catalyst/expressions/stringExpressions.scala | 8 4 files changed, 10 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/80fb24b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala index 2a5963d..ffd0e64 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/hash.scala @@ -524,7 +524,7 @@ abstract class InterpretedHashFunction { extended = """ Examples: > SELECT _FUNC_('Spark', array(123), 2); --1321691492 + -1321691492 """) case class Murmur3Hash(children: Seq[Expression], seed: Int) extends HashExpression[Int] { def this(arguments: Seq[Expression]) = this(arguments, 42) http://git-wip-us.apache.org/repos/asf/spark/blob/80fb24b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala index 92036b7..0866b8d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/nullExpressions.scala @@ -116,9 +116,9 @@ case class IfNull(left: Expression, right: Expression, child: Expression) @ExpressionDescription( usage = "_FUNC_(expr1, expr2) - Returns null if `expr1` equals to `expr2`, or `expr1` otherwise.", extended = """ - Examples: - > SELECT _FUNC_(2, 2); - NULL +Examples: + > SELECT _FUNC_(2, 2); + NULL """) case class NullIf(left: Expression, right: Expression, child: Expression) extends RuntimeReplaceable { http://git-wip-us.apache.org/repos/asf/spark/blob/80fb24b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala index aa5a1b5..5418ace 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala @@ -99,7 +99,7 @@ abstract class 
StringRegexExpression extends BinaryExpression See also: Use RLIKE to match with standard regular expressions. -""") + """) case class Like(left: Expression, right: Expression) extends StringRegexExpression { override def escape(v: String): String = StringUtils.escapeLikeRegex(v) @@ -175,7 +175,7 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi See also: Use LIKE to match with simple string pattern. -""") + """) case class RLike(left: Expression, right: Expression) extends StringRegexExpression { override def escape(v: String): String = v http://git-wip-us.apache.org/repos/asf/spark/blob/80fb24b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catal
spark git commit: [DOCS][MINOR] Scaladoc fixes (aka typo hunting)
Repository: spark Updated Branches: refs/heads/master 382fefd18 -> beed5e20a [DOCS][MINOR] Scaladoc fixes (aka typo hunting) ## What changes were proposed in this pull request? Minor changes to scaladoc ## How was this patch tested? Local build Author: Jacek Laskowski Closes #18074 from jaceklaskowski/scaladoc-fixes. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/beed5e20 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/beed5e20 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/beed5e20 Branch: refs/heads/master Commit: beed5e20af0a3935ef42beb3431c8630599bf27f Parents: 382fefd Author: Jacek Laskowski Authored: Wed May 31 11:24:37 2017 +0100 Committer: Sean Owen Committed: Wed May 31 11:24:37 2017 +0100 -- .../spark/sql/catalyst/ScalaReflection.scala| 6 +++-- .../spark/sql/catalyst/analysis/Analyzer.scala | 5 ++-- .../catalyst/encoders/ExpressionEncoder.scala | 5 ++-- .../expressions/codegen/CodeGenerator.scala | 2 +- .../codegen/GenerateUnsafeProjection.scala | 6 ++--- .../expressions/windowExpressions.scala | 8 +++--- .../sql/catalyst/planning/QueryPlanner.scala| 14 ++ .../spark/sql/catalyst/trees/TreeNode.scala | 2 +- .../scala/org/apache/spark/sql/Column.scala | 4 +-- .../spark/sql/RelationalGroupedDataset.scala| 24 - .../apache/spark/sql/execution/SparkPlan.scala | 28 +++- .../sql/execution/WholeStageCodegenExec.scala | 14 +- .../execution/window/AggregateProcessor.scala | 17 ++-- .../spark/sql/execution/window/WindowExec.scala | 4 +-- .../apache/spark/sql/expressions/Window.scala | 4 +-- .../spark/sql/expressions/WindowSpec.scala | 2 +- .../scala/org/apache/spark/sql/functions.scala | 4 +-- .../sql/internal/BaseSessionStateBuilder.scala | 2 +- .../spark/sql/internal/SessionState.scala | 2 +- .../apache/spark/sql/sources/interfaces.scala | 2 +- 20 files changed, 82 insertions(+), 73 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/beed5e20/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 6d1d019..8713053 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -88,8 +88,10 @@ object ScalaReflection extends ScalaReflection { } /** - * Given a type `T` this function constructs and ObjectType that holds a class of type - * Array[T]. Special handling is performed for primitive types to map them back to their raw + * Given a type `T` this function constructs `ObjectType` that holds a class of type + * `Array[T]`. + * + * Special handling is performed for primitive types to map them back to their raw * JVM form instead of the Scala Array that handles auto boxing. 
*/ private def arrayClassFor(tpe: `Type`): ObjectType = ScalaReflectionLock.synchronized { http://git-wip-us.apache.org/repos/asf/spark/blob/beed5e20/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 29183fd..196b4a9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -85,8 +85,7 @@ object AnalysisContext { /** * Provides a logical query plan analyzer, which translates [[UnresolvedAttribute]]s and - * [[UnresolvedRelation]]s into fully typed objects using information in a - * [[SessionCatalog]] and a [[FunctionRegistry]]. + * [[UnresolvedRelation]]s into fully typed objects using information in a [[SessionCatalog]]. */ class Analyzer( catalog: SessionCatalog, @@ -1900,7 +1899,7 @@ class Analyzer( * `[Sum(_w0) OVER (PARTITION BY _w1 ORDER BY _w2)]` and the second returned value will be * [col1, col2 + col3 as _w0, col4 as _w1, col5 as _w2]. * - * @return (seq of expressions containing at lease one window expressions, + * @return (seq of expressions containing at least one window expression, * seq of non-window expressions) */ private def extract( http://git-wip-us.apache.org/repos/asf/spark/blob/beed5e20/sql/catalyst/s