spark git commit: [MINOR] [BUILD] Change link to jenkins builds on github.
Repository: spark Updated Branches: refs/heads/master 3a5c4da47 - da20c8ca3 [MINOR] [BUILD] Change link to jenkins builds on github. Link to the tail of the console log, instead of the full log. That's bound to have the info the user is looking for, and at the same time loads way more quickly than the (huge) full log, which is just one click away if needed. Author: Marcelo Vanzin van...@cloudera.com Closes #6664 from vanzin/jenkins-link and squashes the following commits: ba07ed8 [Marcelo Vanzin] [minor] [build] Change link to jenkins builds on github. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/da20c8ca Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/da20c8ca Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/da20c8ca Branch: refs/heads/master Commit: da20c8ca37663738112b04657057858ee3e55072 Parents: 3a5c4da Author: Marcelo Vanzin van...@cloudera.com Authored: Fri Jun 5 10:32:33 2015 +0200 Committer: Sean Owen so...@cloudera.com Committed: Fri Jun 5 10:32:33 2015 +0200 -- dev/run-tests-jenkins | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/da20c8ca/dev/run-tests-jenkins -- diff --git a/dev/run-tests-jenkins b/dev/run-tests-jenkins index 3cbd866..641b0ff 100755 --- a/dev/run-tests-jenkins +++ b/dev/run-tests-jenkins @@ -193,7 +193,7 @@ done test_result=$? if [ $test_result -eq 124 ]; then -fail_message=**[Test build ${BUILD_DISPLAY_NAME} timed out](${BUILD_URL}consoleFull)** \ +fail_message=**[Test build ${BUILD_DISPLAY_NAME} timed out](${BUILD_URL}console)** \ for PR $ghprbPullId at commit [\`${SHORT_COMMIT_HASH}\`](${COMMIT_URL}) \ after a configured wait of \`${TESTS_TIMEOUT}\`. @@ -233,7 +233,7 @@ done # post end message { result_message=\ - [Test build ${BUILD_DISPLAY_NAME} has finished](${BUILD_URL}consoleFull) for \ + [Test build ${BUILD_DISPLAY_NAME} has finished](${BUILD_URL}console) for \ PR $ghprbPullId at commit [\`${SHORT_COMMIT_HASH}\`](${COMMIT_URL}). result_message=${result_message}\n${test_result_note} - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR] remove unused interpolation var in log message
Repository: spark Updated Branches: refs/heads/master 2777ed394 - 3a5c4da47 [MINOR] remove unused interpolation var in log message Completely trivial but I noticed this wrinkle in a log message today; `$sender` doesn't refer to anything and isn't interpolated here. Author: Sean Owen so...@cloudera.com Closes #6650 from srowen/Interpolation and squashes the following commits: 518687a [Sean Owen] Actually interpolate log string 7edb866 [Sean Owen] Trivial: remove unused interpolation var in log message Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3a5c4da4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3a5c4da4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3a5c4da4 Branch: refs/heads/master Commit: 3a5c4da473a8a497004dfe6eacc0e6646651b227 Parents: 2777ed3 Author: Sean Owen so...@cloudera.com Authored: Fri Jun 5 00:32:46 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Fri Jun 5 00:32:46 2015 -0700 -- .../spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3a5c4da4/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index fcad959..7c7f70d 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -103,7 +103,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp case None = // Ignoring the update since we don't know about the executor. logWarning(sIgnored task status update ($taskId state $state) + -from unknown executor $sender with ID $executorId) +sfrom unknown executor with ID $executorId) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
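The fix above is easy to miss in the diff, so here is a small, self-contained Scala sketch (not Spark source) of the underlying pitfall: only literals prefixed with `s` interpolate `$name` references, so a stray `$sender` inside a plain string is emitted verbatim rather than substituted.

```scala
object InterpolationDemo extends App {
  val executorId = "exec-1"

  // Plain string: "$sender" and "$executorId" are kept as literal text.
  val notInterpolated = "from unknown executor $sender with ID $executorId"

  // Interpolated string: the unused reference is dropped and the real value is
  // substituted, mirroring the one-line change in the commit above.
  val interpolated = s"from unknown executor with ID $executorId"

  println(notInterpolated) // from unknown executor $sender with ID $executorId
  println(interpolated)    // from unknown executor with ID exec-1
}
```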
spark git commit: [MINOR] remove unused interpolation var in log message
Repository: spark Updated Branches: refs/heads/branch-1.4 f02af7c8f - 90cf68638 [MINOR] remove unused interpolation var in log message Completely trivial but I noticed this wrinkle in a log message today; `$sender` doesn't refer to anything and isn't interpolated here. Author: Sean Owen so...@cloudera.com Closes #6650 from srowen/Interpolation and squashes the following commits: 518687a [Sean Owen] Actually interpolate log string 7edb866 [Sean Owen] Trivial: remove unused interpolation var in log message (cherry picked from commit 3a5c4da473a8a497004dfe6eacc0e6646651b227) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/90cf6863 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/90cf6863 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/90cf6863 Branch: refs/heads/branch-1.4 Commit: 90cf686386be08439a17d402fb4fd0a35e5a554f Parents: f02af7c Author: Sean Owen so...@cloudera.com Authored: Fri Jun 5 00:32:46 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Fri Jun 5 00:32:52 2015 -0700 -- .../spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/90cf6863/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala -- diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index b2898b1..ab626be 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -98,7 +98,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp case None = // Ignoring the update since we don't know about the executor. logWarning(sIgnored task status update ($taskId state $state) + -from unknown executor $sender with ID $executorId) +sfrom unknown executor with ID $executorId) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [MINOR] [BUILD] Use custom temp directory during build.
Repository: spark Updated Branches: refs/heads/branch-1.4 90cf68638 - 9b3e4c187 [MINOR] [BUILD] Use custom temp directory during build. Even with all the efforts to cleanup the temp directories created by unit tests, Spark leaves a lot of garbage in /tmp after a test run. This change overrides java.io.tmpdir to place those files under the build directory instead. After an sbt full unit test run, I was left with 400 MB of temp files. Since they're now under the build dir, it's much easier to clean them up. Also make a slight change to a unit test to make it not pollute the source directory with test data. Author: Marcelo Vanzin van...@cloudera.com Closes #6653 from vanzin/unit-test-tmp and squashes the following commits: 31e2dd5 [Marcelo Vanzin] Fix tests that depend on each other. aa92944 [Marcelo Vanzin] [minor] [build] Use custom temp directory during build. (cherry picked from commit b16b5434ff44c42e4b3a337f9af147669ba44896) Signed-off-by: Sean Owen so...@cloudera.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9b3e4c18 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9b3e4c18 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9b3e4c18 Branch: refs/heads/branch-1.4 Commit: 9b3e4c187123baa22666b8e119ddd55dafecbf89 Parents: 90cf686 Author: Marcelo Vanzin van...@cloudera.com Authored: Fri Jun 5 14:11:38 2015 +0200 Committer: Sean Owen so...@cloudera.com Committed: Fri Jun 5 14:12:05 2015 +0200 -- .../spark/deploy/SparkSubmitUtilsSuite.scala| 22 +++- pom.xml | 4 +++- project/SparkBuild.scala| 1 + 3 files changed, 16 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9b3e4c18/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index 8fda5c8..07d261c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -28,9 +28,12 @@ import org.apache.ivy.plugins.resolver.IBiblioResolver import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate +import org.apache.spark.util.Utils class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { + private var tempIvyPath: String = _ + private val noOpOutputStream = new OutputStream { def write(b: Int) = {} } @@ -47,6 +50,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { super.beforeAll() // We don't want to write logs during testing SparkSubmitUtils.printStream = new BufferPrintStream +tempIvyPath = Utils.createTempDir(namePrefix = ivy).getAbsolutePath() } test(incorrect maven coordinate throws error) { @@ -90,21 +94,20 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { } test(ivy path works correctly) { -val ivyPath = dummy + File.separator + ivy val md = SparkSubmitUtils.getModuleDescriptor val artifacts = for (i - 0 until 3) yield new MDArtifact(md, sjar-$i, jar, jar) -var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(ivyPath)) +var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(tempIvyPath)) for (i - 0 until 3) { - val index = jPaths.indexOf(ivyPath) + val index = jPaths.indexOf(tempIvyPath) assert(index = 0) - jPaths = jPaths.substring(index + ivyPath.length) + jPaths = 
jPaths.substring(index + tempIvyPath.length) } val main = MavenCoordinate(my.awesome.lib, mylib, 0.1) IvyTestUtils.withRepository(main, None, None) { repo = // end to end val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, Option(repo), -Option(ivyPath), true) - assert(jarPath.indexOf(ivyPath) = 0, should use non-default ivy path) +Option(tempIvyPath), true) + assert(jarPath.indexOf(tempIvyPath) = 0, should use non-default ivy path) } } @@ -123,13 +126,12 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { assert(jarPath.indexOf(mylib) = 0, should find artifact) } // Local ivy repository with modified home -val dummyIvyPath = dummy + File.separator + ivy -val dummyIvyLocal = new File(dummyIvyPath, local + File.separator) +val dummyIvyLocal = new File(tempIvyPath, local + File.separator) IvyTestUtils.withRepository(main, None, Some(dummyIvyLocal), true) { repo =
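As a side note on the test change above: the pattern is to create scratch directories under java.io.tmpdir (which the build can redirect into the build output) instead of hard-coding a relative path such as dummy/ivy that pollutes the source tree. Below is a minimal JDK-only sketch of that idea, assuming nothing beyond the standard library; Spark's own Utils.createTempDir helper used in the diff is similar in spirit.

```scala
import java.nio.file.Files

object TempDirSketch extends App {
  // Honours -Djava.io.tmpdir=<build dir>/tmp when the build sets it, so test
  // artifacts land under the build directory and are cleaned up with it.
  val tempIvyPath: String = Files.createTempDirectory("ivy").toFile.getAbsolutePath

  println(s"resolving test artifacts under $tempIvyPath")
}
```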
spark git commit: [STREAMING] Update streaming-kafka-integration.md
Repository: spark Updated Branches: refs/heads/master b16b5434f - 019dc9f55 [STREAMING] Update streaming-kafka-integration.md Fixed the broken links (Examples) in the documentation. Author: Akhil Das ak...@darktech.ca Closes # from akhld/patch-2 and squashes the following commits: 2228b83 [Akhil Das] Update streaming-kafka-integration.md Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/019dc9f5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/019dc9f5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/019dc9f5 Branch: refs/heads/master Commit: 019dc9f558cf7c0b708d3b1f0882b0c19134ffb6 Parents: b16b543 Author: Akhil Das ak...@darktech.ca Authored: Fri Jun 5 14:23:23 2015 +0200 Committer: Sean Owen so...@cloudera.com Committed: Fri Jun 5 14:23:23 2015 +0200 -- docs/streaming-kafka-integration.md | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/019dc9f5/docs/streaming-kafka-integration.md -- diff --git a/docs/streaming-kafka-integration.md b/docs/streaming-kafka-integration.md index 64714f0..d6d5605 100644 --- a/docs/streaming-kafka-integration.md +++ b/docs/streaming-kafka-integration.md @@ -29,7 +29,7 @@ Next, we discuss how to use this approach in your streaming application. [ZK quorum], [consumer group id], [per-topic number of Kafka partitions to consume]) You can also specify the key and value classes and their corresponding decoder classes using variations of `createStream`. See the [API docs](api/scala/index.html#org.apache.spark.streaming.kafka.KafkaUtils$) - and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala). + and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala). /div div data-lang=java markdown=1 import org.apache.spark.streaming.kafka.*; @@ -39,7 +39,7 @@ Next, we discuss how to use this approach in your streaming application. [ZK quorum], [consumer group id], [per-topic number of Kafka partitions to consume]); You can also specify the key and value classes and their corresponding decoder classes using variations of `createStream`. See the [API docs](api/java/index.html?org/apache/spark/streaming/kafka/KafkaUtils.html) - and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaKafkaWordCount.java). + and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/org/apache/spark/examples/streaming/JavaKafkaWordCount.java). /div div data-lang=python markdown=1 @@ -105,7 +105,7 @@ Next, we discuss how to use this approach in your streaming application. streamingContext, [map of Kafka parameters], [set of topics to consume]) See the [API docs](api/scala/index.html#org.apache.spark.streaming.kafka.KafkaUtils$) - and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala). + and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala). /div div data-lang=java markdown=1 import org.apache.spark.streaming.kafka.*; @@ -116,7 +116,7 @@ Next, we discuss how to use this approach in your streaming application. 
[map of Kafka parameters], [set of topics to consume]); See the [API docs](api/java/index.html?org/apache/spark/streaming/kafka/KafkaUtils.html) - and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java). + and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java). /div /div @@ -153,4 +153,4 @@ Next, we discuss how to use this approach in your streaming application. Another thing to note is that since this approach does not use Receivers, the standard receiver-related (that is, [configurations](configuration.html) of the form `spark.streaming.receiver.*` ) will not apply to the input DStreams created by this approach (will apply to other input DStreams though). Instead, use the [configurations](configuration.html) `spark.streaming.kafka.*`. An important one is `spark.streaming.kafka.maxRatePerPartition` which is
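The page being fixed documents two ways of reading from Kafka; a hedged usage sketch of both, roughly what the linked word-count examples do, follows. The topic name, consumer group id, ZooKeeper and broker addresses are placeholders, and the batch interval and local master are arbitrary choices for a self-contained run.

```scala
import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka.KafkaUtils

object KafkaWordCountSketch extends App {
  // local[2]: one thread for the receiver, one for processing (placeholder master).
  val conf = new SparkConf().setAppName("KafkaWordCountSketch").setMaster("local[2]")
  val ssc = new StreamingContext(conf, Seconds(2))

  // Receiver-based approach: ZK quorum, consumer group id, per-topic partition count.
  val receiverStream = KafkaUtils.createStream(
    ssc, "zk-host:2181", "my-consumer-group", Map("my-topic" -> 1))

  // Direct (receiver-less) approach: Kafka parameters plus the set of topics.
  val directStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
    ssc, Map("metadata.broker.list" -> "kafka-host:9092"), Set("my-topic"))

  receiverStream.map(_._2).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()

  ssc.start()
  ssc.awaitTermination()
}
```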
spark git commit: [STREAMING] Update streaming-kafka-integration.md
Repository: spark Updated Branches: refs/heads/branch-1.4 9b3e4c187 - 0ef2e9d35 [STREAMING] Update streaming-kafka-integration.md Fixed the broken links (Examples) in the documentation. Author: Akhil Das ak...@darktech.ca Closes # from akhld/patch-2 and squashes the following commits: 2228b83 [Akhil Das] Update streaming-kafka-integration.md (cherry picked from commit 019dc9f558cf7c0b708d3b1f0882b0c19134ffb6) Signed-off-by: Sean Owen so...@cloudera.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0ef2e9d3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0ef2e9d3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0ef2e9d3 Branch: refs/heads/branch-1.4 Commit: 0ef2e9d351d1a250ee8f1fe844ce7f38e6664b20 Parents: 9b3e4c1 Author: Akhil Das ak...@darktech.ca Authored: Fri Jun 5 14:23:23 2015 +0200 Committer: Sean Owen so...@cloudera.com Committed: Fri Jun 5 14:24:06 2015 +0200 -- docs/streaming-kafka-integration.md | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0ef2e9d3/docs/streaming-kafka-integration.md -- diff --git a/docs/streaming-kafka-integration.md b/docs/streaming-kafka-integration.md index 64714f0..d6d5605 100644 --- a/docs/streaming-kafka-integration.md +++ b/docs/streaming-kafka-integration.md @@ -29,7 +29,7 @@ Next, we discuss how to use this approach in your streaming application. [ZK quorum], [consumer group id], [per-topic number of Kafka partitions to consume]) You can also specify the key and value classes and their corresponding decoder classes using variations of `createStream`. See the [API docs](api/scala/index.html#org.apache.spark.streaming.kafka.KafkaUtils$) - and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala). + and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/scala/org/apache/spark/examples/streaming/KafkaWordCount.scala). /div div data-lang=java markdown=1 import org.apache.spark.streaming.kafka.*; @@ -39,7 +39,7 @@ Next, we discuss how to use this approach in your streaming application. [ZK quorum], [consumer group id], [per-topic number of Kafka partitions to consume]); You can also specify the key and value classes and their corresponding decoder classes using variations of `createStream`. See the [API docs](api/java/index.html?org/apache/spark/streaming/kafka/KafkaUtils.html) - and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaKafkaWordCount.java). + and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/org/apache/spark/examples/streaming/JavaKafkaWordCount.java). /div div data-lang=python markdown=1 @@ -105,7 +105,7 @@ Next, we discuss how to use this approach in your streaming application. streamingContext, [map of Kafka parameters], [set of topics to consume]) See the [API docs](api/scala/index.html#org.apache.spark.streaming.kafka.KafkaUtils$) - and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/scala-2.10/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala). + and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/scala/org/apache/spark/examples/streaming/DirectKafkaWordCount.scala). 
/div div data-lang=java markdown=1 import org.apache.spark.streaming.kafka.*; @@ -116,7 +116,7 @@ Next, we discuss how to use this approach in your streaming application. [map of Kafka parameters], [set of topics to consume]); See the [API docs](api/java/index.html?org/apache/spark/streaming/kafka/KafkaUtils.html) - and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/scala-2.10/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java). + and the [example]({{site.SPARK_GITHUB_URL}}/blob/master/examples/src/main/java/org/apache/spark/examples/streaming/JavaDirectKafkaWordCount.java). /div /div @@ -153,4 +153,4 @@ Next, we discuss how to use this approach in your streaming application. Another thing to note is that since this approach does not use Receivers, the standard receiver-related (that is, [configurations](configuration.html) of the form `spark.streaming.receiver.*` ) will not apply to the input DStreams created by this approach (will apply to other input DStreams though). Instead, use the
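The tail of the same page (the `spark.streaming.kafka.*` note truncated above) concerns configuration rather than code; a small hedged sketch of setting the rate limit it mentions, with an arbitrary example value:

```scala
import org.apache.spark.SparkConf

object DirectKafkaConfSketch {
  // With the direct (receiver-less) approach the spark.streaming.receiver.* settings
  // do not apply; ingestion is bounded per Kafka partition instead.
  val conf = new SparkConf()
    .setAppName("DirectKafkaApp")
    .set("spark.streaming.kafka.maxRatePerPartition", "1000") // max records/sec per partition (example)
}
```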
spark git commit: [MINOR] [BUILD] Use custom temp directory during build.
Repository: spark Updated Branches: refs/heads/branch-1.3 5b96b6933 - 5185ea9b4 [MINOR] [BUILD] Use custom temp directory during build. Even with all the efforts to cleanup the temp directories created by unit tests, Spark leaves a lot of garbage in /tmp after a test run. This change overrides java.io.tmpdir to place those files under the build directory instead. After an sbt full unit test run, I was left with 400 MB of temp files. Since they're now under the build dir, it's much easier to clean them up. Also make a slight change to a unit test to make it not pollute the source directory with test data. Author: Marcelo Vanzin van...@cloudera.com Closes #6653 from vanzin/unit-test-tmp and squashes the following commits: 31e2dd5 [Marcelo Vanzin] Fix tests that depend on each other. aa92944 [Marcelo Vanzin] [minor] [build] Use custom temp directory during build. (cherry picked from commit b16b5434ff44c42e4b3a337f9af147669ba44896) Signed-off-by: Sean Owen so...@cloudera.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5185ea9b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5185ea9b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5185ea9b Branch: refs/heads/branch-1.3 Commit: 5185ea9b4df3ee73807859b70ddfca8f02f1a659 Parents: 5b96b69 Author: Marcelo Vanzin van...@cloudera.com Authored: Fri Jun 5 14:11:38 2015 +0200 Committer: Sean Owen so...@cloudera.com Committed: Fri Jun 5 14:16:05 2015 +0200 -- .../spark/deploy/SparkSubmitUtilsSuite.scala| 22 +++- pom.xml | 4 +++- project/SparkBuild.scala| 1 + 3 files changed, 16 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5185ea9b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index a49b4db..ceff5d7 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -28,9 +28,12 @@ import org.scalatest.BeforeAndAfterAll import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate +import org.apache.spark.util.Utils class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { + private var tempIvyPath: String = _ + private val noOpOutputStream = new OutputStream { def write(b: Int) = {} } @@ -47,6 +50,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { super.beforeAll() // We don't want to write logs during testing SparkSubmitUtils.printStream = new BufferPrintStream +tempIvyPath = Utils.createTempDir(namePrefix = ivy).getAbsolutePath() } test(incorrect maven coordinate throws error) { @@ -90,21 +94,20 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { } test(ivy path works correctly) { -val ivyPath = dummy + File.separator + ivy val md = SparkSubmitUtils.getModuleDescriptor val artifacts = for (i - 0 until 3) yield new MDArtifact(md, sjar-$i, jar, jar) -var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(ivyPath)) +var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(tempIvyPath)) for (i - 0 until 3) { - val index = jPaths.indexOf(ivyPath) + val index = jPaths.indexOf(tempIvyPath) assert(index = 0) - jPaths = jPaths.substring(index + ivyPath.length) + jPaths = jPaths.substring(index + 
tempIvyPath.length) } val main = MavenCoordinate(my.awesome.lib, mylib, 0.1) IvyTestUtils.withRepository(main, None, None) { repo = // end to end val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, Option(repo), -Option(ivyPath), true) - assert(jarPath.indexOf(ivyPath) = 0, should use non-default ivy path) +Option(tempIvyPath), true) + assert(jarPath.indexOf(tempIvyPath) = 0, should use non-default ivy path) } } @@ -123,13 +126,12 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { assert(jarPath.indexOf(mylib) = 0, should find artifact) } // Local ivy repository with modified home -val dummyIvyPath = dummy + File.separator + ivy -val dummyIvyLocal = new File(dummyIvyPath, local + File.separator) +val dummyIvyLocal = new File(tempIvyPath, local + File.separator) IvyTestUtils.withRepository(main, None, Some(dummyIvyLocal), true) { repo = val jarPath
spark git commit: [MINOR] [BUILD] Use custom temp directory during build.
Repository: spark Updated Branches: refs/heads/master da20c8ca3 - b16b5434f [MINOR] [BUILD] Use custom temp directory during build. Even with all the efforts to cleanup the temp directories created by unit tests, Spark leaves a lot of garbage in /tmp after a test run. This change overrides java.io.tmpdir to place those files under the build directory instead. After an sbt full unit test run, I was left with 400 MB of temp files. Since they're now under the build dir, it's much easier to clean them up. Also make a slight change to a unit test to make it not pollute the source directory with test data. Author: Marcelo Vanzin van...@cloudera.com Closes #6653 from vanzin/unit-test-tmp and squashes the following commits: 31e2dd5 [Marcelo Vanzin] Fix tests that depend on each other. aa92944 [Marcelo Vanzin] [minor] [build] Use custom temp directory during build. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b16b5434 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b16b5434 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b16b5434 Branch: refs/heads/master Commit: b16b5434ff44c42e4b3a337f9af147669ba44896 Parents: da20c8c Author: Marcelo Vanzin van...@cloudera.com Authored: Fri Jun 5 14:11:38 2015 +0200 Committer: Sean Owen so...@cloudera.com Committed: Fri Jun 5 14:11:38 2015 +0200 -- .../spark/deploy/SparkSubmitUtilsSuite.scala| 22 +++- pom.xml | 4 +++- project/SparkBuild.scala| 1 + 3 files changed, 16 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b16b5434/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index 8fda5c8..07d261c 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -28,9 +28,12 @@ import org.apache.ivy.plugins.resolver.IBiblioResolver import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate +import org.apache.spark.util.Utils class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { + private var tempIvyPath: String = _ + private val noOpOutputStream = new OutputStream { def write(b: Int) = {} } @@ -47,6 +50,7 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { super.beforeAll() // We don't want to write logs during testing SparkSubmitUtils.printStream = new BufferPrintStream +tempIvyPath = Utils.createTempDir(namePrefix = ivy).getAbsolutePath() } test(incorrect maven coordinate throws error) { @@ -90,21 +94,20 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { } test(ivy path works correctly) { -val ivyPath = dummy + File.separator + ivy val md = SparkSubmitUtils.getModuleDescriptor val artifacts = for (i - 0 until 3) yield new MDArtifact(md, sjar-$i, jar, jar) -var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(ivyPath)) +var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(tempIvyPath)) for (i - 0 until 3) { - val index = jPaths.indexOf(ivyPath) + val index = jPaths.indexOf(tempIvyPath) assert(index = 0) - jPaths = jPaths.substring(index + ivyPath.length) + jPaths = jPaths.substring(index + tempIvyPath.length) } val main = MavenCoordinate(my.awesome.lib, mylib, 0.1) 
IvyTestUtils.withRepository(main, None, None) { repo = // end to end val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, Option(repo), -Option(ivyPath), true) - assert(jarPath.indexOf(ivyPath) = 0, should use non-default ivy path) +Option(tempIvyPath), true) + assert(jarPath.indexOf(tempIvyPath) = 0, should use non-default ivy path) } } @@ -123,13 +126,12 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { assert(jarPath.indexOf(mylib) = 0, should find artifact) } // Local ivy repository with modified home -val dummyIvyPath = dummy + File.separator + ivy -val dummyIvyLocal = new File(dummyIvyPath, local + File.separator) +val dummyIvyLocal = new File(tempIvyPath, local + File.separator) IvyTestUtils.withRepository(main, None, Some(dummyIvyLocal), true) { repo = val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None, -Some(dummyIvyPath), true) +
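The pom.xml and SparkBuild.scala hunks of this change are elided above. As a purely illustrative build.sbt fragment (not the actual diff; the target/tmp location is an assumption), an sbt build can redirect java.io.tmpdir for forked test JVMs along these lines:

```scala
// Run tests in a forked JVM whose temp dir lives under the build output instead of /tmp.
fork in Test := true

javaOptions in Test += {
  val testTmp = baseDirectory.value / "target" / "tmp"
  testTmp.mkdirs() // java.io.tmpdir must already exist when the JVM starts
  s"-Djava.io.tmpdir=$testTmp"
}
```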
spark git commit: [SPARK-6324] [CORE] Centralize handling of script usage messages.
Repository: spark Updated Branches: refs/heads/master 019dc9f55 - 700312e12 [SPARK-6324] [CORE] Centralize handling of script usage messages. Reorganize code so that the launcher library handles most of the work of printing usage messages, instead of having an awkward protocol between the library and the scripts for that. This mostly applies to SparkSubmit, since the launcher lib does not do command line parsing for classes invoked in other ways, and thus cannot handle failures for those. Most scripts end up going through SparkSubmit, though, so it all works. The change adds a new, internal command line switch, --usage-error, which prints the usage message and exits with a non-zero status. Scripts can override the command printed in the usage message by setting an environment variable - this avoids having to grep the output of SparkSubmit to remove references to the spark-submit script. The only sub-optimal part of the change is the special handling for the spark-sql usage, which is now done in SparkSubmitArguments. Author: Marcelo Vanzin van...@cloudera.com Closes #5841 from vanzin/SPARK-6324 and squashes the following commits: 2821481 [Marcelo Vanzin] Merge branch 'master' into SPARK-6324 bf139b5 [Marcelo Vanzin] Filter output of Spark SQL CLI help. c6609bf [Marcelo Vanzin] Fix exit code never being used when printing usage messages. 6bc1b41 [Marcelo Vanzin] [SPARK-6324] [core] Centralize handling of script usage messages. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/700312e1 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/700312e1 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/700312e1 Branch: refs/heads/master Commit: 700312e12f9588f01a592d6eac7bff7eb366ac8f Parents: 019dc9f Author: Marcelo Vanzin van...@cloudera.com Authored: Fri Jun 5 14:32:00 2015 +0200 Committer: Sean Owen so...@cloudera.com Committed: Fri Jun 5 14:32:00 2015 +0200 -- bin/pyspark | 16 +--- bin/pyspark2.cmd| 1 + bin/spark-class | 13 +-- bin/spark-shell | 15 +--- bin/spark-shell2.cmd| 21 + bin/spark-sql | 39 + bin/spark-submit| 12 --- bin/spark-submit2.cmd | 13 +-- bin/sparkR | 18 + .../org/apache/spark/deploy/SparkSubmit.scala | 10 +-- .../spark/deploy/SparkSubmitArguments.scala | 76 +- .../apache/spark/deploy/SparkSubmitSuite.scala | 2 +- .../java/org/apache/spark/launcher/Main.java| 83 ++-- .../launcher/SparkSubmitCommandBuilder.java | 18 - .../spark/launcher/SparkSubmitOptionParser.java | 2 + 15 files changed, 147 insertions(+), 192 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/700312e1/bin/pyspark -- diff --git a/bin/pyspark b/bin/pyspark index 7cb19c5..f9dbddf 100755 --- a/bin/pyspark +++ b/bin/pyspark @@ -17,24 +17,10 @@ # limitations under the License. # -# Figure out where Spark is installed export SPARK_HOME=$(cd `dirname $0`/..; pwd) source $SPARK_HOME/bin/load-spark-env.sh - -function usage() { - if [ -n $1 ]; then -echo $1 - fi - echo Usage: ./bin/pyspark [options] 12 - $SPARK_HOME/bin/spark-submit --help 21 | grep -v Usage 12 - exit $2 -} -export -f usage - -if [[ $@ = *--help ]] || [[ $@ = *-h ]]; then - usage -fi +export _SPARK_CMD_USAGE=Usage: ./bin/pyspark [options] # In Spark = 1.1, setting IPYTHON=1 would cause the driver to be launched using the `ipython` # executable, while the worker would still be launched using PYSPARK_PYTHON. 
http://git-wip-us.apache.org/repos/asf/spark/blob/700312e1/bin/pyspark2.cmd -- diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd index 09b4149..45e9e3d 100644 --- a/bin/pyspark2.cmd +++ b/bin/pyspark2.cmd @@ -21,6 +21,7 @@ rem Figure out where the Spark framework is installed set SPARK_HOME=%~dp0.. call %SPARK_HOME%\bin\load-spark-env.cmd +set _SPARK_CMD_USAGE=Usage: bin\pyspark.cmd [options] rem Figure out which Python to use. if x%PYSPARK_DRIVER_PYTHON%==x ( http://git-wip-us.apache.org/repos/asf/spark/blob/700312e1/bin/spark-class -- diff --git a/bin/spark-class b/bin/spark-class index c49d97c..7bb1afe 100755 --- a/bin/spark-class +++ b/bin/spark-class @@ -16,18 +16,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # -set -e # Figure out where Spark is
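To make the mechanism concrete, here is a highly simplified, hypothetical Scala sketch of the idea described above — not the real SparkSubmitArguments or launcher code. It shows the two pieces the commit introduces: an environment variable (the diff's _SPARK_CMD_USAGE) that lets wrapper scripts override the command shown in the usage line, and a dedicated "usage error" path that prints the same message but exits non-zero.

```scala
object UsageSketch {
  private val defaultUsage =
    "Usage: spark-submit [options] <app jar | python file> [app arguments]"

  /** Print the usage message (overridable by wrapper scripts) and exit. */
  def printUsageAndExit(exitCode: Int): Unit = {
    val usage = sys.env.getOrElse("_SPARK_CMD_USAGE", defaultUsage)
    Console.err.println(usage)
    sys.exit(exitCode)
  }

  def main(args: Array[String]): Unit = {
    if (args.contains("--help")) printUsageAndExit(0)             // explicit help: exit 0
    else if (args.contains("--usage-error")) printUsageAndExit(1) // usage error: non-zero status
  }
}
```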
spark git commit: [SPARK-8099] set executor cores into system in yarn-cluster mode
Repository: spark Updated Branches: refs/heads/master 4036d05ce - 0992a0a77 [SPARK-8099] set executor cores into system in yarn-cluster mode Author: Xutingjun xuting...@huawei.com Author: xutingjun xuting...@huawei.com Closes #6643 from XuTingjun/SPARK-8099 and squashes the following commits: 80b18cd [Xutingjun] change to STANDALONE | YARN ce33148 [Xutingjun] set executor cores into system e51cc9e [Xutingjun] set executor cores into system 0600861 [xutingjun] set executor cores into system Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0992a0a7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0992a0a7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0992a0a7 Branch: refs/heads/master Commit: 0992a0a77d38081c6c206bb34333013125d85376 Parents: 4036d05 Author: Xutingjun xuting...@huawei.com Authored: Fri Jun 5 11:41:39 2015 -0700 Committer: Sandy Ryza sa...@cloudera.com Committed: Fri Jun 5 11:41:39 2015 -0700 -- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0992a0a7/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 3aa3f94..a0eae77 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -425,7 +425,6 @@ object SparkSubmit { // Yarn client only OptionAssigner(args.queue, YARN, CLIENT, sysProp = spark.yarn.queue), OptionAssigner(args.numExecutors, YARN, CLIENT, sysProp = spark.executor.instances), - OptionAssigner(args.executorCores, YARN, CLIENT, sysProp = spark.executor.cores), OptionAssigner(args.files, YARN, CLIENT, sysProp = spark.yarn.dist.files), OptionAssigner(args.archives, YARN, CLIENT, sysProp = spark.yarn.dist.archives), OptionAssigner(args.principal, YARN, CLIENT, sysProp = spark.yarn.principal), @@ -446,7 +445,7 @@ object SparkSubmit { OptionAssigner(args.keytab, YARN, CLUSTER, clOption = --keytab), // Other options - OptionAssigner(args.executorCores, STANDALONE, ALL_DEPLOY_MODES, + OptionAssigner(args.executorCores, STANDALONE | YARN, ALL_DEPLOY_MODES, sysProp = spark.executor.cores), OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES, sysProp = spark.executor.memory), - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
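For context, the effect of the change is that the executor-core setting is propagated as the spark.executor.cores system property for YARN in both deploy modes, not only for standalone. A hedged sketch of setting the same property programmatically; the core count and local master are placeholders so the sketch runs standalone.

```scala
import org.apache.spark.{SparkConf, SparkContext}

object ExecutorCoresSketch extends App {
  val conf = new SparkConf()
    .setAppName("ExecutorCoresSketch")
    .setMaster("local[2]")              // placeholder master for a self-contained run
    .set("spark.executor.cores", "4")   // the property that --executor-cores feeds per the diff above

  val sc = new SparkContext(conf)
  println(sc.getConf.get("spark.executor.cores")) // 4
  sc.stop()
}
```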
spark git commit: Revert "[MINOR] [BUILD] Use custom temp directory during build."
Repository: spark Updated Branches: refs/heads/master 12f5eaeee - 4036d05ce Revert [MINOR] [BUILD] Use custom temp directory during build. This reverts commit b16b5434ff44c42e4b3a337f9af147669ba44896. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4036d05c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4036d05c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4036d05c Branch: refs/heads/master Commit: 4036d05ceeec77ebfa9c683cbc699250df3e3895 Parents: 12f5eae Author: Andrew Or and...@databricks.com Authored: Fri Jun 5 10:53:32 2015 -0700 Committer: Andrew Or and...@databricks.com Committed: Fri Jun 5 10:53:32 2015 -0700 -- .../spark/deploy/SparkSubmitUtilsSuite.scala| 22 +--- pom.xml | 4 +--- project/SparkBuild.scala| 1 - 3 files changed, 11 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4036d05c/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index 07d261c..8fda5c8 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -28,12 +28,9 @@ import org.apache.ivy.plugins.resolver.IBiblioResolver import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate -import org.apache.spark.util.Utils class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { - private var tempIvyPath: String = _ - private val noOpOutputStream = new OutputStream { def write(b: Int) = {} } @@ -50,7 +47,6 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { super.beforeAll() // We don't want to write logs during testing SparkSubmitUtils.printStream = new BufferPrintStream -tempIvyPath = Utils.createTempDir(namePrefix = ivy).getAbsolutePath() } test(incorrect maven coordinate throws error) { @@ -94,20 +90,21 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { } test(ivy path works correctly) { +val ivyPath = dummy + File.separator + ivy val md = SparkSubmitUtils.getModuleDescriptor val artifacts = for (i - 0 until 3) yield new MDArtifact(md, sjar-$i, jar, jar) -var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(tempIvyPath)) +var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(ivyPath)) for (i - 0 until 3) { - val index = jPaths.indexOf(tempIvyPath) + val index = jPaths.indexOf(ivyPath) assert(index = 0) - jPaths = jPaths.substring(index + tempIvyPath.length) + jPaths = jPaths.substring(index + ivyPath.length) } val main = MavenCoordinate(my.awesome.lib, mylib, 0.1) IvyTestUtils.withRepository(main, None, None) { repo = // end to end val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, Option(repo), -Option(tempIvyPath), true) - assert(jarPath.indexOf(tempIvyPath) = 0, should use non-default ivy path) +Option(ivyPath), true) + assert(jarPath.indexOf(ivyPath) = 0, should use non-default ivy path) } } @@ -126,12 +123,13 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { assert(jarPath.indexOf(mylib) = 0, should find artifact) } // Local ivy repository with modified home -val dummyIvyLocal = new File(tempIvyPath, local + File.separator) +val dummyIvyPath = dummy + File.separator + ivy +val dummyIvyLocal = new 
File(dummyIvyPath, local + File.separator) IvyTestUtils.withRepository(main, None, Some(dummyIvyLocal), true) { repo = val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None, -Some(tempIvyPath), true) +Some(dummyIvyPath), true) assert(jarPath.indexOf(mylib) = 0, should find artifact) - assert(jarPath.indexOf(tempIvyPath) = 0, should be in new ivy path) + assert(jarPath.indexOf(dummyIvyPath) = 0, should be in new ivy path) } } http://git-wip-us.apache.org/repos/asf/spark/blob/4036d05c/pom.xml -- diff --git a/pom.xml b/pom.xml index a848def..e28d4b9 100644 --- a/pom.xml +++ b/pom.xml @@ -179,7 +179,7 @@ parquet.deps.scopecompile/parquet.deps.scope !-- - Overridable test home. So that you can call individual pom files directly without +
spark git commit: Revert "[MINOR] [BUILD] Use custom temp directory during build."
Repository: spark Updated Branches: refs/heads/branch-1.4 3e3151e75 - 429c65851 Revert [MINOR] [BUILD] Use custom temp directory during build. This reverts commit 9b3e4c187123baa22666b8e119ddd55dafecbf89. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/429c6585 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/429c6585 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/429c6585 Branch: refs/heads/branch-1.4 Commit: 429c658519760ba9860508c6006ab3e7aba70dfd Parents: 3e3151e Author: Andrew Or and...@databricks.com Authored: Fri Jun 5 10:54:06 2015 -0700 Committer: Andrew Or and...@databricks.com Committed: Fri Jun 5 10:54:06 2015 -0700 -- .../spark/deploy/SparkSubmitUtilsSuite.scala| 22 +--- pom.xml | 4 +--- project/SparkBuild.scala| 1 - 3 files changed, 11 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/429c6585/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala -- diff --git a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala index 07d261c..8fda5c8 100644 --- a/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/SparkSubmitUtilsSuite.scala @@ -28,12 +28,9 @@ import org.apache.ivy.plugins.resolver.IBiblioResolver import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.SparkSubmitUtils.MavenCoordinate -import org.apache.spark.util.Utils class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { - private var tempIvyPath: String = _ - private val noOpOutputStream = new OutputStream { def write(b: Int) = {} } @@ -50,7 +47,6 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { super.beforeAll() // We don't want to write logs during testing SparkSubmitUtils.printStream = new BufferPrintStream -tempIvyPath = Utils.createTempDir(namePrefix = ivy).getAbsolutePath() } test(incorrect maven coordinate throws error) { @@ -94,20 +90,21 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { } test(ivy path works correctly) { +val ivyPath = dummy + File.separator + ivy val md = SparkSubmitUtils.getModuleDescriptor val artifacts = for (i - 0 until 3) yield new MDArtifact(md, sjar-$i, jar, jar) -var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(tempIvyPath)) +var jPaths = SparkSubmitUtils.resolveDependencyPaths(artifacts.toArray, new File(ivyPath)) for (i - 0 until 3) { - val index = jPaths.indexOf(tempIvyPath) + val index = jPaths.indexOf(ivyPath) assert(index = 0) - jPaths = jPaths.substring(index + tempIvyPath.length) + jPaths = jPaths.substring(index + ivyPath.length) } val main = MavenCoordinate(my.awesome.lib, mylib, 0.1) IvyTestUtils.withRepository(main, None, None) { repo = // end to end val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, Option(repo), -Option(tempIvyPath), true) - assert(jarPath.indexOf(tempIvyPath) = 0, should use non-default ivy path) +Option(ivyPath), true) + assert(jarPath.indexOf(ivyPath) = 0, should use non-default ivy path) } } @@ -126,12 +123,13 @@ class SparkSubmitUtilsSuite extends SparkFunSuite with BeforeAndAfterAll { assert(jarPath.indexOf(mylib) = 0, should find artifact) } // Local ivy repository with modified home -val dummyIvyLocal = new File(tempIvyPath, local + File.separator) +val dummyIvyPath = dummy + File.separator + ivy +val 
dummyIvyLocal = new File(dummyIvyPath, local + File.separator) IvyTestUtils.withRepository(main, None, Some(dummyIvyLocal), true) { repo = val jarPath = SparkSubmitUtils.resolveMavenCoordinates(main.toString, None, -Some(tempIvyPath), true) +Some(dummyIvyPath), true) assert(jarPath.indexOf(mylib) = 0, should find artifact) - assert(jarPath.indexOf(tempIvyPath) = 0, should be in new ivy path) + assert(jarPath.indexOf(dummyIvyPath) = 0, should be in new ivy path) } } http://git-wip-us.apache.org/repos/asf/spark/blob/429c6585/pom.xml -- diff --git a/pom.xml b/pom.xml index ac38e8c..c873f68 100644 --- a/pom.xml +++ b/pom.xml @@ -180,7 +180,7 @@ parquet.deps.scopecompile/parquet.deps.scope !-- - Overridable test home. So that you can call individual pom files directly without +
spark git commit: [SPARK-8085] [SPARKR] Support user-specified schema in read.df
Repository: spark Updated Branches: refs/heads/branch-1.4 0ef2e9d35 - 3e3151e75 [SPARK-8085] [SPARKR] Support user-specified schema in read.df cc davies sun-rui Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Closes #6620 from shivaram/sparkr-read-schema and squashes the following commits: 16a6726 [Shivaram Venkataraman] Fix loadDF to pass schema Also add a unit test a229877 [Shivaram Venkataraman] Use wrapper function to DataFrameReader ee70ba8 [Shivaram Venkataraman] Support user-specified schema in read.df (cherry picked from commit 12f5eaeee1235850a076ce5716d069bd2f1205a5) Signed-off-by: Shivaram Venkataraman shiva...@cs.berkeley.edu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e3151e7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e3151e7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e3151e7 Branch: refs/heads/branch-1.4 Commit: 3e3151e755dd68aa9a75188d6ecb968c7c1dff24 Parents: 0ef2e9d Author: Shivaram Venkataraman shiva...@cs.berkeley.edu Authored: Fri Jun 5 10:19:03 2015 -0700 Committer: Shivaram Venkataraman shiva...@cs.berkeley.edu Committed: Fri Jun 5 10:19:15 2015 -0700 -- R/pkg/R/SQLContext.R | 14 ++ R/pkg/inst/tests/test_sparkSQL.R | 13 + .../scala/org/apache/spark/sql/api/r/SQLUtils.scala | 15 +++ 3 files changed, 38 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3e3151e7/R/pkg/R/SQLContext.R -- diff --git a/R/pkg/R/SQLContext.R b/R/pkg/R/SQLContext.R index 88e1a50..22a4b5b 100644 --- a/R/pkg/R/SQLContext.R +++ b/R/pkg/R/SQLContext.R @@ -452,7 +452,7 @@ dropTempTable - function(sqlContext, tableName) { #' df - read.df(sqlContext, path/to/file.json, source = json) #' } -read.df - function(sqlContext, path = NULL, source = NULL, ...) { +read.df - function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) { options - varargsToEnv(...) if (!is.null(path)) { options[['path']] - path @@ -462,15 +462,21 @@ read.df - function(sqlContext, path = NULL, source = NULL, ...) { source - callJMethod(sqlContext, getConf, spark.sql.sources.default, org.apache.spark.sql.parquet) } - sdf - callJMethod(sqlContext, load, source, options) + if (!is.null(schema)) { +stopifnot(class(schema) == structType) +sdf - callJStatic(org.apache.spark.sql.api.r.SQLUtils, loadDF, sqlContext, source, + schema$jobj, options) + } else { +sdf - callJStatic(org.apache.spark.sql.api.r.SQLUtils, loadDF, sqlContext, source, options) + } dataFrame(sdf) } #' @aliases loadDF #' @export -loadDF - function(sqlContext, path = NULL, source = NULL, ...) { - read.df(sqlContext, path, source, ...) +loadDF - function(sqlContext, path = NULL, source = NULL, schema = NULL, ...) { + read.df(sqlContext, path, source, schema, ...) 
} #' Create an external table http://git-wip-us.apache.org/repos/asf/spark/blob/3e3151e7/R/pkg/inst/tests/test_sparkSQL.R -- diff --git a/R/pkg/inst/tests/test_sparkSQL.R b/R/pkg/inst/tests/test_sparkSQL.R index d2d82e7..30edfc8 100644 --- a/R/pkg/inst/tests/test_sparkSQL.R +++ b/R/pkg/inst/tests/test_sparkSQL.R @@ -504,6 +504,19 @@ test_that(read.df() from json file, { df - read.df(sqlContext, jsonPath, json) expect_true(inherits(df, DataFrame)) expect_true(count(df) == 3) + + # Check if we can apply a user defined schema + schema - structType(structField(name, type = string), + structField(age, type = double)) + + df1 - read.df(sqlContext, jsonPath, json, schema) + expect_true(inherits(df1, DataFrame)) + expect_equal(dtypes(df1), list(c(name, string), c(age, double))) + + # Run the same with loadDF + df2 - loadDF(sqlContext, jsonPath, json, schema) + expect_true(inherits(df2, DataFrame)) + expect_equal(dtypes(df2), list(c(name, string), c(age, double))) }) test_that(write.df() as parquet file, { http://git-wip-us.apache.org/repos/asf/spark/blob/3e3151e7/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index 604f312..43b62f0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -139,4 +139,19 @@ private[r] object SQLUtils { case ignore = SaveMode.Ignore } } + + def loadDF( + sqlContext: SQLContext, +
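A hedged Scala-side analogue of the new R test above, using the 1.4 DataFrameReader to apply a user-specified schema instead of inferring one; the JSON path and local master are placeholders.

```scala
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.{DoubleType, StringType, StructField, StructType}
import org.apache.spark.{SparkConf, SparkContext}

object ReadWithSchemaSketch extends App {
  val sc = new SparkContext(
    new SparkConf().setAppName("ReadWithSchemaSketch").setMaster("local[2]"))
  val sqlContext = new SQLContext(sc)

  // Schema mirroring the one built with structType()/structField() in the R test above.
  val schema = StructType(Seq(
    StructField("name", StringType),
    StructField("age", DoubleType)))

  // Equivalent in spirit to read.df(sqlContext, jsonPath, "json", schema) in SparkR.
  val df = sqlContext.read.format("json").schema(schema).load("/path/to/file.json")
  df.printSchema()

  sc.stop()
}
```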
spark git commit: [SPARK-7699] [CORE] Lazy start the scheduler for dynamic allocation
Repository: spark Updated Branches: refs/heads/master 0992a0a77 - 3f80bc841 [SPARK-7699] [CORE] Lazy start the scheduler for dynamic allocation This patch propose to lazy start the scheduler for dynamic allocation to avoid fast ramp down executor numbers is load is less. This implementation will: 1. immediately start the scheduler is `numExecutorsTarget` is 0, this is the expected behavior. 2. if `numExecutorsTarget` is not zero, start the scheduler until the number is satisfied, if the load is less, this initial started executors will last for at least 60 seconds, user will have a window to submit a job, no need to revamp the executors. 3. if `numExecutorsTarget` is not satisfied until the timeout, this means resource is not enough, the scheduler will start until this timeout, will not wait infinitely. Please help to review, thanks a lot. Author: jerryshao saisai.s...@intel.com Closes #6430 from jerryshao/SPARK-7699 and squashes the following commits: 02cac8e [jerryshao] Address the comments 7242450 [jerryshao] Remove the useless import ecc0b00 [jerryshao] Address the comments 6f75f00 [jerryshao] Style changes 8b8decc [jerryshao] change the test name fb822ca [jerryshao] Change the solution according to comments 1cc74e5 [jerryshao] Lazy start the scheduler for dynamic allocation Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3f80bc84 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3f80bc84 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3f80bc84 Branch: refs/heads/master Commit: 3f80bc841ab155925fb0530eef5927990f4a5793 Parents: 0992a0a Author: jerryshao saisai.s...@intel.com Authored: Fri Jun 5 12:28:37 2015 -0700 Committer: Sandy Ryza sa...@cloudera.com Committed: Fri Jun 5 12:28:37 2015 -0700 -- .../spark/ExecutorAllocationManager.scala | 17 +++- .../spark/ExecutorAllocationManagerSuite.scala | 90 2 files changed, 89 insertions(+), 18 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3f80bc84/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala -- diff --git a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala index f7323a4..9939103 100644 --- a/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala +++ b/core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala @@ -150,6 +150,13 @@ private[spark] class ExecutorAllocationManager( // Metric source for ExecutorAllocationManager to expose internal status to MetricsSystem. val executorAllocationManagerSource = new ExecutorAllocationManagerSource + // Whether we are still waiting for the initial set of executors to be allocated. + // While this is true, we will not cancel outstanding executor requests. This is + // set to false when: + // (1) a stage is submitted, or + // (2) an executor idle timeout has elapsed. + @volatile private var initializing: Boolean = true + /** * Verify that the settings specified through the config are valid. * If not, throw an appropriate exception. 
@@ -240,6 +247,7 @@ private[spark] class ExecutorAllocationManager( removeTimes.retain { case (executorId, expireTime) = val expired = now = expireTime if (expired) { +initializing = false removeExecutor(executorId) } !expired @@ -261,7 +269,11 @@ private[spark] class ExecutorAllocationManager( private def updateAndSyncNumExecutorsTarget(now: Long): Int = synchronized { val maxNeeded = maxNumExecutorsNeeded -if (maxNeeded numExecutorsTarget) { +if (initializing) { + // Do not change our target while we are still initializing, + // Otherwise the first job may have to ramp up unnecessarily + 0 +} else if (maxNeeded numExecutorsTarget) { // The target number exceeds the number we actually need, so stop adding new // executors and inform the cluster manager to cancel the extra pending requests val oldNumExecutorsTarget = numExecutorsTarget @@ -271,7 +283,7 @@ private[spark] class ExecutorAllocationManager( // If the new target has not changed, avoid sending a message to the cluster manager if (numExecutorsTarget oldNumExecutorsTarget) { client.requestTotalExecutors(numExecutorsTarget) -logInfo(sLowering target number of executors to $numExecutorsTarget (previously + +logDebug(sLowering target number of executors to $numExecutorsTarget (previously + s$oldNumExecutorsTarget) because not all requested executors are actually needed) } numExecutorsTarget - oldNumExecutorsTarget @@ -481,6 +493,7 @@
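To make the three-point behaviour in the description concrete, here is a stripped-down, hypothetical sketch (not the real ExecutorAllocationManager) of the "do not ramp down while initializing" rule; the class and method names are invented for illustration.

```scala
class AllocationTargetSketch(initialTarget: Int) {
  // True until a stage is submitted or an executor idle timeout fires,
  // mirroring the `initializing` flag added in the patch above.
  @volatile private var initializing: Boolean = initialTarget != 0
  private var numExecutorsTarget: Int = initialTarget

  def onStageSubmitted(): Unit = { initializing = false }
  def onExecutorIdleTimeout(): Unit = { initializing = false }

  /** Returns the delta applied to the target for the currently needed executor count. */
  def updateTarget(maxNeeded: Int): Int = synchronized {
    if (initializing) {
      0 // keep the initial executors; the first job gets a window to arrive
    } else {
      val old = numExecutorsTarget
      numExecutorsTarget = math.max(maxNeeded, 0) // ramp up or down to actual need
      numExecutorsTarget - old
    }
  }
}
```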
spark git commit: [SPARK-7991] [PySpark] Adding support for passing lists to describe.
Repository: spark Updated Branches: refs/heads/master 4060526cd - 356a4a9b9 [SPARK-7991] [PySpark] Adding support for passing lists to describe. This is a minor change. Author: amey a...@skytree.net Closes #6655 from ameyc/JIRA-7991/support-passing-list-to-describe and squashes the following commits: e8a1dff [amey] Adding support for passing lists to describe. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/356a4a9b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/356a4a9b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/356a4a9b Branch: refs/heads/master Commit: 356a4a9b93a1eeedb910c6bccc0abadf59e4877f Parents: 4060526 Author: amey a...@skytree.net Authored: Fri Jun 5 13:49:33 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Fri Jun 5 13:49:33 2015 -0700 -- python/pyspark/sql/dataframe.py | 12 1 file changed, 12 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/356a4a9b/python/pyspark/sql/dataframe.py -- diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 03b01a1..902504d 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -616,7 +616,19 @@ class DataFrame(object): |min| 2| |max| 5| +---+---+ + df.describe(['age', 'name']).show() ++---+---+-+ +|summary|age| name| ++---+---+-+ +| count| 2|2| +| mean|3.5| null| +| stddev|1.5| null| +|min| 2|Alice| +|max| 5| Bob| ++---+---+-+ +if len(cols) == 1 and isinstance(cols[0], list): +cols = cols[0] jdf = self._jdf.describe(self._jseq(cols)) return DataFrame(jdf, self.sql_ctx) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
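On the Scala side the same operation already accepts a variable number of column names, so a pre-built sequence only needs varargs expansion; a hedged, self-contained analogue of the doctest above, with placeholder data and a local master:

```scala
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}

object DescribeSketch extends App {
  val sc = new SparkContext(
    new SparkConf().setAppName("DescribeSketch").setMaster("local[2]"))
  val sqlContext = new SQLContext(sc)
  import sqlContext.implicits._

  val df = sc.parallelize(Seq(("Alice", 2), ("Bob", 5))).toDF("name", "age")

  // Varargs expansion plays the role of the list accepted by the PySpark patch above.
  val cols = Seq("age", "name")
  df.describe(cols: _*).show()

  sc.stop()
}
```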
spark git commit: [SPARK-8114][SQL] Remove some wildcard import on TestSQLContext._ cont'd.
Repository: spark Updated Branches: refs/heads/master 356a4a9b9 - 6ebe419f3 [SPARK-8114][SQL] Remove some wildcard import on TestSQLContext._ cont'd. Fixed the following packages: sql.columnar sql.jdbc sql.json sql.parquet Author: Reynold Xin r...@databricks.com Closes #6667 from rxin/testsqlcontext_wildcard and squashes the following commits: 134a776 [Reynold Xin] Fixed compilation break. 6da7b69 [Reynold Xin] [SPARK-8114][SQL] Remove some wildcard import on TestSQLContext._ cont'd. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6ebe419f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6ebe419f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6ebe419f Branch: refs/heads/master Commit: 6ebe419f335fcfb66dd3da74baf35eb5b2fc061d Parents: 356a4a9 Author: Reynold Xin r...@databricks.com Authored: Fri Jun 5 13:57:21 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Fri Jun 5 13:57:21 2015 -0700 -- .../columnar/InMemoryColumnarQuerySuite.scala | 40 + .../columnar/PartitionBatchPruningSuite.scala | 28 +++--- .../org/apache/spark/sql/jdbc/JDBCSuite.scala | 45 +- .../apache/spark/sql/jdbc/JDBCWriteSuite.scala | 75 .../org/apache/spark/sql/json/JsonSuite.scala | 95 ++-- .../apache/spark/sql/json/TestJsonData.scala| 62 ++--- .../spark/sql/parquet/ParquetFilterSuite.scala | 7 +- .../spark/sql/parquet/ParquetIOSuite.scala | 40 - .../ParquetPartitionDiscoverySuite.scala| 27 +++--- .../spark/sql/parquet/ParquetQuerySuite.scala | 24 ++--- .../spark/sql/parquet/ParquetSchemaSuite.scala | 3 +- .../apache/spark/sql/parquet/ParquetTest.scala | 6 +- .../apache/spark/sql/test/SQLTestUtils.scala| 14 ++- .../spark/sql/hive/orc/OrcQuerySuite.scala | 5 +- .../org/apache/spark/sql/hive/orc/OrcTest.scala | 8 +- 15 files changed, 234 insertions(+), 245 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6ebe419f/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala index 055453e..fa3b814 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/columnar/InMemoryColumnarQuerySuite.scala @@ -21,8 +21,6 @@ import java.sql.{Date, Timestamp} import org.apache.spark.sql.TestData._ import org.apache.spark.sql.catalyst.expressions.Row -import org.apache.spark.sql.test.TestSQLContext._ -import org.apache.spark.sql.test.TestSQLContext.implicits._ import org.apache.spark.sql.types._ import org.apache.spark.sql.{QueryTest, TestData} import org.apache.spark.storage.StorageLevel.MEMORY_ONLY @@ -31,8 +29,12 @@ class InMemoryColumnarQuerySuite extends QueryTest { // Make sure the tables are loaded. 
  TestData

+  private lazy val ctx = org.apache.spark.sql.test.TestSQLContext
+  import ctx.implicits._
+  import ctx.{logicalPlanToSparkQuery, sql}
+
   test("simple columnar query") {
-    val plan = executePlan(testData.logicalPlan).executedPlan
+    val plan = ctx.executePlan(testData.logicalPlan).executedPlan
     val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None)

     checkAnswer(scan, testData.collect().toSeq)
@@ -40,16 +42,16 @@ class InMemoryColumnarQuerySuite extends QueryTest {

   test("default size avoids broadcast") {
     // TODO: Improve this test when we have better statistics
-    sparkContext.parallelize(1 to 10).map(i => TestData(i, i.toString))
+    ctx.sparkContext.parallelize(1 to 10).map(i => TestData(i, i.toString))
       .toDF().registerTempTable("sizeTst")
-    cacheTable("sizeTst")
+    ctx.cacheTable("sizeTst")
     assert(
-      table("sizeTst").queryExecution.analyzed.statistics.sizeInBytes >
-        conf.autoBroadcastJoinThreshold)
+      ctx.table("sizeTst").queryExecution.analyzed.statistics.sizeInBytes >
+        ctx.conf.autoBroadcastJoinThreshold)
   }

   test("projection") {
-    val plan = executePlan(testData.select('value, 'key).logicalPlan).executedPlan
+    val plan = ctx.executePlan(testData.select('value, 'key).logicalPlan).executedPlan
     val scan = InMemoryRelation(useCompression = true, 5, MEMORY_ONLY, plan, None)

     checkAnswer(scan, testData.collect().map {
@@ -58,7 +60,7 @@ class InMemoryColumnarQuerySuite extends QueryTest {
   }

   test("SPARK-1436 regression: in-memory columns must be able to be accessed multiple times") {
-    val plan = executePlan(testData.logicalPlan).executedPlan
+    val
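The mechanical pattern behind this diff is the same in every touched suite: drop the file-level `import TestSQLContext._` wildcards, hold the test context in an explicit `ctx` val, import its implicits, and reach `sql`, `cacheTable`, `table` and friends through `ctx`. A minimal sketch of a suite written in the new style (the suite and table names here are made up for illustration):

```scala
import org.scalatest.FunSuite

class ExampleCtxStyleSuite extends FunSuite {
  // Hold the shared test SQLContext explicitly instead of wildcard-importing its members.
  private lazy val ctx = org.apache.spark.sql.test.TestSQLContext
  import ctx.implicits._

  test("methods are reached through ctx, not through a wildcard import") {
    val df = ctx.sparkContext.parallelize(1 to 3).map(i => (i, i.toString)).toDF("key", "value")
    df.registerTempTable("tiny")
    ctx.cacheTable("tiny")
    assert(ctx.table("tiny").count() === 3)
    ctx.uncacheTable("tiny")
  }
}
```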
spark git commit: [SPARK-8112] [STREAMING] Fix the negative event count issue
Repository: spark Updated Branches: refs/heads/master 3f80bc841 - 4f16d3fe2 [SPARK-8112] [STREAMING] Fix the negative event count issue Author: zsxwing zsxw...@gmail.com Closes #6659 from zsxwing/SPARK-8112 and squashes the following commits: a5d7da6 [zsxwing] Address comments d255b6e [zsxwing] Fix the negative event count issue Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f16d3fe Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f16d3fe Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f16d3fe Branch: refs/heads/master Commit: 4f16d3fe2e260a716b5b4e4005cb6229386440ed Parents: 3f80bc8 Author: zsxwing zsxw...@gmail.com Authored: Fri Jun 5 12:46:02 2015 -0700 Committer: Tathagata Das tathagata.das1...@gmail.com Committed: Fri Jun 5 12:46:02 2015 -0700 -- .../apache/spark/streaming/dstream/ReceiverInputDStream.scala| 2 +- .../apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala | 4 ++-- .../org/apache/spark/streaming/scheduler/InputInfoTracker.scala | 4 +++- .../org/apache/spark/streaming/scheduler/ReceivedBlockInfo.scala | 4 +++- .../org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala | 2 +- 5 files changed, 10 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f16d3fe/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala index e4ff05e..e76e7eb 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala @@ -70,7 +70,7 @@ abstract class ReceiverInputDStream[T: ClassTag](@transient ssc_ : StreamingCont val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray // Register the input blocks information into InputInfoTracker -val inputInfo = InputInfo(id, blockInfos.map(_.numRecords).sum) +val inputInfo = InputInfo(id, blockInfos.flatMap(_.numRecords).sum) ssc.scheduler.inputInfoTracker.reportInfo(validTime, inputInfo) if (blockInfos.nonEmpty) { http://git-wip-us.apache.org/repos/asf/spark/blob/4f16d3fe/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala index 9293837..8be732b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala @@ -138,8 +138,8 @@ private[streaming] class ReceiverSupervisorImpl( ) { val blockId = blockIdOption.getOrElse(nextBlockId) val numRecords = receivedBlock match { - case ArrayBufferBlock(arrayBuffer) = arrayBuffer.size - case _ = -1 + case ArrayBufferBlock(arrayBuffer) = Some(arrayBuffer.size.toLong) + case _ = None } val time = System.currentTimeMillis http://git-wip-us.apache.org/repos/asf/spark/blob/4f16d3fe/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala index 
a72efcc..7c0db8a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
@@ -23,7 +23,9 @@ import org.apache.spark.Logging
 import org.apache.spark.streaming.{Time, StreamingContext}

 /** To track the information of input stream at specified batch time. */
-private[streaming] case class InputInfo(inputStreamId: Int, numRecords: Long)
+private[streaming] case class InputInfo(inputStreamId: Int, numRecords: Long) {
+  require(numRecords >= 0, "numRecords must not be negative")
+}

 /**
  * This class manages all the input streams as well as their input data statistics. The information

http://git-wip-us.apache.org/repos/asf/spark/blob/4f16d3fe/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockInfo.scala --
diff --git
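The heart of the fix is replacing the `-1` sentinel for "record count unknown" with `Option[Long]`, so that `flatMap(_.numRecords).sum` simply skips unknown blocks instead of letting the sentinels drag the total negative, and the new `require` in `InputInfo` now rejects any negative count outright. A tiny standalone sketch of that idea, using a made-up `BlockInfo` class rather than Spark's own:

```scala
// Made-up stand-in for the receiver block metadata; only the counting idea matters here.
case class BlockInfo(numRecords: Option[Long])

object NegativeCountSketch extends App {
  val blocks = Seq(BlockInfo(Some(10L)), BlockInfo(None), BlockInfo(Some(5L)))

  // Old approach: encode "unknown" as -1 and the total silently shrinks (14 here,
  // or even goes negative once unknown blocks outnumber the counted records).
  val withSentinels = blocks.map(_.numRecords.getOrElse(-1L)).sum

  // New approach, as in the diff above: unknown counts drop out of the sum entirely.
  val withOptions = blocks.flatMap(_.numRecords).sum   // 15

  println(s"sentinel total = $withSentinels, Option total = $withOptions")
}
```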
spark git commit: [SPARK-8112] [STREAMING] Fix the negative event count issue
Repository: spark Updated Branches: refs/heads/branch-1.4 429c65851 - 200c980a1 [SPARK-8112] [STREAMING] Fix the negative event count issue Author: zsxwing zsxw...@gmail.com Closes #6659 from zsxwing/SPARK-8112 and squashes the following commits: a5d7da6 [zsxwing] Address comments d255b6e [zsxwing] Fix the negative event count issue (cherry picked from commit 4f16d3fe2e260a716b5b4e4005cb6229386440ed) Signed-off-by: Tathagata Das tathagata.das1...@gmail.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/200c980a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/200c980a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/200c980a Branch: refs/heads/branch-1.4 Commit: 200c980a13ce2c090df7b51925da1027cbe1bc7f Parents: 429c658 Author: zsxwing zsxw...@gmail.com Authored: Fri Jun 5 12:46:02 2015 -0700 Committer: Tathagata Das tathagata.das1...@gmail.com Committed: Fri Jun 5 12:46:15 2015 -0700 -- .../apache/spark/streaming/dstream/ReceiverInputDStream.scala| 2 +- .../apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala | 4 ++-- .../org/apache/spark/streaming/scheduler/InputInfoTracker.scala | 4 +++- .../org/apache/spark/streaming/scheduler/ReceivedBlockInfo.scala | 4 +++- .../org/apache/spark/streaming/ReceivedBlockTrackerSuite.scala | 2 +- 5 files changed, 10 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/200c980a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala index e4ff05e..e76e7eb 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala @@ -70,7 +70,7 @@ abstract class ReceiverInputDStream[T: ClassTag](@transient ssc_ : StreamingCont val blockIds = blockInfos.map { _.blockId.asInstanceOf[BlockId] }.toArray // Register the input blocks information into InputInfoTracker -val inputInfo = InputInfo(id, blockInfos.map(_.numRecords).sum) +val inputInfo = InputInfo(id, blockInfos.flatMap(_.numRecords).sum) ssc.scheduler.inputInfoTracker.reportInfo(validTime, inputInfo) if (blockInfos.nonEmpty) { http://git-wip-us.apache.org/repos/asf/spark/blob/200c980a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala -- diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala index 9293837..8be732b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala @@ -138,8 +138,8 @@ private[streaming] class ReceiverSupervisorImpl( ) { val blockId = blockIdOption.getOrElse(nextBlockId) val numRecords = receivedBlock match { - case ArrayBufferBlock(arrayBuffer) = arrayBuffer.size - case _ = -1 + case ArrayBufferBlock(arrayBuffer) = Some(arrayBuffer.size.toLong) + case _ = None } val time = System.currentTimeMillis http://git-wip-us.apache.org/repos/asf/spark/blob/200c980a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala -- diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala index a72efcc..7c0db8a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala
@@ -23,7 +23,9 @@ import org.apache.spark.Logging
 import org.apache.spark.streaming.{Time, StreamingContext}

 /** To track the information of input stream at specified batch time. */
-private[streaming] case class InputInfo(inputStreamId: Int, numRecords: Long)
+private[streaming] case class InputInfo(inputStreamId: Int, numRecords: Long) {
+  require(numRecords >= 0, "numRecords must not be negative")
+}

 /**
  * This class manages all the input streams as well as their input data statistics. The information
spark git commit: [SPARK-7747] [SQL] [DOCS] spark.sql.planner.externalSort
Repository: spark Updated Branches: refs/heads/master 4f16d3fe2 - 4060526cd [SPARK-7747] [SQL] [DOCS] spark.sql.planner.externalSort Add documentation for spark.sql.planner.externalSort Author: Luca Martinetti l...@luca.io Closes #6272 from lucamartinetti/docs-externalsort and squashes the following commits: 985661b [Luca Martinetti] [SPARK-7747] [SQL] [DOCS] Add documentation for spark.sql.planner.externalSort Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4060526c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4060526c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4060526c Branch: refs/heads/master Commit: 4060526cd3b7e9ba345ce94f6e081cc1156e53ab Parents: 4f16d3f Author: Luca Martinetti l...@luca.io Authored: Fri Jun 5 13:40:11 2015 -0700 Committer: Yin Huai yh...@databricks.com Committed: Fri Jun 5 13:40:11 2015 -0700 -- docs/sql-programming-guide.md | 7 +++ 1 file changed, 7 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4060526c/docs/sql-programming-guide.md --
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 282ea75..cde5830 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md
@@ -1785,6 +1785,13 @@ that these options will be deprecated in future release as more optimizations ar
       Configures the number of partitions to use when shuffling data for joins or aggregations.
     </td>
   </tr>
+  <tr>
+    <td><code>spark.sql.planner.externalSort</code></td>
+    <td>false</td>
+    <td>
+      When true, performs sorts spilling to disk as needed otherwise sort each partition in memory.
+    </td>
+  </tr>
 </table>

 # Distributed SQL Engine
- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
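Like the other `spark.sql.*` options in that table, the newly documented flag can be flipped at runtime rather than only at startup. A short sketch, assuming an existing `SQLContext` named `sqlContext`:

```scala
// Programmatic form (assumes sqlContext: org.apache.spark.sql.SQLContext already exists).
sqlContext.setConf("spark.sql.planner.externalSort", "true")

// Equivalent SQL form, e.g. from spark-sql or a JDBC session.
sqlContext.sql("SET spark.sql.planner.externalSort=true")
```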
spark git commit: [SPARK-7747] [SQL] [DOCS] spark.sql.planner.externalSort
Repository: spark Updated Branches: refs/heads/branch-1.4 200c980a1 - 94f65bcce [SPARK-7747] [SQL] [DOCS] spark.sql.planner.externalSort Add documentation for spark.sql.planner.externalSort Author: Luca Martinetti l...@luca.io Closes #6272 from lucamartinetti/docs-externalsort and squashes the following commits: 985661b [Luca Martinetti] [SPARK-7747] [SQL] [DOCS] Add documentation for spark.sql.planner.externalSort (cherry picked from commit 4060526cd3b7e9ba345ce94f6e081cc1156e53ab) Signed-off-by: Yin Huai yh...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/94f65bcc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/94f65bcc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/94f65bcc Branch: refs/heads/branch-1.4 Commit: 94f65bcceebd2850e0dc446a1b56bb01d54d19d7 Parents: 200c980 Author: Luca Martinetti l...@luca.io Authored: Fri Jun 5 13:40:11 2015 -0700 Committer: Yin Huai yh...@databricks.com Committed: Fri Jun 5 13:41:52 2015 -0700 -- docs/sql-programming-guide.md | 7 +++ 1 file changed, 7 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/94f65bcc/docs/sql-programming-guide.md --
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index 282ea75..cde5830 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md
@@ -1785,6 +1785,13 @@ that these options will be deprecated in future release as more optimizations ar
       Configures the number of partitions to use when shuffling data for joins or aggregations.
     </td>
   </tr>
+  <tr>
+    <td><code>spark.sql.planner.externalSort</code></td>
+    <td>false</td>
+    <td>
+      When true, performs sorts spilling to disk as needed otherwise sort each partition in memory.
+    </td>
+  </tr>
 </table>

 # Distributed SQL Engine
- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-7991] [PySpark] Adding support for passing lists to describe.
Repository: spark Updated Branches: refs/heads/branch-1.4 94f65bcce - 84523fc38 [SPARK-7991] [PySpark] Adding support for passing lists to describe. This is a minor change. Author: amey a...@skytree.net Closes #6655 from ameyc/JIRA-7991/support-passing-list-to-describe and squashes the following commits: e8a1dff [amey] Adding support for passing lists to describe. (cherry picked from commit 356a4a9b93a1eeedb910c6bccc0abadf59e4877f) Signed-off-by: Reynold Xin r...@databricks.com Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/84523fc3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/84523fc3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/84523fc3 Branch: refs/heads/branch-1.4 Commit: 84523fc387a57f45b649123a03fcd9dd1ebb61a1 Parents: 94f65bc Author: amey a...@skytree.net Authored: Fri Jun 5 13:49:33 2015 -0700 Committer: Reynold Xin r...@databricks.com Committed: Fri Jun 5 13:49:55 2015 -0700 -- python/pyspark/sql/dataframe.py | 12 1 file changed, 12 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/84523fc3/python/pyspark/sql/dataframe.py --
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index 03b01a1..902504d 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py
@@ -616,7 +616,19 @@ class DataFrame(object):
         |    min|  2|
         |    max|  5|
         +-------+---+
+        >>> df.describe(['age', 'name']).show()
+        +-------+---+-----+
+        |summary|age| name|
+        +-------+---+-----+
+        |  count|  2|    2|
+        |   mean|3.5| null|
+        | stddev|1.5| null|
+        |    min|  2|Alice|
+        |    max|  5|  Bob|
+        +-------+---+-----+
         """
+        if len(cols) == 1 and isinstance(cols[0], list):
+            cols = cols[0]
         jdf = self._jdf.describe(self._jseq(cols))
         return DataFrame(jdf, self.sql_ctx)
- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-6964] [SQL] Support Cancellation in the Thrift Server
Repository: spark Updated Branches: refs/heads/master 6ebe419f3 - eb19d3f75 [SPARK-6964] [SQL] Support Cancellation in the Thrift Server Support runInBackground in SparkExecuteStatementOperation, and add cancellation Author: Dong Wang d...@databricks.com Closes #6207 from dongwang218/SPARK-6964-jdbc-cancel and squashes the following commits: 687c113 [Dong Wang] fix 100 characters 7bfa2a7 [Dong Wang] fix merge 380480f [Dong Wang] fix for liancheng's comments eb3e385 [Dong Wang] small nit 341885b [Dong Wang] small fix 3d8ebf8 [Dong Wang] add spark.sql.hive.thriftServer.async flag 04142c3 [Dong Wang] set SQLSession for async execution 184ec35 [Dong Wang] keep hive conf 819ae03 [Dong Wang] [SPARK-6964][SQL][WIP] Support Cancellation in the Thrift Server Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eb19d3f7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eb19d3f7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eb19d3f7 Branch: refs/heads/master Commit: eb19d3f75cbd002f7e72ce02017a8de67f562792 Parents: 6ebe419 Author: Dong Wang d...@databricks.com Authored: Fri Jun 5 17:41:12 2015 -0700 Committer: Yin Huai yh...@databricks.com Committed: Fri Jun 5 17:41:12 2015 -0700 -- .../scala/org/apache/spark/sql/SQLContext.scala | 5 + .../SparkExecuteStatementOperation.scala| 164 +-- .../server/SparkSQLOperationManager.scala | 7 +- .../thriftserver/HiveThriftServer2Suites.scala | 42 - .../org/apache/spark/sql/hive/HiveContext.scala | 6 + 5 files changed, 208 insertions(+), 16 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/eb19d3f7/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 0aab7fa..ddb5402 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -916,6 +916,11 @@ class SQLContext(@transient val sparkContext: SparkContext) tlSession.remove() } + protected[sql] def setSession(session: SQLSession): Unit = { +detachSession() +tlSession.set(session) + } + protected[sql] class SQLSession { // Note that this is a lazy val so we can override the default value in subclasses. 
protected[sql] lazy val conf: SQLConf = new SQLConf http://git-wip-us.apache.org/repos/asf/spark/blob/eb19d3f7/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala -- diff --git a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala index c0d1266..e071103 100644 --- a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala +++ b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala @@ -17,11 +17,23 @@ package org.apache.spark.sql.hive.thriftserver +import java.security.PrivilegedExceptionAction import java.sql.{Date, Timestamp} +import java.util.concurrent.RejectedExecutionException import java.util.{Map = JMap, UUID} +import scala.collection.JavaConversions._ +import scala.collection.mutable.{ArrayBuffer, Map = SMap} +import scala.util.control.NonFatal + +import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.metastore.api.FieldSchema import org.apache.hive.service.cli._ +import org.apache.hadoop.hive.ql.metadata.Hive +import org.apache.hadoop.hive.ql.metadata.HiveException +import org.apache.hadoop.hive.ql.session.SessionState +import org.apache.hadoop.hive.shims.ShimLoader +import org.apache.hadoop.security.UserGroupInformation import org.apache.hive.service.cli.operation.ExecuteStatementOperation import org.apache.hive.service.cli.session.HiveSession @@ -31,8 +43,6 @@ import org.apache.spark.sql.hive.{HiveContext, HiveMetastoreTypes} import org.apache.spark.sql.types._ import org.apache.spark.sql.{DataFrame, Row = SparkRow, SQLConf} -import scala.collection.JavaConversions._ -import scala.collection.mutable.{ArrayBuffer, Map = SMap} private[hive] class SparkExecuteStatementOperation( parentSession: HiveSession, @@ -40,17 +50,19 @@ private[hive] class SparkExecuteStatementOperation( confOverlay: JMap[String, String], runInBackground: Boolean = true) (hiveContext: HiveContext, sessionToActivePool: SMap[SessionHandle, String])
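Because statements now run in a background thread on the server, a JDBC client can abort a long-running query with the standard `Statement.cancel()` call, which is the kind of client interaction the expanded HiveThriftServer2Suites coverage targets. A rough sketch of such a client (the connection URL, credentials and table name are placeholders, and the Hive JDBC driver is assumed to be on the classpath):

```scala
import java.sql.{DriverManager, SQLException}

object CancelSketch extends App {
  // Placeholder URL/user; adjust for a real Thrift server deployment.
  val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "user", "")
  val stmt = conn.createStatement()

  // Fire the cancellation from another thread while the query is still executing.
  new Thread(new Runnable {
    override def run(): Unit = { Thread.sleep(5000); stmt.cancel() }
  }).start()

  try {
    stmt.executeQuery("SELECT COUNT(*) FROM some_large_table")   // placeholder table
  } catch {
    case e: SQLException => println(s"query failed or was cancelled: ${e.getMessage}")
  } finally {
    stmt.close()
    conn.close()
  }
}
```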
spark git commit: [SQL] Simplifies binary node pattern matching
Repository: spark Updated Branches: refs/heads/master 700312e12 - bc0d76a24 [SQL] Simplifies binary node pattern matching This PR is a simpler version of #2764, and adds `unapply` methods to the following binary nodes for simpler pattern matching:
- `BinaryExpression`
- `BinaryComparison`
- `BinaryArithmetics`
This enables nested pattern matching for binary nodes. For example, the following pattern matching
```scala
case p: BinaryComparison if p.left.dataType == StringType && p.right.dataType == DateType =>
  p.makeCopy(Array(p.left, Cast(p.right, StringType)))
```
can be simplified to
```scala
case p @ BinaryComparison(l @ StringType(), r @ DateType()) =>
  p.makeCopy(Array(l, Cast(r, StringType)))
```
Author: Cheng Lian l...@databricks.com Closes #6537 from liancheng/binary-node-patmat and squashes the following commits: a3bf5fe [Cheng Lian] Fixes compilation error introduced while rebasing b738986 [Cheng Lian] Renames `l`/`r` to `left`/`right` or `lhs`/`rhs` 14900ae [Cheng Lian] Simplifies binary node pattern matching Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/bc0d76a2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/bc0d76a2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/bc0d76a2 Branch: refs/heads/master Commit: bc0d76a246cc534234b96a661d70feb94b26538c Parents: 700312e Author: Cheng Lian l...@databricks.com Authored: Fri Jun 5 23:06:19 2015 +0800 Committer: Cheng Lian l...@databricks.com Committed: Fri Jun 5 23:06:19 2015 +0800 -- .../catalyst/analysis/HiveTypeCoercion.scala| 215 +-- .../sql/catalyst/expressions/Expression.scala | 4 + .../sql/catalyst/expressions/arithmetic.scala | 4 + .../sql/catalyst/expressions/predicates.scala | 5 +- .../sql/catalyst/optimizer/Optimizer.scala | 19 +- 5 files changed, 119 insertions(+), 128 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/bc0d76a2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala --
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index b064600..9b8a08a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala
@@ -130,7 +130,7 @@ trait HiveTypeCoercion {
    * the appropriate numeric equivalent.
    */
   object ConvertNaNs extends Rule[LogicalPlan] {
-    private val stringNaN = Literal("NaN")
+    private val StringNaN = Literal("NaN")

     def apply(plan: LogicalPlan): LogicalPlan = plan transform {
       case q: LogicalPlan => q transformExpressions {
@@ -138,20 +138,20 @@ trait HiveTypeCoercion {
         case e if !e.childrenResolved => e

         /* Double Conversions */
-        case b: BinaryExpression if b.left == stringNaN && b.right.dataType == DoubleType =>
-          b.makeCopy(Array(b.right, Literal(Double.NaN)))
-        case b: BinaryExpression if b.left.dataType == DoubleType && b.right == stringNaN =>
-          b.makeCopy(Array(Literal(Double.NaN), b.left))
-        case b: BinaryExpression if b.left == stringNaN && b.right == stringNaN =>
-          b.makeCopy(Array(Literal(Double.NaN), b.left))
+        case b @ BinaryExpression(StringNaN, right @ DoubleType()) =>
+          b.makeCopy(Array(Literal(Double.NaN), right))
+        case b @ BinaryExpression(left @ DoubleType(), StringNaN) =>
+          b.makeCopy(Array(left, Literal(Double.NaN)))

         /* Float Conversions */
-        case b: BinaryExpression if b.left == stringNaN && b.right.dataType == FloatType =>
-          b.makeCopy(Array(b.right, Literal(Float.NaN)))
-        case b: BinaryExpression if b.left.dataType == FloatType && b.right == stringNaN =>
-          b.makeCopy(Array(Literal(Float.NaN), b.left))
-        case b: BinaryExpression if b.left == stringNaN && b.right == stringNaN =>
-          b.makeCopy(Array(Literal(Float.NaN), b.left))
+        case b @ BinaryExpression(StringNaN, right @ FloatType()) =>
+          b.makeCopy(Array(Literal(Float.NaN), right))
+        case b @ BinaryExpression(left @ FloatType(), StringNaN) =>
+          b.makeCopy(Array(left, Literal(Float.NaN)))
+
+        /* Use float NaN by default to avoid unnecessary type widening */
+        case b @ BinaryExpression(left @ StringNaN, StringNaN) =>
+          b.makeCopy(Array(left, Literal(Float.NaN)))
       }
     }
   }
@@ -184,21 +184,25 @@ trait HiveTypeCoercion {
     case u @ Union(left, right) if u.childrenResolved && !u.resolved =>
       val castedInput =
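The `unapply` methods this commit adds are small companion-object extractors on the binary node base classes that simply expose the two children, which is what makes the nested `b @ BinaryExpression(left @ ..., right @ ...)` patterns above possible. A self-contained toy sketch of the same technique, deliberately not using Spark's real classes:

```scala
// Toy expression tree; the names are illustrative, not Spark's.
sealed trait Expr { def dataType: String }
case class Lit(value: Any, dataType: String) extends Expr

abstract class BinaryExpr extends Expr {
  def left: Expr
  def right: Expr
}

// The trick from the commit: an extractor on the abstract binary node that
// exposes (left, right), enabling nested pattern matching on the children.
object BinaryExpr {
  def unapply(e: BinaryExpr): Option[(Expr, Expr)] = Some((e.left, e.right))
}

case class Plus(left: Expr, right: Expr) extends BinaryExpr { val dataType = "int" }

// Boolean extractor standing in for type patterns like StringType()/DateType().
object IntType {
  def unapply(e: Expr): Boolean = e.dataType == "int"
}

object PatMatSketch extends App {
  def describe(e: Expr): String = e match {
    // One pattern binds the node and both typed children at once.
    case b @ BinaryExpr(l @ IntType(), r @ IntType()) => s"binary int node $b with children $l and $r"
    case other => s"not a binary int node: $other"
  }

  println(describe(Plus(Lit(1, "int"), Lit(2, "int"))))
  println(describe(Lit("x", "string")))
}
```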