[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9000#discussion_r41587930 --- Diff: core/src/test/scala/org/apache/spark/StaticMemoryManagerSuite.scala --- @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark + +import scala.collection.mutable.ArrayBuffer + +import org.mockito.Mockito.{mock, reset, verify, when} +import org.mockito.Matchers.{any, eq => meq} + +import org.apache.spark.storage.{BlockId, BlockStatus, MemoryStore, TestBlockId} + + +class StaticMemoryManagerSuite extends SparkFunSuite { + private val conf = new SparkConf().set("spark.storage.unrollFraction", "0.4") + + test("basic execution memory") { +val maxExecutionMem = 1000L +val (mm, _) = makeThings(maxExecutionMem, Long.MaxValue) +assert(mm.executionMemoryUsed === 0L) +assert(mm.acquireExecutionMemory(10L) === 10L) +assert(mm.executionMemoryUsed === 10L) +assert(mm.acquireExecutionMemory(100L) === 100L) +// Acquire up to the max +assert(mm.acquireExecutionMemory(1000L) === 890L) +assert(mm.executionMemoryUsed === 1000L) +assert(mm.acquireExecutionMemory(1L) === 0L) +assert(mm.executionMemoryUsed === 1000L) +mm.releaseExecutionMemory(800L) +assert(mm.executionMemoryUsed === 200L) +// Acquire after release +assert(mm.acquireExecutionMemory(1L) === 1L) +assert(mm.executionMemoryUsed === 201L) +// Release beyond what was acquired +mm.releaseExecutionMemory(maxExecutionMem) +assert(mm.executionMemoryUsed === 0L) + } + + test("basic storage memory") { +val maxStorageMem = 1000L +val dummyBlock = TestBlockId("you can see the world you brought to live") +val dummyBlocks = new ArrayBuffer[(BlockId, BlockStatus)] +val (mm, ms) = makeThings(Long.MaxValue, maxStorageMem) +assert(mm.storageMemoryUsed === 0L) +assert(mm.acquireStorageMemory(dummyBlock, 10L, dummyBlocks) === 10L) +// `ensureFreeSpace` should be called with the number of bytes requested +assertEnsureFreeSpaceCalled(ms, dummyBlock, 10L) +assert(mm.storageMemoryUsed === 10L) +assert(dummyBlocks.isEmpty) +assert(mm.acquireStorageMemory(dummyBlock, 100L, dummyBlocks) === 100L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 100L) +// Acquire up to the max, not granted +assert(mm.acquireStorageMemory(dummyBlock, 1000L, 
dummyBlocks) === 0L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 1000L) +assert(mm.storageMemoryUsed === 110L) +assert(mm.acquireStorageMemory(dummyBlock, 890L, dummyBlocks) === 890L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 890L) +assert(mm.storageMemoryUsed === 1000L) +assert(mm.acquireStorageMemory(dummyBlock, 1L, dummyBlocks) === 0L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 1L) +assert(mm.storageMemoryUsed === 1000L) +mm.releaseStorageMemory(800L) +assert(mm.storageMemoryUsed === 200L) +// Acquire after release +assert(mm.acquireStorageMemory(dummyBlock, 1L, dummyBlocks) === 1L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 1L) +assert(mm.storageMemoryUsed === 201L) +mm.releaseStorageMemory() +assert(mm.storageMemoryUsed === 0L) +assert(mm.acquireStorageMemory(dummyBlock, 1L, dummyBlocks) === 1L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 1L) +assert(mm.storageMemoryUsed === 1L) +// Release beyond what was acquired +mm.releaseStorageMemory(100L) +assert(mm.storageMemoryUsed === 0L) + } + + test("execution and storage isolation") { +val maxExecutionMem = 200L +val maxStorageMem = 1000L +val dummyBlock = TestBlockId("ain't nobody love like you do") +val dummyBlocks = new ArrayBuffer[(BlockId, BlockStatus)] +val (mm, ms) = makeThings(maxExecutionMem, maxStorageMem) +//
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9000#discussion_r41587896 --- Diff: core/src/test/scala/org/apache/spark/StaticMemoryManagerSuite.scala --- @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark + +import scala.collection.mutable.ArrayBuffer + +import org.mockito.Mockito.{mock, reset, verify, when} +import org.mockito.Matchers.{any, eq => meq} + +import org.apache.spark.storage.{BlockId, BlockStatus, MemoryStore, TestBlockId} + + +class StaticMemoryManagerSuite extends SparkFunSuite { + private val conf = new SparkConf().set("spark.storage.unrollFraction", "0.4") + + test("basic execution memory") { +val maxExecutionMem = 1000L +val (mm, _) = makeThings(maxExecutionMem, Long.MaxValue) +assert(mm.executionMemoryUsed === 0L) +assert(mm.acquireExecutionMemory(10L) === 10L) +assert(mm.executionMemoryUsed === 10L) +assert(mm.acquireExecutionMemory(100L) === 100L) +// Acquire up to the max +assert(mm.acquireExecutionMemory(1000L) === 890L) +assert(mm.executionMemoryUsed === 1000L) +assert(mm.acquireExecutionMemory(1L) === 0L) +assert(mm.executionMemoryUsed === 1000L) +mm.releaseExecutionMemory(800L) +assert(mm.executionMemoryUsed === 200L) +// Acquire after release +assert(mm.acquireExecutionMemory(1L) === 1L) +assert(mm.executionMemoryUsed === 201L) +// Release beyond what was acquired +mm.releaseExecutionMemory(maxExecutionMem) --- End diff -- it logs a warning --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10855] [SQL] Add a JDBC dialect for Apa...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/8982#issuecomment-146727544 [Test build #43436 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43436/console) for PR 8982 at commit [`f83d86c`](https://github.com/apache/spark/commit/f83d86ce01c3943595518503230e283b3062a44d). * This patch **passes all tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10855] [SQL] Add a JDBC dialect for Apa...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/8982#issuecomment-146727613 Merged build finished. Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10767][PYSPARK] Make pyspark shared par...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9017#issuecomment-146728663 [Test build #43447 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43447/console) for PR 9017 at commit [`929d850`](https://github.com/apache/spark/commit/929d8501ef74575f00a19459fc8a163d58ffe6e5). * This patch **passes all tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10767][PYSPARK] Make pyspark shared par...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9017#issuecomment-146728730 Merged build finished. Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10767][PYSPARK] Make pyspark shared par...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9017#issuecomment-146728731 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43447/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9000#issuecomment-146729669 [Test build #43450 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43450/consoleFull) for PR 9000 at commit [`fc7f9f5`](https://github.com/apache/spark/commit/fc7f9f519852c2b3ef3eebcbc8e3f0ba63fcb3dc). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-8386] [SQL]add write.mode for insertInt...
GitHub user huaxingao opened a pull request: https://github.com/apache/spark/pull/9042 [SPARK-8386] [SQL]add write.mode for insertIntoJDBC when the parm overwrite is false the fix is for jira https://issues.apache.org/jira/browse/SPARK-8386 You can merge this pull request into a Git repository by running: $ git pull https://github.com/huaxingao/spark spark8386 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/spark/pull/9042.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #9042 commit 75b7f57b42a1f0e88258187f75ab6b36a8706b0f Author: Huaxin Gao — Date: 2015-10-09T00:49:50Z add write.mode for insertIntoJDBC when the parm overwrite is false --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11007] [SQL] Adds dictionary aware Parq...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9040#issuecomment-146732928 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43443/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11007] [SQL] Adds dictionary aware Parq...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9040#issuecomment-146732725 [Test build #43443 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43443/console) for PR 9040 at commit [`bd694d6`](https://github.com/apache/spark/commit/bd694d65a4a126446e50096dabfb2a3c91521453). * This patch **passes all tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10973][ML][PYTHON] __gettitem__ method ...
Github user jkbradley commented on the pull request: https://github.com/apache/spark/pull/9009#issuecomment-146732821 merging with master --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10515] When killing executor, the pendi...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/8945#issuecomment-146732399 [Test build #43453 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43453/consoleFull) for PR 8945 at commit [`e382315`](https://github.com/apache/spark/commit/e382315e9905b735837bc37a63acd16b72242ada). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9041#issuecomment-146734813 [Test build #43452 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43452/console) for PR 9041 at commit [`dba200f`](https://github.com/apache/spark/commit/dba200fc912ae7b29b8082dd77490bcb504dcf74). * This patch **passes all tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11020] [core] Wait for HDFS to leave sa...
GitHub user vanzin opened a pull request: https://github.com/apache/spark/pull/9043 [SPARK-11020] [core] Wait for HDFS to leave safe mode before initializing HS. Large HDFS clusters may take a while to leave safe mode when starting; this change makes the HS wait for that before doing checks about its configuration. This means the HS won't stop right away if HDFS is in safe mode and the configuration is not correct, but that should be a very uncommon situation. You can merge this pull request into a Git repository by running: $ git pull https://github.com/vanzin/spark SPARK-11020 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/spark/pull/9043.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #9043 commit 715b4ffd63c9a43f10ca8c8ae5ad653d862e5d49 Author: Marcelo Vanzin — Date: 2015-10-09T01:39:28Z [SPARK-11020] [core] Wait for HDFS to leave safe mode before initializing HS. Large HDFS clusters may take a while to leave safe mode when starting; this change makes the HS wait for that before doing checks about its configuration. This means the HS won't stop right away if HDFS is in safe mode and the configuration is not correct, but that should be a very uncommon situation. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11020] [core] Wait for HDFS to leave sa...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9043#issuecomment-146734763 Merged build started. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10955][streaming] Add a warning if dyna...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/8998 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11020] [core] Wait for HDFS to leave sa...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9043#issuecomment-146735610 [Test build #43455 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43455/consoleFull) for PR 9043 at commit [`715b4ff`](https://github.com/apache/spark/commit/715b4ffd63c9a43f10ca8c8ae5ad653d862e5d49). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10739][Yarn] Add application attempt wi...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8857#discussion_r41593173 --- Diff: docs/running-on-yarn.md --- @@ -304,6 +304,14 @@ If you need a reference to the proper location to put log files in the YARN so t + spark.yarn.attemptFailuresValidityInterval + -1 + + Ignore the failure number which happens out the validity interval (in millisecond). --- End diff -- "Ignore failures that happen outside the validity interval." --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-8654] [SQL] Analysis exception when usi...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9036#issuecomment-146740993 [Test build #43449 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43449/console) for PR 9036 at commit [`f5006f7`](https://github.com/apache/spark/commit/f5006f737ab917289c5007ede0277966f69f37e4). * This patch **passes all tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10739][Yarn] Add application attempt wi...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8857#discussion_r41593235 --- Diff: yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala --- @@ -185,6 +185,23 @@ private[spark] class Client( case None => logDebug("spark.yarn.maxAppAttempts is not set. " + "Cluster's default value will be used.") } + sparkConf.getOption("spark.yarn.attemptFailuresValidityInterval").map(_.toLong) match { --- End diff -- You should use `sparkConf.getTimeAsMs` here. Since there's no `Option` equivalent for that one, you'll need to use `sparkConf.contains` to check if the value is set. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-8654] [SQL] Analysis exception when usi...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9036#issuecomment-146741058 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43449/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9000#issuecomment-146745125 [Test build #43445 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43445/console) for PR 9000 at commit [`6e2e870`](https://github.com/apache/spark/commit/6e2e870ab5610d6a0152021ec68bb276422ac963). * This patch **passes all tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-8654] [SQL] Analysis exception when usi...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9036#issuecomment-146751458 Merged build triggered. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-8654] [SQL] Analysis exception when usi...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9036#issuecomment-146751462 Merged build started. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-8654] [SQL] Analysis exception when usi...
Github user cloud-fan commented on the pull request: https://github.com/apache/spark/pull/9036#issuecomment-146752602 LGTM pending test. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41585457 --- Diff: core/src/main/scala/org/apache/spark/crypto/CryptoOutputStream.scala --- @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.crypto + +import java.io.{IOException, FilterOutputStream, OutputStream} +import java.nio.ByteBuffer +import java.security.GeneralSecurityException + +import com.google.common.base.Preconditions + +import org.apache.spark.Logging + +/** + * CryptoOutputStream encrypts data. It is not thread-safe. AES CTR mode is + * required in order to ensure that the plain text and cipher text have a 1:1 + * mapping. The encryption is buffer based. The key points of the encryption are + * (1) calculating counter and (2) padding through stream position. + * + * counter = base + pos/(algorithm blocksize); + * padding = pos%(algorithm blocksize); + * + * The underlying stream offset is maintained as state. 
+ */ +class CryptoOutputStream(out: OutputStream, codecVal: CryptoCodec, bufferSizeVal: Int, + keyVal: Array[Byte], ivVal: Array[Byte], streamOffsetVal: Long) extends +FilterOutputStream(out: OutputStream) with Logging { + var codec: CryptoCodec = null + var encryptor: Encryptor = null + var bufferSize: Int = 0 + /** + * Input data buffer. The data starts at inBuffer.position() and ends at + * inBuffer.limit(). + */ + var inBuffer: ByteBuffer = null + /** + * Encrypted data buffer. The data starts at outBuffer.position() and ends at + * outBuffer.limit(); + */ + var outBuffer: ByteBuffer = null + var streamOffset: Long = 0 + /** + * Padding = pos%(algorithm blocksize); Padding is put into {@link #inBuffer} + * before any other data goes in. The purpose of padding is to put input data + * at proper position. + */ + var padding: Byte = 0 + var closed: Boolean = false + var key: Array[Byte] = null + var initIV: Array[Byte] = null + var iv: Array[Byte] = null + var tmpBuf: Array[Byte] = null + val oneByteBuf: Array[Byte] = new Array[Byte](1) + + CryptoStreamUtils.checkCodec(codecVal) + bufferSize = CryptoStreamUtils.checkBufferSize(codecVal, bufferSizeVal) + codec = codecVal + key = keyVal.clone + initIV = ivVal.clone + iv = ivVal.clone + inBuffer = ByteBuffer.allocateDirect(bufferSize) + outBuffer = ByteBuffer.allocateDirect(bufferSize) + streamOffset = streamOffsetVal + try { +encryptor = codec.createEncryptor + } + catch { +case e: GeneralSecurityException => { + throw e +} + } + updateEncryptor + + + def this(out: OutputStream, codec: CryptoCodec, bufferSize: Int, key: Array[Byte], + iv: Array[Byte]) { +this(out, codec, bufferSize, key, iv, 0) + } + + def this(out: OutputStream, codec: CryptoCodec, key: Array[Byte], iv: Array[Byte], streamOffset: + Long) { +this(out, codec, CryptoStreamUtils.getBufferSize, key, iv, streamOffset) + } + + def this(out: OutputStream, codec: CryptoCodec, key: Array[Byte], iv: Array[Byte]) { +this(out, codec, key, iv, 0) + } + + def 
getWrappedStream: OutputStream = { +out + } + + /** + * Encryption is buffer based. + * If there is enough room in {@link #inBuffer}, then write to this buffer. + * If {@link #inBuffer} is full, then do encryption and write data to the + * underlying stream. + * @param b the data. + * @param offVal the start offset in the data. + * @param lenVal the number of bytes to write. + * @throws IOException + */ + override def write(b: Array[Byte], offVal: Int, lenVal: Int) { +var off = offVal +var len = lenVal +checkStream +if (b == null) { + throw new
[GitHub] spark pull request: [SPARK-10599] Lower communication for block ma...
Github user jkbradley commented on the pull request: https://github.com/apache/spark/pull/8757#issuecomment-146720466 Making a pass now --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10599] Lower communication for block ma...
Github user jkbradley commented on a diff in the pull request: https://github.com/apache/spark/pull/8757#discussion_r41586254 --- Diff: mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala --- @@ -352,6 +353,30 @@ class BlockMatrix @Since("1.3.0") ( } } + private type BlockDestinations = Map[(Int, Int), Set[Int]] --- End diff -- Document ``` /** Block (i,j) --> Set of destination partitions */ ``` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10599] Lower communication for block ma...
Github user jkbradley commented on a diff in the pull request: https://github.com/apache/spark/pull/8757#discussion_r41586262 --- Diff: mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala --- @@ -352,6 +353,30 @@ class BlockMatrix @Since("1.3.0") ( } } + private type BlockDestinations = Map[(Int, Int), Set[Int]] + + /** + * Simulate the multiplication with just indices in order to cut costs on communication, when + * we are actually shuffling the matrices. + */ + private def simulateMultiply( --- End diff -- It'd be nice to add a unit test for this, though the logic looks fine. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41586275 --- Diff: core/src/main/scala/org/apache/spark/crypto/JceAesCtrCryptoCodec.scala --- @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.crypto + +import java.io.IOException +import java.nio.ByteBuffer +import java.security.{GeneralSecurityException, SecureRandom} +import javax.crypto.Cipher +import javax.crypto.spec.{IvParameterSpec, SecretKeySpec} + +import com.google.common.base.Preconditions + +import org.apache.spark.crypto.CommonConfigurationKeys +.SPARK_SECURITY_JAVA_SECURE_RANDOM_ALGORITHM_DEFAULT +import org.apache.spark.crypto.CommonConfigurationKeys +.SPARK_SECURITY_JAVA_SECURE_RANDOM_ALGORITHM_KEY +import org.apache.spark.{SparkConf, Logging} + +/** + * Implement the AES-CTR crypto codec using JCE provider. 
+ * @param conf + */ +private[spark] class JceAesCtrCryptoCodec(conf: SparkConf) extends AesCtrCryptoCodec with Logging { + var provider: String = null + var random: SecureRandom = null + + setConf(conf) + + def setConf(conf: SparkConf) { +val secureRandomAlg: String = conf.get(SPARK_SECURITY_JAVA_SECURE_RANDOM_ALGORITHM_KEY, + SPARK_SECURITY_JAVA_SECURE_RANDOM_ALGORITHM_DEFAULT) +try { + random = SecureRandom.getInstance(secureRandomAlg) +} +catch { + case e: GeneralSecurityException => { --- End diff -- nit: you don't need the braces here --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10599] Lower communication for block ma...
Github user jkbradley commented on a diff in the pull request: https://github.com/apache/spark/pull/8757#discussion_r41586259 --- Diff: mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala --- @@ -352,6 +353,30 @@ class BlockMatrix @Since("1.3.0") ( } } + private type BlockDestinations = Map[(Int, Int), Set[Int]] + + /** + * Simulate the multiplication with just indices in order to cut costs on communication, when --- End diff -- "just indices" --> "just block indices" Document return value. Copy doc: ```The `colsPerBlock` of this matrix must equal the `rowsPerBlock` of `other`.``` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9041#issuecomment-146723009 [Test build #43444 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43444/consoleFull) for PR 9041 at commit [`dba200f`](https://github.com/apache/spark/commit/dba200fc912ae7b29b8082dd77490bcb504dcf74). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41587132 --- Diff: core/src/test/scala/org/apache/spark/crypto/JceAesCtrCryptoCodecSuite.scala --- @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.crypto + +import java.io.{ByteArrayInputStream, BufferedOutputStream, ByteArrayOutputStream} +import java.security.SecureRandom + +import org.apache.spark.crypto.CommonConfigurationKeys._ +import org.apache.spark.{SparkFunSuite, SparkConf, Logging} + +/** + * test JceAesCtrCryptoCodec + */ +class JceAesCtrCryptoCodecSuite extends SparkFunSuite with Logging { + + test("TestCryptoCodecSuite") { +val random = new SecureRandom +val dataLen = 1000 +val inputData = new Array[Byte](dataLen) +val outputData = new Array[Byte](dataLen) +random.nextBytes(inputData) +// encrypt +val sparkConf = new SparkConf + sparkConf.set(SPARK_SECURITY_CRYPTO_CODEC_CLASSES_AES_CTR_NOPADDING_KEY, + classOf[JceAesCtrCryptoCodec].getName) +val cipherSuite = new CipherSuite("AES/CTR/NoPadding", 16) +val codec = new JceAesCtrCryptoCodec(sparkConf) +val aos = new ByteArrayOutputStream +val bos = new BufferedOutputStream(aos) +val key = new Array[Byte](16) +val iv = new Array[Byte](16) +random.nextBytes(key) +random.nextBytes(iv) + +val cos = new CryptoOutputStream(bos, codec, 1024, key, iv) +cos.write(inputData, 0, inputData.length) +cos.flush +// decrypt +val cis = new CryptoInputStream(new ByteArrayInputStream(aos.toByteArray), + codec, 1024, key, iv) +var readLen: Int = 0 +var outOffset: Int = 0 +while (readLen < dataLen) { + val n: Int = cis.read(outputData, outOffset, outputData.length - outOffset) --- End diff -- Similarly, this should test different array sizes passed to the `read` call. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10250][CORE] External group by to handl...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/8438#issuecomment-146725820 Merged build finished. Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9041#issuecomment-146725895 [Test build #43444 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43444/console) for PR 9041 at commit [`dba200f`](https://github.com/apache/spark/commit/dba200fc912ae7b29b8082dd77490bcb504dcf74). * This patch **fails MiMa tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10810] [SPARK-10902] [SQL] Improve sess...
Github user davies commented on the pull request: https://github.com/apache/spark/pull/8909#issuecomment-146725869 Merged into master, new comments will be addressed in follow-up PR, thank you all! --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10810] [SPARK-10902] [SQL] Improve sess...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/8909 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10250][CORE] External group by to handl...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/8438#issuecomment-146725821 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43422/ Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41587038 --- Diff: core/src/test/scala/org/apache/spark/crypto/JceAesCtrCryptoCodecSuite.scala --- @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.crypto + +import java.io.{ByteArrayInputStream, BufferedOutputStream, ByteArrayOutputStream} +import java.security.SecureRandom + +import org.apache.spark.crypto.CommonConfigurationKeys._ +import org.apache.spark.{SparkFunSuite, SparkConf, Logging} + +/** + * test JceAesCtrCryptoCodec + */ +class JceAesCtrCryptoCodecSuite extends SparkFunSuite with Logging { + + test("TestCryptoCodecSuite") { +val random = new SecureRandom +val dataLen = 1000 +val inputData = new Array[Byte](dataLen) +val outputData = new Array[Byte](dataLen) +random.nextBytes(inputData) +// encrypt +val sparkConf = new SparkConf + sparkConf.set(SPARK_SECURITY_CRYPTO_CODEC_CLASSES_AES_CTR_NOPADDING_KEY, + classOf[JceAesCtrCryptoCodec].getName) +val cipherSuite = new CipherSuite("AES/CTR/NoPadding", 16) +val codec = new JceAesCtrCryptoCodec(sparkConf) +val aos = new ByteArrayOutputStream +val bos = new BufferedOutputStream(aos) +val key = new Array[Byte](16) +val iv = new Array[Byte](16) +random.nextBytes(key) +random.nextBytes(iv) + +val cos = new CryptoOutputStream(bos, codec, 1024, key, iv) +cos.write(inputData, 0, inputData.length) --- End diff -- It would be nice if the test exercised more code paths; right now it's writing a huge byte array. It should also try to write small byte arrays that are smaller than the configured buffer size, for example. It should also make sure that the data leaves an unfinished buffer at the end, to make sure that the `flush` implementation is doing the right thing. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-8654] [SQL] Analysis exception when usi...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/9036#discussion_r41587125 --- Diff: sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercionSuite.scala --- @@ -470,7 +470,8 @@ class HiveTypeCoercionSuite extends PlanTest { ) ruleTest(inConversion, In(Literal("a"), Seq(Literal(1), Literal("b"))), - In(Literal("a"), Seq(Cast(Literal(1), StringType), Cast(Literal("b"), StringType))) + In(Cast(Literal("a"), StringType), Seq(Cast(Literal(1), StringType), + Cast(Literal("b"), StringType))) --- End diff -- nit: ``` In(Cast(Literal("a"), StringType), Seq(Cast(Literal(1), StringType), Cast(Literal("b"), StringType))) ``` looks better --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9041#issuecomment-146725914 Merged build finished. Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41587190 --- Diff: yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala --- @@ -17,6 +17,7 @@ package org.apache.spark.deploy.yarn + --- End diff -- nit: not needed --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9000#discussion_r41587851 --- Diff: core/src/test/scala/org/apache/spark/StaticMemoryManagerSuite.scala --- @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark + +import scala.collection.mutable.ArrayBuffer + +import org.mockito.Mockito.{mock, reset, verify, when} +import org.mockito.Matchers.{any, eq => meq} + +import org.apache.spark.storage.{BlockId, BlockStatus, MemoryStore, TestBlockId} + + +class StaticMemoryManagerSuite extends SparkFunSuite { + private val conf = new SparkConf().set("spark.storage.unrollFraction", "0.4") + + test("basic execution memory") { +val maxExecutionMem = 1000L +val (mm, _) = makeThings(maxExecutionMem, Long.MaxValue) +assert(mm.executionMemoryUsed === 0L) +assert(mm.acquireExecutionMemory(10L) === 10L) +assert(mm.executionMemoryUsed === 10L) +assert(mm.acquireExecutionMemory(100L) === 100L) +// Acquire up to the max +assert(mm.acquireExecutionMemory(1000L) === 890L) +assert(mm.executionMemoryUsed === 1000L) +assert(mm.acquireExecutionMemory(1L) === 0L) +assert(mm.executionMemoryUsed === 1000L) +mm.releaseExecutionMemory(800L) +assert(mm.executionMemoryUsed === 200L) +// Acquire after release +assert(mm.acquireExecutionMemory(1L) === 1L) +assert(mm.executionMemoryUsed === 201L) +// Release beyond what was acquired +mm.releaseExecutionMemory(maxExecutionMem) +assert(mm.executionMemoryUsed === 0L) + } + + test("basic storage memory") { +val maxStorageMem = 1000L +val dummyBlock = TestBlockId("you can see the world you brought to live") +val dummyBlocks = new ArrayBuffer[(BlockId, BlockStatus)] --- End diff -- meh, let's not do that --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9000#discussion_r41587795 --- Diff: core/src/test/scala/org/apache/spark/StaticMemoryManagerSuite.scala --- @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark + +import scala.collection.mutable.ArrayBuffer + +import org.mockito.Mockito.{mock, reset, verify, when} +import org.mockito.Matchers.{any, eq => meq} + +import org.apache.spark.storage.{BlockId, BlockStatus, MemoryStore, TestBlockId} + + +class StaticMemoryManagerSuite extends SparkFunSuite { + private val conf = new SparkConf().set("spark.storage.unrollFraction", "0.4") + + test("basic execution memory") { +val maxExecutionMem = 1000L +val (mm, _) = makeThings(maxExecutionMem, Long.MaxValue) +assert(mm.executionMemoryUsed === 0L) +assert(mm.acquireExecutionMemory(10L) === 10L) +assert(mm.executionMemoryUsed === 10L) +assert(mm.acquireExecutionMemory(100L) === 100L) +// Acquire up to the max +assert(mm.acquireExecutionMemory(1000L) === 890L) +assert(mm.executionMemoryUsed === 1000L) +assert(mm.acquireExecutionMemory(1L) === 0L) +assert(mm.executionMemoryUsed === 1000L) +mm.releaseExecutionMemory(800L) +assert(mm.executionMemoryUsed === 200L) +// Acquire after release +assert(mm.acquireExecutionMemory(1L) === 1L) +assert(mm.executionMemoryUsed === 201L) +// Release beyond what was acquired +mm.releaseExecutionMemory(maxExecutionMem) +assert(mm.executionMemoryUsed === 0L) + } + + test("basic storage memory") { +val maxStorageMem = 1000L +val dummyBlock = TestBlockId("you can see the world you brought to live") +val dummyBlocks = new ArrayBuffer[(BlockId, BlockStatus)] +val (mm, ms) = makeThings(Long.MaxValue, maxStorageMem) +assert(mm.storageMemoryUsed === 0L) +assert(mm.acquireStorageMemory(dummyBlock, 10L, dummyBlocks) === 10L) +// `ensureFreeSpace` should be called with the number of bytes requested +assertEnsureFreeSpaceCalled(ms, dummyBlock, 10L) +assert(mm.storageMemoryUsed === 10L) +assert(dummyBlocks.isEmpty) +assert(mm.acquireStorageMemory(dummyBlock, 100L, dummyBlocks) === 100L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 100L) +// Acquire up to the max, not granted +assert(mm.acquireStorageMemory(dummyBlock, 1000L, 
dummyBlocks) === 0L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 1000L) +assert(mm.storageMemoryUsed === 110L) +assert(mm.acquireStorageMemory(dummyBlock, 890L, dummyBlocks) === 890L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 890L) +assert(mm.storageMemoryUsed === 1000L) +assert(mm.acquireStorageMemory(dummyBlock, 1L, dummyBlocks) === 0L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 1L) +assert(mm.storageMemoryUsed === 1000L) +mm.releaseStorageMemory(800L) +assert(mm.storageMemoryUsed === 200L) +// Acquire after release +assert(mm.acquireStorageMemory(dummyBlock, 1L, dummyBlocks) === 1L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 1L) +assert(mm.storageMemoryUsed === 201L) +mm.releaseStorageMemory() +assert(mm.storageMemoryUsed === 0L) +assert(mm.acquireStorageMemory(dummyBlock, 1L, dummyBlocks) === 1L) +assertEnsureFreeSpaceCalled(ms, dummyBlock, 1L) +assert(mm.storageMemoryUsed === 1L) +// Release beyond what was acquired +mm.releaseStorageMemory(100L) +assert(mm.storageMemoryUsed === 0L) + } + + test("execution and storage isolation") { +val maxExecutionMem = 200L +val maxStorageMem = 1000L +val dummyBlock = TestBlockId("ain't nobody love like you do") +val dummyBlocks = new ArrayBuffer[(BlockId, BlockStatus)] +val (mm, ms) = makeThings(maxExecutionMem, maxStorageMem) +//
[GitHub] spark pull request: [SPARK-10990] [SPARK-11018] [SQL] improve unro...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9016#issuecomment-146727257 [Test build #43448 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43448/consoleFull) for PR 9016 at commit [`b29314b`](https://github.com/apache/spark/commit/b29314b180d475b53adbc4fcd696bfe061b6ae12). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-8552] [THRIFTSERVER] Using incorrect da...
Github user davies commented on the pull request: https://github.com/apache/spark/pull/7118#issuecomment-146727231 Since #8909 is merged, would you mind closing this PR? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-8654] [SQL] Analysis exception when usi...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9036#issuecomment-146729170 [Test build #43449 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43449/consoleFull) for PR 9036 at commit [`f5006f7`](https://github.com/apache/spark/commit/f5006f737ab917289c5007ede0277966f69f37e4). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10665][SQL] Query planner infra for loc...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9039#issuecomment-146730022 [Test build #43451 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43451/console) for PR 9039 at commit [`123c135`](https://github.com/apache/spark/commit/123c1356e017155e925888552c45988094c6ccb0). * This patch **fails RAT tests**. * This patch merges cleanly. * This patch adds the following public classes _(experimental)_: * `public final class UnsafeRow extends MutableRow implements Externalizable, KryoSerializable ` * `class CoPartitionedRDD(` * `class QueryFragmentBuilder(maxIterations: Int = 100) extends RuleExecutor[SparkPlan] ` * `case class IteratorScanNode(` * `abstract class LocalNode(conf: SQLConf) extends QueryPlan[LocalNode] with Logging with Serializable ` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10665][SQL] Query planner infra for loc...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9039#issuecomment-146730024 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43451/ Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10665][SQL] Query planner infra for loc...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9039#issuecomment-146730023 Merged build finished. Test FAILed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user harishreedharan commented on the pull request: https://github.com/apache/spark/pull/9041#issuecomment-146730016 This basically makes testing a bit more predictable. Sometimes, we end up hitting a situation where the last transaction is still not completed and the remaining data just stays stuck in Flume. Also, it removes some unneeded `InterruptedException` from the logs --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10665][SQL] Query planner infra for loc...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9039#issuecomment-146729982 [Test build #43451 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43451/consoleFull) for PR 9039 at commit [`123c135`](https://github.com/apache/spark/commit/123c1356e017155e925888552c45988094c6ccb0). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10991][ML] logistic regression training...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9037#issuecomment-146731721 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43446/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user JoshRosen commented on the pull request: https://github.com/apache/spark/pull/9000#issuecomment-146733140 LGTM. Feel free to merge pending Jenkins. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11020] [core] Wait for HDFS to leave sa...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9043#issuecomment-146734747 Merged build triggered. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user tdas commented on the pull request: https://github.com/apache/spark/pull/9041#issuecomment-146735090 LGTM. Merging it to master and branch 1.5 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9041#issuecomment-146734952 Merged build finished. Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9041#issuecomment-146734953 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43452/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-8654] [SQL] Analysis exception when usi...
Github user dilipbiswal commented on a diff in the pull request: https://github.com/apache/spark/pull/9036#discussion_r41595658 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala --- @@ -304,7 +304,10 @@ object HiveTypeCoercion { } /** - * Convert all expressions in in() list to the left operator type + * Convert the value and in list expressions to the common operator type + * by looking at all the argument types and finding the closest one that + * all the arguments can be cast to. When no common operator type is found + * an Analysis Exception is raised. --- End diff -- @cloud-fan You are right Wenchen. I just wanted to somehow mention that an exception will be raised on a data type mismatch. I will reword it to the way you suggest: "the original one will be returned and an Analysis Exception will be raised at type checking phase". Let me push a commit. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41584529 --- Diff: core/src/main/scala/org/apache/spark/crypto/CryptoOutputStream.scala --- @@ -0,0 +1,224 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.crypto + +import java.io.{IOException, FilterOutputStream, OutputStream} +import java.nio.ByteBuffer +import java.security.GeneralSecurityException + +import com.google.common.base.Preconditions + +import org.apache.spark.Logging + +/** + * CryptoOutputStream encrypts data. It is not thread-safe. AES CTR mode is + * required in order to ensure that the plain text and cipher text have a 1:1 + * mapping. The encryption is buffer based. The key points of the encryption are + * (1) calculating counter and (2) padding through stream position. + * + * counter = base + pos/(algorithm blocksize); + * padding = pos%(algorithm blocksize); + * + * The underlying stream offset is maintained as state. 
+ */ +class CryptoOutputStream(out: OutputStream, codecVal: CryptoCodec, bufferSizeVal: Int, + keyVal: Array[Byte], ivVal: Array[Byte], streamOffsetVal: Long) extends +FilterOutputStream(out: OutputStream) with Logging { + var codec: CryptoCodec = null + var encryptor: Encryptor = null + var bufferSize: Int = 0 + /** + * Input data buffer. The data starts at inBuffer.position() and ends at + * inBuffer.limit(). + */ + var inBuffer: ByteBuffer = null + /** + * Encrypted data buffer. The data starts at outBuffer.position() and ends at + * outBuffer.limit(); + */ + var outBuffer: ByteBuffer = null + var streamOffset: Long = 0 + /** + * Padding = pos%(algorithm blocksize); Padding is put into {@link #inBuffer} + * before any other data goes in. The purpose of padding is to put input data + * at proper position. + */ + var padding: Byte = 0 + var closed: Boolean = false + var key: Array[Byte] = null + var initIV: Array[Byte] = null + var iv: Array[Byte] = null + var tmpBuf: Array[Byte] = null + val oneByteBuf: Array[Byte] = new Array[Byte](1) + + CryptoStreamUtils.checkCodec(codecVal) + bufferSize = CryptoStreamUtils.checkBufferSize(codecVal, bufferSizeVal) + codec = codecVal + key = keyVal.clone + initIV = ivVal.clone + iv = ivVal.clone + inBuffer = ByteBuffer.allocateDirect(bufferSize) + outBuffer = ByteBuffer.allocateDirect(bufferSize) + streamOffset = streamOffsetVal + try { +encryptor = codec.createEncryptor + } + catch { +case e: GeneralSecurityException => { + throw e +} + } + updateEncryptor + + + def this(out: OutputStream, codec: CryptoCodec, bufferSize: Int, key: Array[Byte], + iv: Array[Byte]) { +this(out, codec, bufferSize, key, iv, 0) + } + + def this(out: OutputStream, codec: CryptoCodec, key: Array[Byte], iv: Array[Byte], streamOffset: + Long) { +this(out, codec, CryptoStreamUtils.getBufferSize, key, iv, streamOffset) + } + + def this(out: OutputStream, codec: CryptoCodec, key: Array[Byte], iv: Array[Byte]) { +this(out, codec, key, iv, 0) + } + + def 
getWrappedStream: OutputStream = { +out + } + + /** + * Encryption is buffer based. + * If there is enough room in {@link #inBuffer}, then write to this buffer. + * If {@link #inBuffer} is full, then do encryption and write data to the + * underlying stream. + * @param b the data. + * @param offVal the start offset in the data. + * @param lenVal the number of bytes to write. + * @throws IOException + */ + override def write(b: Array[Byte], offVal: Int, lenVal: Int) { +var off = offVal +var len = lenVal +checkStream +if (b == null) { + throw new
[GitHub] spark pull request: [SPARK-11007] [SQL] Adds dictionary aware Parq...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9040#issuecomment-146718396 [Test build #43443 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43443/consoleFull) for PR 9040 at commit [`bd694d6`](https://github.com/apache/spark/commit/bd694d65a4a126446e50096dabfb2a3c91521453). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user JoshRosen commented on the pull request: https://github.com/apache/spark/pull/9000#issuecomment-146718407 LGTM, by the way, pending updates to address comments. The majority of the unaddressed comments are minor and nitpicky, but I think that https://github.com/apache/spark/pull/9000#discussion_r41568609 might deserve special attention. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10914] UnsafeRow serialization breaks w...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9030#issuecomment-146721247 Merged build finished. Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10914] UnsafeRow serialization breaks w...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9030#issuecomment-146721142 [Test build #43434 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43434/console) for PR 9030 at commit [`9b79e6f`](https://github.com/apache/spark/commit/9b79e6f1de4114d7a9a1dc693c079409b6846ffb). * This patch **passes all tests**. * This patch merges cleanly. * This patch adds the following public classes _(experimental)_: * `public final class UnsafeRow extends MutableRow implements Externalizable, KryoSerializable ` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10914] UnsafeRow serialization breaks w...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9030#issuecomment-146721248 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43434/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41586239 --- Diff: core/src/main/scala/org/apache/spark/crypto/JceAesCtrCryptoCodec.scala --- @@ -0,0 +1,132 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.crypto + +import java.io.IOException +import java.nio.ByteBuffer +import java.security.{GeneralSecurityException, SecureRandom} +import javax.crypto.Cipher +import javax.crypto.spec.{IvParameterSpec, SecretKeySpec} + +import com.google.common.base.Preconditions + +import org.apache.spark.crypto.CommonConfigurationKeys +.SPARK_SECURITY_JAVA_SECURE_RANDOM_ALGORITHM_DEFAULT +import org.apache.spark.crypto.CommonConfigurationKeys +.SPARK_SECURITY_JAVA_SECURE_RANDOM_ALGORITHM_KEY +import org.apache.spark.{SparkConf, Logging} + +/** + * Implement the AES-CTR crypto codec using JCE provider. + * @param conf + */ +private[spark] class JceAesCtrCryptoCodec(conf: SparkConf) extends AesCtrCryptoCodec with Logging { + var provider: String = null --- End diff -- I don't see this value being set anywhere. Am I missing something? 
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10599] Lower communication for block ma...
Github user jkbradley commented on a diff in the pull request: https://github.com/apache/spark/pull/8757#discussion_r41586252 --- Diff: mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/BlockMatrix.scala --- @@ -60,6 +60,7 @@ private[mllib] class GridPartitioner( */ override def getPartition(key: Any): Int = { key match { + case i: Int => i --- End diff -- Update documentation --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10810] [SPARK-10902] [SQL] Improve sess...
Github user andrewor14 commented on the pull request: https://github.com/apache/spark/pull/8909#issuecomment-146725227 Non-hive parts LGTM! --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41586770 --- Diff: core/src/main/scala/org/apache/spark/storage/DiskBlockObjectWriter.scala --- @@ -80,12 +83,28 @@ private[spark] class DiskBlockObjectWriter( */ private var numRecordsWritten = 0 + private var sparkConf: SparkConf = null + def open(): DiskBlockObjectWriter = { if (hasBeenClosed) { throw new IllegalStateException("Writer already closed. Cannot be reopened.") } fos = new FileOutputStream(file, true) -ts = new TimeTrackingOutputStream(writeMetrics, fos) +if (CryptoConf.isShuffleEncryted(sparkConf)) { + val cryptoCodec: CryptoCodec = CryptoCodec.getInstance(sparkConf) --- End diff -- It feels like this code (and the respective code in that creates the input stream) should be in a helper method somewhere. That should make it easier to test `CryptoInputStream` and `CryptoOutputStream` in isolation. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10905][SparkR]: Export freqItems() for ...
Github user felixcheung commented on a diff in the pull request: https://github.com/apache/spark/pull/8962#discussion_r41586812 --- Diff: R/pkg/R/DataFrame.R --- @@ -1873,3 +1872,30 @@ setMethod("as.data.frame", collect(x) } ) + +#' freqItems +#' +#' Finding frequent items for columns, possibly with false positives. +#' Using the frequent element count algorithm described in +#' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou. +#' +#' @param x A SparkSQL DataFrame. +#' @param cols A vector column names to search frequent items in. +#' @param support (Optional) The minimum frequency for an item to be considered `frequent`. +#'Should be greater than 1e-4. Default support = 0.01. +#' @return a local R data.frame with the frequent items in each column +#' +#' @rdname freqItems +#' @name freqItems +#' @export +#' @examples +#' \dontrun{ +#' df <- jsonFile(sqlCtx, "/path/to/file.json") +#' fi = freqItems(df, c("title", "gender")) +#' } +setMethod("freqItems", signature(x = "DataFrame", cols = "character"), + function(x, cols, support) { --- End diff -- any update? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10905][SparkR]: Export freqItems() for ...
Github user felixcheung commented on a diff in the pull request: https://github.com/apache/spark/pull/8962#discussion_r41586805 --- Diff: R/pkg/R/DataFrame.R --- @@ -1873,3 +1872,30 @@ setMethod("as.data.frame", collect(x) } ) + +#' freqItems +#' +#' Finding frequent items for columns, possibly with false positives. +#' Using the frequent element count algorithm described in +#' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou. +#' +#' @param x A SparkSQL DataFrame. +#' @param cols A vector column names to search frequent items in. +#' @param support (Optional) The minimum frequency for an item to be considered `frequent`. +#'Should be greater than 1e-4. Default support = 0.01. +#' @return a local R data.frame with the frequent items in each column +#' +#' @rdname freqItems +#' @name freqItems +#' @export +#' @examples +#' \dontrun{ +#' df <- jsonFile(sqlCtx, "/path/to/file.json") --- End diff -- any update? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41586814 --- Diff: core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala --- @@ -21,13 +21,17 @@ import java.io.InputStream import java.util.concurrent.LinkedBlockingQueue import scala.collection.mutable.{ArrayBuffer, HashSet, Queue} +import scala.util.{Failure, Success, Try} --- End diff -- Changes to this file seem unnecessary. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41587282 --- Diff: yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala --- @@ -608,6 +612,18 @@ private[spark] class ApplicationMaster( } } + /** set "SPARK_SHUFFLE_TOKEN" before registerAM +* @return +*/ + private def initJobCredentialsAndUGI() = { --- End diff -- I mentioned this before but I don't believe this should be necessary. `Client` always runs before this and should be propagating the credentials to the AM. In fact, if somehow the secret generated by `Client` is not showing up here, you could have weird things happening in client mode, since the driver and the AM will have different secrets. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10990] [SPARK-11018] [SQL] improve unro...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9016#issuecomment-146732468 [Test build #43439 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43439/console) for PR 9016 at commit [`297b06e`](https://github.com/apache/spark/commit/297b06ef1468a658ae68dc4e6884d65c915fb628). * This patch **fails Spark unit tests**. * This patch merges cleanly. * This patch adds the following public classes _(experimental)_: * `case class BinaryHashJoinNode(` * `case class BroadcastHashJoinNode(` * `trait HashJoinNode ` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user tdas commented on the pull request: https://github.com/apache/spark/pull/9041#issuecomment-146735295 Actually, there were small conflicts in branch 1.5, so I only merged to master. Seems fine, not a high-priority bug that 1.5 should get it. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10955][streaming] Add a warning if dyna...
Github user tdas commented on the pull request: https://github.com/apache/spark/pull/8998#issuecomment-146735373 LGTM, merging this to master and branch 1.5 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/9041 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10515] When killing executor, the pendi...
Github user andrewor14 commented on the pull request: https://github.com/apache/spark/pull/8945#issuecomment-146737544 Thanks LGTM. I'll merge this once tests pass. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARKR] [SPARK-10981] SparkR Join improvement...
Github user felixcheung commented on a diff in the pull request: https://github.com/apache/spark/pull/9029#discussion_r41592873 --- Diff: R/pkg/R/DataFrame.R --- @@ -1314,50 +1273,21 @@ setClassUnion("characterOrColumn", c("character", "Column")) #' path <- "path/to/file.json" #' df <- jsonFile(sqlContext, path) #' arrange(df, df$col1) +#' arrange(df, "col1") #' arrange(df, asc(df$col1), desc(abs(df$col2))) -#' arrange(df, "col1", decreasing = TRUE) -#' arrange(df, "col1", "col2", decreasing = c(TRUE, FALSE)) #' } setMethod("arrange", - signature(x = "DataFrame", col = "Column"), + signature(x = "DataFrame", col = "characterOrColumn"), function(x, col, ...) { +if (class(col) == "character") { + sdf <- callJMethod(x@sdf, "sort", col, toSeq(...)) +} else if (class(col) == "Column") { jcols <- lapply(list(col, ...), function(c) { c@jc }) - -sdf <- callJMethod(x@sdf, "sort", jcols) -dataFrame(sdf) - }) - -#' @rdname arrange -#' @export -setMethod("arrange", - signature(x = "DataFrame", col = "character"), - function(x, col, ..., decreasing = FALSE) { --- End diff -- It looks like this is undoing a recent PR, could you check? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10905][SparkR]: Export freqItems() for ...
Github user sun-rui commented on a diff in the pull request: https://github.com/apache/spark/pull/8962#discussion_r41592900 --- Diff: R/pkg/R/stats.R --- @@ -100,3 +100,30 @@ setMethod("corr", statFunctions <- callJMethod(x@sdf, "stat") callJMethod(statFunctions, "corr", col1, col2, method) }) + +#' freqItems +#' +#' Finding frequent items for columns, possibly with false positives. +#' Using the frequent element count algorithm described in +#' \url{http://dx.doi.org/10.1145/762471.762473}, proposed by Karp, Schenker, and Papadimitriou. +#' +#' @param x A SparkSQL DataFrame. +#' @param cols A vector column names to search frequent items in. +#' @param support (Optional) The minimum frequency for an item to be considered `frequent`. +#'Should be greater than 1e-4. Default support = 0.01. +#' @return a local R data.frame with the frequent items in each column +#' +#' @rdname statfunctions +#' @name freqItems +#' @export +#' @examples +#' \dontrun{ +#' df <- jsonFile(sqlCtx, "/path/to/file.json") +#' fi = freqItems(df, c("title", "gender")) +#' } +setMethod("freqItems", signature(x = "DataFrame", cols = "character"), + function(x, cols, support) { --- End diff -- support = 0.01 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARKR] [SPARK-10981] SparkR Join improvement...
Github user felixcheung commented on a diff in the pull request: https://github.com/apache/spark/pull/9029#discussion_r41592919 --- Diff: R/pkg/R/DataFrame.R --- @@ -1854,30 +1784,36 @@ setMethod("fillna", sdf <- if (length(cols) == 0) { callJMethod(naFunctions, "fill", value) } else { - callJMethod(naFunctions, "fill", value, as.list(cols)) + callJMethod(naFunctions, "fill", value, listToSeq(as.list(cols))) } dataFrame(sdf) }) -#' This function downloads the contents of a DataFrame into an R's data.frame. -#' Since data.frames are held in memory, ensure that you have enough memory -#' in your system to accommodate the contents. +#' crosstab #' -#' @title Download data from a DataFrame into a data.frame -#' @param x a DataFrame -#' @return a data.frame -#' @rdname as.data.frame -#' @examples \dontrun{ +#' Computes a pair-wise frequency table of the given columns. Also known as a contingency +#' table. The number of distinct values for each column should be less than 1e4. At most 1e6 +#' non-zero pair frequencies will be returned. #' -#' irisDF <- createDataFrame(sqlContext, iris) -#' df <- as.data.frame(irisDF[irisDF$Species == "setosa", ]) +#' @param col1 name of the first column. Distinct items will make the first item of each row. +#' @param col2 name of the second column. Distinct items will make the column names of the output. +#' @return a local R data.frame representing the contingency table. The first column of each row +#' will be the distinct values of `col1` and the column names will be the distinct values +#' of `col2`. The name of the first column will be `$col1_$col2`. Pairs that have no +#' occurrences will have zero as their counts. 
+#' +#' @rdname statfunctions +#' @name crosstab +#' @export +#' @examples +#' \dontrun{ +#' df <- jsonFile(sqlCtx, "/path/to/file.json") +#' ct = crosstab(df, "title", "gender") #' } -setMethod("as.data.frame", --- End diff -- also for this from a recent PR --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user winningsix commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41593638 --- Diff: core/src/main/scala/org/apache/spark/crypto/CommonConfigurationKeys.scala --- @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.crypto + +import org.apache.hadoop.io.Text + +/** + * Constant variables + */ +object CommonConfigurationKeys { + val SPARK_SHUFFLE_TOKEN = new Text("SPARK_SHUFFLE_TOKEN") + val SPARK_SECURITY_CRYPTO_BUFFER_SIZE_DEFAULT = 8192 + val SPARK_SECURITY_CRYPTO_CIPHER_SUITE_DEFAULT = "AES/CTR/NoPadding" + val SPARK_SECURITY_CRYPTO_CIPHER_SUITE_KEY = "spark.security.crypto.cipher.suite" + val SPARK_SECURITY_CRYPTO_CODEC_CLASSES_KEY_PREFIX = "spark.security.crypto.codec.classes" + val SPARK_SECURITY_CRYPTO_CODEC_CLASSES_AES_CTR_NOPADDING_KEY = +SPARK_SECURITY_CRYPTO_CODEC_CLASSES_KEY_PREFIX + AES_CTR_NOPADDING.getConfigSuffix() + val SPARK_SECURITY_JAVA_SECURE_RANDOM_ALGORITHM_KEY = +"spark.security.java.secure.random.algorithm" + val SPARK_SECURITY_JAVA_SECURE_RANDOM_ALGORITHM_DEFAULT = "SHA1PRNG" + val SPARK_SECURITY_SECURE_RANDOM_IMPL_KEY = "spark.security.secure.random.impl" + val SPARK_ENCRYPTED_INTERMEDIATE_DATA_BUFFER_KB = "spark.job" + +".encrypted-intermediate-data.buffer.kb" + val DEFAULT_SPARK_ENCRYPTED_INTERMEDIATE_DATA_BUFFER_KB = 128 + val SPARK_SECURITY_SECURE_RANDOM_DEVICE_FILE_PATH_KEY = "spark.security.random.device.file.path" + val SPARK_SECURITY_SECURE_RANDOM_DEVICE_FILE_PATH_DEFAULT = "/dev/urandom" + val SPARK_ENCRYPTED_INTERMEDIATE_DATA = "spark.job.encrypted-intermediate-data" + val DEFAULT_SPARK_ENCRYPTED_INTERMEDIATE_DATA = false + val SPARK_ENCRYPTED_INTERMEDIATE_DATA_KEY_SIZE_BITS = +"spark.job.encrypted-intermediate-data-key-size-bits" + val DEFAULT_SPARK_ENCRYPTED_INTERMEDIATE_DATA_KEY_SIZE_BITS = 128 + val SPARK_SHUFFLE_KEYGEN_ALGORITHM = "spark.shuffle.keygen.algorithm" + val DEFAULT_SPARK_SHUFFLE_KEYGEN_ALGORITHM = "HmacSHA1" + val SHUFFLE_KEY_LENGTH = "" + val DEFAULT_SHUFFLE_KEY_LENGTH = 64 + val SPARK_ENCRYPTED_SHUFFLE = "spark.encrypted.shuffle" --- End diff -- Yes, we are sending the encrypted content over the wire. I am confused by "it doesn't mean you're encrypting shuffle". 
Do you mean the config name is ambiguous and that we should rename it to "spark.shuffle.traffic.encrypt.enabled"? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10989] [MLLIB] Added the dot and hadama...
Github user da-steve101 commented on a diff in the pull request: https://github.com/apache/spark/pull/9020#discussion_r41593650 --- Diff: mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala --- @@ -512,6 +513,92 @@ object Vectors { squaredDistance } + private def dot(a : DenseVector, b : DenseVector) : Double = { +(a.toArray zip b.toArray).map(x => (x._1 * x._2)).sum + } + + private def dot(a : SparseVector, b : DenseVector) : Double = { +(a.indices zip a.values).map(x => { b(x._1)*x._2 }).sum --- End diff -- problem is that toBreeze is private ( i don't think it should be but i felt i was overstepping to change that ). Also its just a bit annoying from programmers perspective. I can see what you are saying, I just think that extra step should be inside rather than outside. Really I think all the breeze operations should be defined for these vectors (or just use breeze vectors but I guess a single vector may need to be distributed). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9000#issuecomment-146747085 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43450/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9000#issuecomment-146747083 Merged build finished. Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10515] When killing executor, the pendi...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/8945#issuecomment-146747357 Merged build finished. Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10515] When killing executor, the pendi...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/8945#issuecomment-146747096 [Test build #43453 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43453/console) for PR 8945 at commit [`e382315`](https://github.com/apache/spark/commit/e382315e9905b735837bc37a63acd16b72242ada). * This patch **passes all tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9000#issuecomment-146747027 [Test build #43450 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43450/console) for PR 9000 at commit [`fc7f9f5`](https://github.com/apache/spark/commit/fc7f9f519852c2b3ef3eebcbc8e3f0ba63fcb3dc). * This patch **passes all tests**. * This patch merges cleanly. * This patch adds no public classes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10515] When killing executor, the pendi...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/8945#issuecomment-146747359 Test PASSed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43453/ Test PASSed. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10772][Streaming][Scala]: NullPointerEx...
Github user tdas commented on a diff in the pull request: https://github.com/apache/spark/pull/8881#discussion_r41584948 --- Diff: streaming/src/main/scala/org/apache/spark/streaming/dstream/TransformedDStream.scala --- @@ -38,6 +39,11 @@ class TransformedDStream[U: ClassTag] ( override def compute(validTime: Time): Option[RDD[U]] = { val parentRDDs = parents.map(_.getOrCompute(validTime).orNull).toSeq -Some(transformFunc(parentRDDs, validTime)) +val transformedRDD = transformFunc(parentRDDs, validTime) +if (transformedRDD == null) { + throw new SparkException("Transform function must not return null. " + +"Return RDD.empty to return no elements as the result of the transformation.") --- End diff -- I am sooo sorry for the correction, but it's not RDD.empty, but `SparkContext.emptyRDD()`. Can you fix it? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: SPARK-10771: Implement the shuffle encryption ...
Github user vanzin commented on a diff in the pull request: https://github.com/apache/spark/pull/8880#discussion_r41585773 --- Diff: core/src/main/scala/org/apache/spark/crypto/CryptoCodec.scala --- @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.crypto + +import scala.reflect.runtime.universe + +import org.apache.spark.{Logging, SparkConf} +import org.apache.spark.crypto.CommonConfigurationKeys.SPARK_SECURITY_CRYPTO_CIPHER_SUITE_DEFAULT +import org.apache.spark.crypto.CommonConfigurationKeys.SPARK_SECURITY_CRYPTO_CIPHER_SUITE_KEY +import org.apache.spark.crypto.CommonConfigurationKeys.SPARK_SECURITY_CRYPTO_CODEC_CLASSES_AES_CTR_NOPADDING_KEY + +/** + * Crypto codec class, encapsulates encryptor/decryptor pair. + */ +abstract class CryptoCodec() { + /** + * + * @return the CipherSuite for this codec. + */ + def getCipherSuite(): CipherSuite + + /** + * This interface is only for Counter (CTR) mode. Generally the Encryptor + * or Decryptor calculates the IV and maintain encryption context internally. + * For example a {@link javax.crypto.Cipher} will maintain its encryption + * context internally when we do encryption/decryption using the + * Cipher#update interface. 
+ * + * The IV can be calculated by combining the initial IV and the counter with + * a lossless operation (concatenation, addition, or XOR). + * @see http://en.wikipedia.org/wiki/Block_cipher_mode_of_operation#Counter_.28CTR.29 + * @param initIV + * @param counter + * @param iv initialization vector + */ + def calculateIV(initIV: Array[Byte], counter: Long, iv: Array[Byte]) + + /** + * @return Encryptor the encryptor + */ + def createEncryptor: Encryptor + + /** + * @return Decryptor the decryptor + */ + def createDecryptor: Decryptor + + /** + * Generate a number of secure, random bytes suitable for cryptographic use. + * This method needs to be thread-safe. + * @param bytes byte array to populate with random data + */ + def generateSecureRandom(bytes: Array[Byte]) +} + +object CryptoCodec extends Logging { + def getInstance(conf: SparkConf): CryptoCodec = { +val name = conf.get(SPARK_SECURITY_CRYPTO_CIPHER_SUITE_KEY, + SPARK_SECURITY_CRYPTO_CIPHER_SUITE_DEFAULT) +getInstance(conf, CipherSuite.apply(name)) + } + + def getInstance(conf: SparkConf, cipherSuite: CipherSuite): CryptoCodec = { +getCodecClasses(conf, cipherSuite).toIterator.map { + case name if name == classOf[JceAesCtrCryptoCodec].getName => new JceAesCtrCryptoCodec(conf) + case _ => throw new RuntimeException("unsupported codec class") +}.find { + _.getCipherSuite.name.equals(cipherSuite.name) +}.headOption.get + } + + def getCodecClasses(conf: SparkConf, cipherSuite: CipherSuite): List[String] = { --- End diff -- `cipherSuite` is not really used. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user harishreedharan commented on the pull request: https://github.com/apache/spark/pull/9041#issuecomment-146724990 /cc @tdas --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10767][PYSPARK] Make pyspark shared par...
Github user SparkQA commented on the pull request: https://github.com/apache/spark/pull/9017#issuecomment-146726910 [Test build #43447 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/43447/consoleFull) for PR 9017 at commit [`929d850`](https://github.com/apache/spark/commit/929d8501ef74575f00a19459fc8a163d58ffe6e5). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9000#discussion_r41587618 --- Diff: core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala --- @@ -1040,17 +1044,23 @@ class BlockManagerSuite extends SparkFunSuite with Matchers with BeforeAndAfterE test("reserve/release unroll memory") { store = makeBlockManager(12000) val memoryStore = store.memoryStore +val dummyBlock = TestBlockId("") --- End diff -- ok, I wanted to avoid making an array buffer every time we call that, but I guess it's not that expensive. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user andrewor14 commented on a diff in the pull request: https://github.com/apache/spark/pull/9000#discussion_r41588629 --- Diff: core/src/main/scala/org/apache/spark/StaticMemoryManager.scala --- @@ -0,0 +1,229 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark + +import scala.collection.mutable + +import org.apache.spark.storage.{BlockId, BlockStatus, MemoryStore} + + +/** + * A [[MemoryManager]] that statically partitions the heap space into disjoint regions. + * + * The sizes of the execution and storage regions are determined through + * `spark.shuffle.memoryFraction` and `spark.storage.memoryFraction` respectively. The two + * regions are cleanly separated such that neither usage can borrow memory from the other. + */ +private[spark] class StaticMemoryManager( +conf: SparkConf, +override val maxExecutionMemory: Long, +override val maxStorageMemory: Long) + extends MemoryManager with Logging { + + // Max number of bytes worth of blocks to evict when unrolling + private val maxMemoryToEvictForUnroll: Long = { +(maxStorageMemory * conf.getDouble("spark.storage.unrollFraction", 0.2)).toLong + } + + // Amount of execution memory in use. 
Accesses must be synchronized on `executionLock`. + private var _executionMemoryUsed: Long = 0 + private val executionLock = new Object + + // Amount of storage memory in use. Accesses must be synchronized on `storageLock`. + private var _storageMemoryUsed: Long = 0 + private val storageLock = new Object + + // The memory store used to evict cached blocks + private var _memoryStore: MemoryStore = _ + private def memoryStore: MemoryStore = { +if (_memoryStore == null) { + _memoryStore = SparkEnv.get.blockManager.memoryStore +} +_memoryStore + } + + // For testing only + def setMemoryStore(store: MemoryStore): Unit = { +_memoryStore = store + } + + def this(conf: SparkConf) { +this( + conf, + StaticMemoryManager.getMaxExecutionMemory(conf), + StaticMemoryManager.getMaxStorageMemory(conf)) + } + + /** + * Acquire N bytes of memory for execution. + * @return number of bytes successfully granted (<= N). + */ + override def acquireExecutionMemory(numBytes: Long): Long = { +executionLock.synchronized { + assert(_executionMemoryUsed <= maxExecutionMemory) + val bytesToGrant = math.min(numBytes, maxExecutionMemory - _executionMemoryUsed) + _executionMemoryUsed += bytesToGrant + bytesToGrant +} + } + + /** + * Acquire N bytes of memory to cache the given block, evicting existing ones if necessary. + * Blocks evicted in the process, if any, are added to `evictedBlocks`. + * @return number of bytes successfully granted (0 or N). + */ + override def acquireStorageMemory( + blockId: BlockId, + numBytes: Long, + evictedBlocks: mutable.Buffer[(BlockId, BlockStatus)]): Long = { +acquireStorageMemory(blockId, numBytes, numBytes, evictedBlocks) + } + + /** + * Acquire N bytes of memory to unroll the given block, evicting existing ones if necessary. + * + * This evicts at most M bytes worth of existing blocks, where M is a fraction of the storage + * space specified by `spark.storage.unrollFraction`. Blocks evicted in the process, if any, + * are added to `evictedBlocks`. 
+ * + * @return number of bytes successfully granted (0 or N). + */ + override def acquireUnrollMemory( + blockId: BlockId, + numBytes: Long, + evictedBlocks: mutable.Buffer[(BlockId, BlockStatus)]): Long = { +val currentUnrollMemory = memoryStore.currentUnrollMemory --- End diff -- added in `reserveUnrollMemoryForThisTask` --- If your project is set up for it, you can reply to this
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9000#issuecomment-146729372 Merged build triggered. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user AmplabJenkins commented on the pull request: https://github.com/apache/spark/pull/9000#issuecomment-146729383 Merged build started. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-11019][streaming][flume] Gracefully shu...
Github user harishreedharan commented on the pull request: https://github.com/apache/spark/pull/9041#issuecomment-146730211 Jenkins, test this please --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] spark pull request: [SPARK-10956] Common MemoryManager interface f...
Github user andrewor14 commented on the pull request: https://github.com/apache/spark/pull/9000#issuecomment-146730232 @JoshRosen alright I believe I addressed all of your comments. Please let me know if it LGTY. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org