[spark] branch master updated: Revert "[SPARK-36575][CORE] Should ignore task finished event if its task set is gone in TaskSchedulerImpl.handleSuccessfulTask"
This is an automated email from the ASF dual-hosted git repository. wuyi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 16e2604 Revert "[SPARK-36575][CORE] Should ignore task finished event if its task set is gone in TaskSchedulerImpl.handleSuccessfulTask" 16e2604 is described below commit 16e26049afc8ba92b06bdc58c47b211ea87e0d2b Author: yi.wu AuthorDate: Wed Nov 10 15:18:05 2021 +0800 Revert "[SPARK-36575][CORE] Should ignore task finished event if its task set is gone in TaskSchedulerImpl.handleSuccessfulTask" This reverts commit bc80c844fcb37d8d699d46bb34edadb98ed0d9f7. --- .../apache/spark/scheduler/TaskSchedulerImpl.scala | 8 +- .../spark/scheduler/TaskSchedulerImplSuite.scala | 86 +- 2 files changed, 2 insertions(+), 92 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 282f12b..55db73a 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -871,13 +871,7 @@ private[spark] class TaskSchedulerImpl( taskSetManager: TaskSetManager, tid: Long, taskResult: DirectTaskResult[_]): Unit = synchronized { -if (taskIdToTaskSetManager.contains(tid)) { - taskSetManager.handleSuccessfulTask(tid, taskResult) -} else { - logInfo(s"Ignoring update with state finished for task (TID $tid) because its task set " + -"is gone (this is likely the result of receiving duplicate task finished status updates)" + -" or its executor has been marked as failed.") -} +taskSetManager.handleSuccessfulTask(tid, taskResult) } def handleFailedTask( diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala index 551d55d..53dc14c 100644 --- 
a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala @@ -18,12 +18,9 @@ package org.apache.spark.scheduler import java.nio.ByteBuffer -import java.util.Properties -import java.util.concurrent.{CountDownLatch, ExecutorService, LinkedBlockingQueue, ThreadPoolExecutor, TimeUnit} import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.concurrent.duration._ -import scala.language.reflectiveCalls import org.mockito.ArgumentMatchers.{any, anyInt, anyString, eq => meq} import org.mockito.Mockito.{atLeast, atMost, never, spy, times, verify, when} @@ -37,7 +34,7 @@ import org.apache.spark.internal.config import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfile, TaskResourceRequests} import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ -import org.apache.spark.util.{Clock, ManualClock, ThreadUtils} +import org.apache.spark.util.{Clock, ManualClock} class FakeSchedulerBackend extends SchedulerBackend { def start(): Unit = {} @@ -1998,87 +1995,6 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(!normalTSM.runningTasksSet.contains(taskId)) } - test("SPARK-36575: Should ignore task finished event if its task set is gone " + -"in TaskSchedulerImpl.handleSuccessfulTask") { -val taskScheduler = setupScheduler() - -val latch = new CountDownLatch(2) -val resultGetter = new TaskResultGetter(sc.env, taskScheduler) { - override protected val getTaskResultExecutor: ExecutorService = -new ThreadPoolExecutor(1, 1, 0L, TimeUnit.MILLISECONDS, new LinkedBlockingQueue[Runnable], - ThreadUtils.namedThreadFactory("task-result-getter")) { - override def execute(command: Runnable): Unit = { -super.execute(new Runnable { - override def run(): Unit = { -command.run() -latch.countDown() - } -}) - } -} - def taskResultExecutor() : ExecutorService = getTaskResultExecutor -} 
-taskScheduler.taskResultGetter = resultGetter - -val workerOffers = IndexedSeq(new WorkerOffer("executor0", "host0", 1), - new WorkerOffer("executor1", "host1", 1)) -val task1 = new ShuffleMapTask(1, 0, null, new Partition { - override def index: Int = 0 -}, Seq(TaskLocation("host0", "executor0")), new Properties, null) - -val task2 = new ShuffleMapTask(1, 0, null, new Partition { - override def index: Int = 1 -}, Seq(TaskLocation("host1", "executor1")), new Properties, null) - -val taskSet = new TaskSet(Array(task1, task2), 0, 0, 0, null, 0) - -taskScheduler.submitTasks(taskSet) -val taskDescriptions =
[spark] branch master updated (bc80c84 -> b89f415)
This is an automated email from the ASF dual-hosted git repository. sarutak pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from bc80c84 [SPARK-36575][CORE] Should ignore task finished event if its task set is gone in TaskSchedulerImpl.handleSuccessfulTask add b89f415 [SPARK-37264][BUILD] Exclude `hadoop-client-api` transitive dependency from `orc-core` No new revisions were added by this update. Summary of changes: pom.xml | 4 1 file changed, 4 insertions(+) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-36575][CORE] Should ignore task finished event if its task set is gone in TaskSchedulerImpl.handleSuccessfulTask
This is an automated email from the ASF dual-hosted git repository. wuyi pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new bc80c84 [SPARK-36575][CORE] Should ignore task finished event if its task set is gone in TaskSchedulerImpl.handleSuccessfulTask bc80c84 is described below commit bc80c844fcb37d8d699d46bb34edadb98ed0d9f7 Author: hujiahua AuthorDate: Wed Nov 10 11:20:35 2021 +0800 [SPARK-36575][CORE] Should ignore task finished event if its task set is gone in TaskSchedulerImpl.handleSuccessfulTask ### What changes were proposed in this pull request? When an executor finishes a task of some stage, the driver receives a `StatusUpdate` event to handle it. If, at the same time, the driver finds that the executor's heartbeat has timed out, the driver also needs to handle an ExecutorLost event simultaneously. There is a race condition here, which leaves `TaskSetManager.successful` and `TaskSetManager.tasksSuccessful` with wrong results. The problem is that `TaskResultGetter.enqueueSuccessfulTask` uses an asynchronous thread to handle the successful task, which means the synchronized lock of `TaskSchedulerImpl` is released prematurely midway https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/scheduler/TaskResultGetter.scala#L61. So `TaskSchedulerImpl` may handle executorLost first, and then the asynchronous thread goes on to handle the successful task. This causes `TaskSetManager.successful` and `T [...] ### Why are the changes needed? The race causes `TaskSetManager.successful` and `TaskSetManager.tasksSuccessful` to hold wrong results. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a new test. Closes #33872 from sleep1661/SPARK-36575. 
Lead-authored-by: hujiahua Co-authored-by: MattHu Signed-off-by: yi.wu --- .../apache/spark/scheduler/TaskSchedulerImpl.scala | 8 +- .../spark/scheduler/TaskSchedulerImplSuite.scala | 86 +- 2 files changed, 92 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala index 55db73a..282f12b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala @@ -871,7 +871,13 @@ private[spark] class TaskSchedulerImpl( taskSetManager: TaskSetManager, tid: Long, taskResult: DirectTaskResult[_]): Unit = synchronized { -taskSetManager.handleSuccessfulTask(tid, taskResult) +if (taskIdToTaskSetManager.contains(tid)) { + taskSetManager.handleSuccessfulTask(tid, taskResult) +} else { + logInfo(s"Ignoring update with state finished for task (TID $tid) because its task set " + +"is gone (this is likely the result of receiving duplicate task finished status updates)" + +" or its executor has been marked as failed.") +} } def handleFailedTask( diff --git a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala index 53dc14c..551d55d 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala @@ -18,9 +18,12 @@ package org.apache.spark.scheduler import java.nio.ByteBuffer +import java.util.Properties +import java.util.concurrent.{CountDownLatch, ExecutorService, LinkedBlockingQueue, ThreadPoolExecutor, TimeUnit} import scala.collection.mutable.{ArrayBuffer, HashMap} import scala.concurrent.duration._ +import scala.language.reflectiveCalls import org.mockito.ArgumentMatchers.{any, anyInt, anyString, eq => meq} import org.mockito.Mockito.{atLeast, atMost, 
never, spy, times, verify, when} @@ -34,7 +37,7 @@ import org.apache.spark.internal.config import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfile, TaskResourceRequests} import org.apache.spark.resource.ResourceUtils._ import org.apache.spark.resource.TestResourceIDs._ -import org.apache.spark.util.{Clock, ManualClock} +import org.apache.spark.util.{Clock, ManualClock, ThreadUtils} class FakeSchedulerBackend extends SchedulerBackend { def start(): Unit = {} @@ -1995,6 +1998,87 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B assert(!normalTSM.runningTasksSet.contains(taskId)) } + test("SPARK-36575: Should ignore task finished event if its task set is gone " + +"in TaskSchedulerImpl.handleSuccessfulTask") { +val taskScheduler = setupScheduler() + +val latch = new CountDownLatch(2) +val resultGetter = new
[spark] branch master updated: [SPARK-37221][SQL][FOLLOWUP] Add toRowBased to SparkPlan
This is an automated email from the ASF dual-hosted git repository. viirya pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e1f3f22 [SPARK-37221][SQL][FOLLOWUP] Add toRowBased to SparkPlan e1f3f22 is described below commit e1f3f22c3dabfea27880e02cbb5df6533c875795 Author: Liang-Chi Hsieh AuthorDate: Tue Nov 9 19:11:44 2021 -0800 [SPARK-37221][SQL][FOLLOWUP] Add toRowBased to SparkPlan ### What changes were proposed in this pull request? This is a follow up of #34499. Instead of adding `ColumnarToRowExec` in `getByteArrayRdd`, this patch adds `toRowBased` API to explicitly ask for columnar-to-row-based conversion. ### Why are the changes needed? To make the conversion selectable. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests. Closes #34538 from viirya/columnar-followup. Authored-by: Liang-Chi Hsieh Signed-off-by: Liang-Chi Hsieh --- .../scala/org/apache/spark/sql/execution/SparkPlan.scala | 12 ++-- .../org/apache/spark/sql/execution/SparkPlanSuite.scala | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index ea3b133..5c4266d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -313,6 +313,11 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ } /** + * Converts the output of this plan to row-based if it is columnar plan. + */ + def toRowBased: SparkPlan = if (supportsColumnar) ColumnarToRowExec(this) else this + + /** * Packing the UnsafeRows into byte array for faster serialization. * The byte arrays are in the following format: * [size] [bytes of UnsafeRow] [size] [bytes of UnsafeRow] ... 
[-1] @@ -322,12 +327,7 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ */ private def getByteArrayRdd( n: Int = -1, takeFromEnd: Boolean = false): RDD[(Long, Array[Byte])] = { -val rdd = if (supportsColumnar) { - ColumnarToRowExec(this).execute() -} else { - execute() -} -rdd.mapPartitionsInternal { iter => +execute().mapPartitionsInternal { iter => var count = 0 val buffer = new Array[Byte](4 << 10) // 4K val codec = CompressionCodec.createCodec(SparkEnv.get.conf) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala index c9bbee2..bc4dfcb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SparkPlanSuite.scala @@ -116,12 +116,12 @@ class SparkPlanSuite extends QueryTest with SharedSparkSession { } test("SPARK-37221: The collect-like API in SparkPlan should support columnar output") { -val emptyResults = ColumnarOp(LocalTableScanExec(Nil, Nil)).executeCollect() +val emptyResults = ColumnarOp(LocalTableScanExec(Nil, Nil)).toRowBased.executeCollect() assert(emptyResults.isEmpty) val relation = LocalTableScanExec( Seq(AttributeReference("val", IntegerType)()), Seq(InternalRow(1))) -val nonEmpty = ColumnarOp(relation).executeCollect() +val nonEmpty = ColumnarOp(relation).toRowBased.executeCollect() assert(nonEmpty === relation.executeCollect()) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Change "GitHub Source" to "Source code" on the home page (#369)
This is an automated email from the ASF dual-hosted git repository. gengliang pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 71adc13 Change "GitHub Source" to "Source code" on the home page (#369) 71adc13 is described below commit 71adc13b25af256b7e30dbe0078906b764cce2db Author: Gengliang Wang AuthorDate: Wed Nov 10 10:14:14 2021 +0800 Change "GitHub Source" to "Source code" on the home page (#369) - Change the text GitHub Source to Source code on the home page. - Attribute alt is not allowed on element a. This is found during updating the text. We should remove them. --- index.md| 14 +++--- site/index.html | 14 +++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/index.md b/index.md index b1c89e8..1aceb4c 100644 --- a/index.md +++ b/index.md @@ -335,7 +335,7 @@ head(select(df, df$name.first)) - + Mailing list @@ -345,17 +345,17 @@ head(select(df, df$name.first)) -https://github.com/apache/spark; alt="GitHub Source"> +https://github.com/apache/spark;> GitHub Source + width="96" height="96" alt="Source code"/> Source code - + News and events @@ -365,7 +365,7 @@ head(select(df, df$name.first)) - + How to contribute @@ -375,7 +375,7 @@ head(select(df, df$name.first)) -https://issues.apache.org/jira/projects/SPARK/issues; alt="Issue tracking"> +https://issues.apache.org/jira/projects/SPARK/issues;> Issue tracking @@ -385,7 +385,7 @@ head(select(df, df$name.first)) - + - + Mailing list @@ -482,16 +482,16 @@ -https://github.com/apache/spark; alt="GitHub Source"> +https://github.com/apache/spark;> - GitHub Source + Source code - + News and events @@ -500,7 +500,7 @@ - + How to contribute @@ -509,7 +509,7 @@ -https://issues.apache.org/jira/projects/SPARK/issues; alt="Issue tracking"> +https://issues.apache.org/jira/projects/SPARK/issues;> Issue tracking @@ -518,7 +518,7 @@ - + Committers - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[GitHub] [spark-website] gengliangwang commented on pull request #369: Change "GitHub Source" to "Source code" on the home page
gengliangwang commented on pull request #369: URL: https://github.com/apache/spark-website/pull/369#issuecomment-964718323 @srowen @Ngone51 Thanks for the review. Merged to the asf-site. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[GitHub] [spark-website] gengliangwang merged pull request #369: Change "GitHub Source" to "Source code" on the home page
gengliangwang merged pull request #369: URL: https://github.com/apache/spark-website/pull/369 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[GitHub] [spark-website] gengliangwang opened a new pull request #369: Change the "GitHub Source" to "Source code" on the home page
gengliangwang opened a new pull request #369: URL: https://github.com/apache/spark-website/pull/369 This PR: - Changes the text `GitHub Source` to `Source code` on the home page. - Removes the `alt` attribute from `a` elements, because the attribute `alt` is not allowed on element `a`. This was found while updating the text. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (4bba110 -> abecdfe)
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 4bba110 [SPARK-37120][BUILD][FOLLOWUP] Test master branch and skip mima/unidoc in Java11/17 tests add abecdfe [MINOR][CORE] Fix error message when requested executor memory exceeds the worker memory No new revisions were added by this update. Summary of changes: core/src/main/scala/org/apache/spark/SparkContext.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (f6a044c -> 4bba110)
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from f6a044c [SPARK-37239][YARN][TESTS][FOLLOWUP] Add UT to cover `Client.prepareLocalResources` with custom `STAGING_FILE_REPLICATION` add 4bba110 [SPARK-37120][BUILD][FOLLOWUP] Test master branch and skip mima/unidoc in Java11/17 tests No new revisions were added by this update. Summary of changes: .github/workflows/build_and_test.yml | 8 1 file changed, 4 insertions(+), 4 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Remove preview for 3.0 in Download page (#368)
This is an automated email from the ASF dual-hosted git repository. lixiao pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 13be9bc Remove preview for 3.0 in Download page (#368) 13be9bc is described below commit 13be9bcd059cfb60f60320e520c3eb36adf00cc8 Author: wuyi AuthorDate: Wed Nov 10 00:57:51 2021 +0800 Remove preview for 3.0 in Download page (#368) --- downloads.md| 8 site/downloads.html | 8 2 files changed, 16 deletions(-) diff --git a/downloads.md b/downloads.md index 518ae5b..993bd7a 100644 --- a/downloads.md +++ b/downloads.md @@ -30,14 +30,6 @@ window.onload = function () { Note that, Spark 2.x is pre-built with Scala 2.11 except version 2.4.2, which is pre-built with Scala 2.12. Spark 3.0+ is pre-built with Scala 2.12. -### Latest preview release -Preview releases, as the name suggests, are releases for previewing upcoming features. -Unlike nightly packages, preview releases have been audited by the project's management committee -to satisfy the legal requirements of Apache Software Foundation's release policy. -Preview releases are not meant to be functional, i.e. they can and highly likely will contain -critical bugs or documentation errors. -The latest preview release is Spark 3.0.0-preview2, published on Dec 23, 2019. - ### Link with Spark Spark artifacts are [hosted in Maven Central](https://search.maven.org/search?q=g:org.apache.spark). You can add a Maven dependency with the following coordinates: diff --git a/site/downloads.html b/site/downloads.html index 8869e19..2deb4e4 100644 --- a/site/downloads.html +++ b/site/downloads.html @@ -174,14 +174,6 @@ window.onload = function () { Note that, Spark 2.x is pre-built with Scala 2.11 except version 2.4.2, which is pre-built with Scala 2.12. Spark 3.0+ is pre-built with Scala 2.12. 
-Latest preview release -Preview releases, as the name suggests, are releases for previewing upcoming features. -Unlike nightly packages, preview releases have been audited by the projects management committee -to satisfy the legal requirements of Apache Software Foundations release policy. -Preview releases are not meant to be functional, i.e. they can and highly likely will contain -critical bugs or documentation errors. -The latest preview release is Spark 3.0.0-preview2, published on Dec 23, 2019. - Link with Spark Spark artifacts are https://search.maven.org/search?q=g:org.apache.spark;>hosted in Maven Central. You can add a Maven dependency with the following coordinates: - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[GitHub] [spark-website] gatorsmile merged pull request #368: Remove preview release for Spark 3.0
gatorsmile merged pull request #368: URL: https://github.com/apache/spark-website/pull/368 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[GitHub] [spark-website] gatorsmile commented on pull request #368: Remove preview release for Spark 3.0
gatorsmile commented on pull request #368: URL: https://github.com/apache/spark-website/pull/368#issuecomment-964344461 Thanks! Merged -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[GitHub] [spark-website] gatorsmile commented on pull request #368: Remove preview release for Spark 3.0
gatorsmile commented on pull request #368: URL: https://github.com/apache/spark-website/pull/368#issuecomment-964344212 > I don't mind removing it, but we may have another preview release one day for Spark 4. The preview releases were still official releases in the past too Agree. We should add it back when the preview of Spark 4 is ready. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-37239][YARN][TESTS][FOLLOWUP] Add UT to cover `Client.prepareLocalResources` with custom `STAGING_FILE_REPLICATION`
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f6a044c [SPARK-37239][YARN][TESTS][FOLLOWUP] Add UT to cover `Client.prepareLocalResources` with custom `STAGING_FILE_REPLICATION` f6a044c is described below commit f6a044cf8cd83e6b3b30e515acbac0ec81607463 Author: yangjie01 AuthorDate: Tue Nov 9 08:07:38 2021 -0800 [SPARK-37239][YARN][TESTS][FOLLOWUP] Add UT to cover `Client.prepareLocalResources` with custom `STAGING_FILE_REPLICATION` ### What changes were proposed in this pull request? This PR adds a new UT to cover the `o.a.s.deploy.yarn.Client.prepareLocalResources` method with a custom `STAGING_FILE_REPLICATION` configuration, and changes other related UTs to explicitly verify that the `replication` passed into the `copyFileToRemote` method is `None`. ### Why are the changes needed? To add a new UT. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass the Jenkins or GitHub Actions checks. Closes #34531 from LuciferYang/SPARK-37239-followup. 
Authored-by: yangjie01 Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/deploy/yarn/ClientSuite.scala | 35 +- 1 file changed, 27 insertions(+), 8 deletions(-) diff --git a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala index 58e49c9..a8815dc 100644 --- a/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala +++ b/resource-managers/yarn/src/test/scala/org/apache/spark/deploy/yarn/ClientSuite.scala @@ -132,7 +132,7 @@ class ClientSuite extends SparkFunSuite with Matchers { .set("spark.yarn.dist.jars", ADDED) val client = createClient(sparkConf, args = Array("--jar", USER)) doReturn(new Path("/")).when(client).copyFileToRemote(any(classOf[Path]), - any(classOf[Path]), any(), any(classOf[MutableHashMap[URI, Path]]), anyBoolean(), any()) + any(classOf[Path]), meq(None), any(classOf[MutableHashMap[URI, Path]]), anyBoolean(), any()) val tempDir = Utils.createTempDir() try { @@ -308,12 +308,12 @@ class ClientSuite extends SparkFunSuite with Matchers { assert(sparkConf.get(SPARK_JARS) === Some(Seq(s"local:${jar4.getPath()}", s"local:${single.getAbsolutePath()}/*"))) -verify(client).copyFileToRemote(any(classOf[Path]), meq(new Path(jar1.toURI())), any(), - any(classOf[MutableHashMap[URI, Path]]), anyBoolean(), any()) -verify(client).copyFileToRemote(any(classOf[Path]), meq(new Path(jar2.toURI())), any(), - any(classOf[MutableHashMap[URI, Path]]), anyBoolean(), any()) -verify(client).copyFileToRemote(any(classOf[Path]), meq(new Path(jar3.toURI())), any(), - any(classOf[MutableHashMap[URI, Path]]), anyBoolean(), any()) +verify(client).copyFileToRemote(any(classOf[Path]), meq(new Path(jar1.toURI())), + meq(None), any(classOf[MutableHashMap[URI, Path]]), anyBoolean(), any()) +verify(client).copyFileToRemote(any(classOf[Path]), meq(new Path(jar2.toURI())), + meq(None), any(classOf[MutableHashMap[URI, Path]]), 
anyBoolean(), any()) +verify(client).copyFileToRemote(any(classOf[Path]), meq(new Path(jar3.toURI())), + meq(None), any(classOf[MutableHashMap[URI, Path]]), anyBoolean(), any()) val cp = classpath(client) cp should contain (buildPath(PWD, LOCALIZED_LIB_DIR, "*")) @@ -330,7 +330,7 @@ class ClientSuite extends SparkFunSuite with Matchers { val client = createClient(sparkConf) client.prepareLocalResources(new Path(temp.getAbsolutePath()), Nil) -verify(client).copyFileToRemote(any(classOf[Path]), meq(new Path(archive.toURI())), any(), +verify(client).copyFileToRemote(any(classOf[Path]), meq(new Path(archive.toURI())), meq(None), any(classOf[MutableHashMap[URI, Path]]), anyBoolean(), any()) classpath(client) should contain (buildPath(PWD, LOCALIZED_LIB_DIR, "*")) @@ -340,6 +340,25 @@ class ClientSuite extends SparkFunSuite with Matchers { } } + test("SPARK-37239: distribute jars archive with set STAGING_FILE_REPLICATION") { +val temp = Utils.createTempDir() +val archive = TestUtils.createJarWithFiles(Map(), temp) +val replication = 5 + +val sparkConf = new SparkConf() + .set(SPARK_ARCHIVE, archive.getPath()) + .set(STAGING_FILE_REPLICATION, replication) +val client = createClient(sparkConf) +client.prepareLocalResources(new Path(temp.getAbsolutePath()), Nil) + +// It is difficult to assert the result of `setReplication` in UT because this method in +// `RawLocalFileSystem` always return true and not change the value of `replication`. +// So we can only assert the call of
[spark] branch master updated (8ae88d0 -> 06175c0)
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 8ae88d0 [SPARK-37253][PYTHON] `try_simplify_traceback` should not fail when `tb_frame.f_lineno` is None add 06175c0 [SPARK-37257][PYTHON] Update setup.py for Python 3.10 No new revisions were added by this update. Summary of changes: python/setup.py | 1 + 1 file changed, 1 insertion(+) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.1 updated: [SPARK-37253][PYTHON] `try_simplify_traceback` should not fail when `tb_frame.f_lineno` is None
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.1 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.1 by this push: new 599e258 [SPARK-37253][PYTHON] `try_simplify_traceback` should not fail when `tb_frame.f_lineno` is None 599e258 is described below commit 599e258a0f507e2799f46f3d4774bee90139720d Author: Dongjoon Hyun AuthorDate: Tue Nov 9 20:03:34 2021 +0900 [SPARK-37253][PYTHON] `try_simplify_traceback` should not fail when `tb_frame.f_lineno` is None ### What changes were proposed in this pull request? This PR aims to handle the corner case when `tb_frame.f_lineno` is `None` in `try_simplify_traceback` which was added by https://github.com/apache/spark/pull/30309 at Apache Spark 3.1.0. ### Why are the changes needed? This will handle the following corner case. ```python Traceback (most recent call last): File "/Users/dongjoon/APACHE/spark-merge/python/lib/pyspark.zip/pyspark/worker.py", line 630, in main tb = try_simplify_traceback(sys.exc_info()[-1]) File "/Users/dongjoon/APACHE/spark-merge/python/lib/pyspark.zip/pyspark/util.py", line 217, in try_simplify_traceback new_tb = types.TracebackType( TypeError: 'NoneType' object cannot be interpreted as an integer ``` Python GitHub Repo also has the test case for this corner case. - https://github.com/python/cpython/blob/main/Lib/test/test_exceptions.py#L2373 ```python None if frame.f_lineno is None else ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #34530 from dongjoon-hyun/SPARK-37253. 
Authored-by: Dongjoon Hyun Signed-off-by: Hyukjin Kwon (cherry picked from commit 8ae88d01b46d581367d0047b50fcfb65078ab972) Signed-off-by: Hyukjin Kwon --- python/pyspark/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/util.py b/python/pyspark/util.py index 09c5963..2e07f19 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -213,7 +213,7 @@ def try_simplify_traceback(tb): tb_next=tb_next, tb_frame=cur_tb.tb_frame, tb_lasti=cur_tb.tb_frame.f_lasti, -tb_lineno=cur_tb.tb_frame.f_lineno) +tb_lineno=cur_tb.tb_frame.f_lineno if cur_tb.tb_frame.f_lineno is not None else -1) tb_next = new_tb return new_tb - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org
[spark] branch branch-3.2 updated: [SPARK-37253][PYTHON] `try_simplify_traceback` should not fail when `tb_frame.f_lineno` is None
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new bda1ecd [SPARK-37253][PYTHON] `try_simplify_traceback` should not fail when `tb_frame.f_lineno` is None bda1ecd is described below commit bda1ecd2477d28a2f9205fb8e22d298a83412ec9 Author: Dongjoon Hyun AuthorDate: Tue Nov 9 20:03:34 2021 +0900 [SPARK-37253][PYTHON] `try_simplify_traceback` should not fail when `tb_frame.f_lineno` is None ### What changes were proposed in this pull request? This PR aims to handle the corner case when `tb_frame.f_lineno` is `None` in `try_simplify_traceback` which was added by https://github.com/apache/spark/pull/30309 at Apache Spark 3.1.0. ### Why are the changes needed? This will handle the following corner case. ```python Traceback (most recent call last): File "/Users/dongjoon/APACHE/spark-merge/python/lib/pyspark.zip/pyspark/worker.py", line 630, in main tb = try_simplify_traceback(sys.exc_info()[-1]) File "/Users/dongjoon/APACHE/spark-merge/python/lib/pyspark.zip/pyspark/util.py", line 217, in try_simplify_traceback new_tb = types.TracebackType( TypeError: 'NoneType' object cannot be interpreted as an integer ``` Python GitHub Repo also has the test case for this corner case. - https://github.com/python/cpython/blob/main/Lib/test/test_exceptions.py#L2373 ```python None if frame.f_lineno is None else ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #34530 from dongjoon-hyun/SPARK-37253. 
Authored-by: Dongjoon Hyun Signed-off-by: Hyukjin Kwon (cherry picked from commit 8ae88d01b46d581367d0047b50fcfb65078ab972) Signed-off-by: Hyukjin Kwon --- python/pyspark/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/util.py b/python/pyspark/util.py index e075b04..e0933f1 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -214,7 +214,7 @@ def try_simplify_traceback(tb): tb_next=tb_next, tb_frame=cur_tb.tb_frame, tb_lasti=cur_tb.tb_frame.f_lasti, -tb_lineno=cur_tb.tb_frame.f_lineno) +tb_lineno=cur_tb.tb_frame.f_lineno if cur_tb.tb_frame.f_lineno is not None else -1) tb_next = new_tb return new_tb - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org
[spark] branch master updated: [SPARK-37253][PYTHON] `try_simplify_traceback` should not fail when `tb_frame.f_lineno` is None
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 8ae88d0 [SPARK-37253][PYTHON] `try_simplify_traceback` should not fail when `tb_frame.f_lineno` is None 8ae88d0 is described below commit 8ae88d01b46d581367d0047b50fcfb65078ab972 Author: Dongjoon Hyun AuthorDate: Tue Nov 9 20:03:34 2021 +0900 [SPARK-37253][PYTHON] `try_simplify_traceback` should not fail when `tb_frame.f_lineno` is None ### What changes were proposed in this pull request? This PR aims to handle the corner case when `tb_frame.f_lineno` is `None` in `try_simplify_traceback` which was added by https://github.com/apache/spark/pull/30309 at Apache Spark 3.1.0. ### Why are the changes needed? This will handle the following corner case. ```python Traceback (most recent call last): File "/Users/dongjoon/APACHE/spark-merge/python/lib/pyspark.zip/pyspark/worker.py", line 630, in main tb = try_simplify_traceback(sys.exc_info()[-1]) File "/Users/dongjoon/APACHE/spark-merge/python/lib/pyspark.zip/pyspark/util.py", line 217, in try_simplify_traceback new_tb = types.TracebackType( TypeError: 'NoneType' object cannot be interpreted as an integer ``` Python GitHub Repo also has the test case for this corner case. - https://github.com/python/cpython/blob/main/Lib/test/test_exceptions.py#L2373 ```python None if frame.f_lineno is None else ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A Closes #34530 from dongjoon-hyun/SPARK-37253. 
Authored-by: Dongjoon Hyun Signed-off-by: Hyukjin Kwon --- python/pyspark/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyspark/util.py b/python/pyspark/util.py index e07474d..3f36fb3 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -218,7 +218,7 @@ def try_simplify_traceback(tb): tb_next=tb_next, tb_frame=cur_tb.tb_frame, tb_lasti=cur_tb.tb_frame.f_lasti, -tb_lineno=cur_tb.tb_frame.f_lineno) +tb_lineno=cur_tb.tb_frame.f_lineno if cur_tb.tb_frame.f_lineno is not None else -1) tb_next = new_tb return new_tb - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org
[GitHub] [spark-website] Ngone51 opened a new pull request #368: Remove preview release for Spark 3.0
Ngone51 opened a new pull request #368: URL: https://github.com/apache/spark-website/pull/368 Remove the preview release for Spark 3.0 since we've already released Spark 3.2. Tested with manual build and preview. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org - To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org