Github user kayousterhout commented on a diff in the pull request:

https://github.com/apache/spark/pull/15644#discussion_r85248691

--- Diff: core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala ---
@@ -282,6 +316,114 @@ class TaskSchedulerImplSuite extends SparkFunSuite with LocalSparkContext with B
     assert(!failedTaskSet)
   }
+  test("scheduled tasks obey task and stage blacklists") {
+    taskScheduler = setupSchedulerWithMockTsm()
+    (0 to 2).foreach { stageId =>
+      val taskSet = FakeTask.createTaskSet(numTasks = 2, stageId = stageId, stageAttemptId = 0)
+      taskScheduler.submitTasks(taskSet)
+    }
+
+    val offers = IndexedSeq(
+      new WorkerOffer("executor0", "host0", 1),
+      new WorkerOffer("executor1", "host1", 1),
+      new WorkerOffer("executor2", "host1", 1),
+      new WorkerOffer("executor3", "host2", 10)
+    )
+
+    // Setup our mock blacklist:
+    // * stage 0 is blacklisted on node "host1"
+    // * stage 1 is blacklisted on executor "executor3"
+    // * stage 0, part 0 is blacklisted on executor 0
+    // Setup some defaults, then override them with particulars.
+    // (Later stubs take precedence over earlier ones.)
+    stageToMockTaskSetBlacklist.values.foreach { taskSetBlacklist =>
+      when(taskSetBlacklist.isNodeBlacklistedForTaskSet(anyString())).thenReturn(false)
+      when(taskSetBlacklist.isExecutorBlacklistedForTaskSet(anyString())).thenReturn(false)
+      when(taskSetBlacklist.isExecutorBlacklistedForTask(anyString(), anyInt())).thenReturn(false)
+      when(taskSetBlacklist.isNodeBlacklistedForTask(anyString(), anyInt())).thenReturn(false)
+    }
+    when(stageToMockTaskSetBlacklist(0).isNodeBlacklistedForTaskSet("host1")).thenReturn(true)
+    when(stageToMockTaskSetBlacklist(1).isExecutorBlacklistedForTaskSet("executor3"))
+      .thenReturn(true)
+    when(stageToMockTaskSetBlacklist(0).isExecutorBlacklistedForTask("executor0", 0))
+      .thenReturn(true)
+
+    val firstTaskAttempts = taskScheduler.resourceOffers(offers).flatten
+    (0 to 2).foreach { stageId =>
+      verify(stageToMockTaskSetBlacklist(stageId), atLeast(1))
+        .isNodeBlacklistedForTaskSet(anyString())
+    }
+    for {
+      exec <- Seq("executor1", "executor2")
+      part <- 0 to 1
+    } {
+      // The node blacklist should ensure we never check the task blacklist. This is important
+      // for performance, otherwise we end up changing an O(1) operation into a
+      // O(numPendingTasks) one.
+      verify(stageToMockTaskSetBlacklist(0), never).isExecutorBlacklistedForTask(exec, part)
+    }
+
+    // Similarly, the executor blacklist for an entire stage should prevent us from ever checking
+    // the blacklist for specific parts in a stage.
+    (0 to 1).foreach { part =>
+      verify(stageToMockTaskSetBlacklist(1), never).isExecutorBlacklistedForTask("executor3", part)
+    }
+
+    // We should schedule all tasks.
+    assert(firstTaskAttempts.size === 6)
+    def tasksForStage(stageId: Int): Seq[TaskDescription] = {
+      firstTaskAttempts.filter{_.name.contains(s"stage $stageId")}
+    }
+    tasksForStage(0).foreach { task =>
+      // exec 1 & 2 blacklisted for node
+      // exec 0 blacklisted just for part 0
+      if (task.index == 0) {
+        assert(task.executorId === "executor3")
+      } else {
+        assert(Set("executor0", "executor3").contains(task.executorId))
+      }
+    }
+    tasksForStage(1).foreach { task =>
+      // exec 3 blacklisted
+      assert("executor3" != task.executorId)
+    }
+    // no restrictions on stage 2
+
+    // Have all tasksets finish (stages 0 & 1 successfully, 2 unsuccessfully).
+    (0 to 2).foreach { stageId =>
+      val tasks = tasksForStage(stageId)
+      val tsm = taskScheduler.taskSetManagerForAttempt(stageId, 0).get
+      val valueSer = SparkEnv.get.serializer.newInstance()
+      if (stageId == 2) {
+        // Just need to make one task fail 4 times.
+        var task = tasks(0)
+        val taskIndex = task.index
+        (0 until 4).foreach { attempt =>
--- End diff ---

It looks like this part of the test is making sure no further attempts are scheduled after the 4th failure. Is this necessary for the blacklisting functionality / different than what's already covered by other tests?
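
For readers without the rest of the diff, here is a rough sketch of the pattern the comment refers to: fail a single task spark.task.maxFailures (4 by default) times and then confirm the scheduler offers no further attempts for that task set. This is a hypothetical reconstruction, not the PR's actual code; the identifiers tsm, tasks, taskIndex, and the single-core offer on "executor4"/"host4" stand in for the test's local state.

    // A minimal sketch (assumed, not the PR's code) of failing one task 4 times and
    // checking that nothing further is scheduled once the task set is aborted.
    import org.apache.spark.{TaskResultLost, TaskState}
    import org.apache.spark.scheduler.WorkerOffer

    var task = tasks(0)            // first scheduled task for this stage (assumed local state)
    val taskIndex = task.index
    (0 until 4).foreach { attempt =>
      assert(task.attemptNumber === attempt)
      // Report the running attempt as failed; TaskResultLost counts toward maxTaskFailures.
      tsm.handleFailedTask(task.taskId, TaskState.FAILED, TaskResultLost)
      // Offer one more core and see what, if anything, gets scheduled next.
      val nextAttempts =
        taskScheduler.resourceOffers(IndexedSeq(new WorkerOffer("executor4", "host4", 1))).flatten
      if (attempt < 3) {
        // Below the failure limit, the same task index should simply be retried.
        assert(nextAttempts.size === 1)
        task = nextAttempts.head
        assert(task.index === taskIndex)
      } else {
        // After the 4th failure the task set is aborted, so no further attempts show up.
        assert(nextAttempts.isEmpty)
      }
    }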