Github user squito commented on a diff in the pull request:

    https://github.com/apache/spark/pull/15644#discussion_r86078676
  
    --- Diff: 
core/src/test/scala/org/apache/spark/scheduler/TaskSchedulerImplSuite.scala ---
    @@ -282,6 +316,114 @@ class TaskSchedulerImplSuite extends SparkFunSuite 
with LocalSparkContext with B
         assert(!failedTaskSet)
       }
     
    +  test("scheduled tasks obey task and stage blacklists") {
    +    taskScheduler = setupSchedulerWithMockTsm()
    +    (0 to 2).foreach { stageId =>
    +      val taskSet = FakeTask.createTaskSet(numTasks = 2, stageId = 
stageId, stageAttemptId = 0)
    +      taskScheduler.submitTasks(taskSet)
    +    }
    +
    +    val offers = IndexedSeq(
    +      new WorkerOffer("executor0", "host0", 1),
    +      new WorkerOffer("executor1", "host1", 1),
    +      new WorkerOffer("executor2", "host1", 1),
    +      new WorkerOffer("executor3", "host2", 10)
    +    )
    +
    +    // Setup our mock blacklist:
    +    // * stage 0 is blacklisted on node "host1"
    +    // * stage 1 is blacklisted on executor "executor3"
    +    // * stage 0, part 0 is blacklisted on executor 0
    +    // Setup some defaults, then override them with particulars.
    +    // (Later stubs take precedence over earlier ones.)
    +    stageToMockTaskSetBlacklist.values.foreach { taskSetBlacklist =>
    +      
when(taskSetBlacklist.isNodeBlacklistedForTaskSet(anyString())).thenReturn(false)
    +      
when(taskSetBlacklist.isExecutorBlacklistedForTaskSet(anyString())).thenReturn(false)
    +      when(taskSetBlacklist.isExecutorBlacklistedForTask(anyString(), 
anyInt())).thenReturn(false)
    +      when(taskSetBlacklist.isNodeBlacklistedForTask(anyString(), 
anyInt())).thenReturn(false)
    +    }
    +    
when(stageToMockTaskSetBlacklist(0).isNodeBlacklistedForTaskSet("host1")).thenReturn(true)
    +    
when(stageToMockTaskSetBlacklist(1).isExecutorBlacklistedForTaskSet("executor3"))
    +      .thenReturn(true)
    +    
when(stageToMockTaskSetBlacklist(0).isExecutorBlacklistedForTask("executor0", 
0))
    +      .thenReturn(true)
    +
    +    val firstTaskAttempts = taskScheduler.resourceOffers(offers).flatten
    +    (0 to 2).foreach { stageId =>
    +      verify(stageToMockTaskSetBlacklist(stageId), atLeast(1))
    +        .isNodeBlacklistedForTaskSet(anyString())
    +    }
    +    for {
    +      exec <- Seq("executor1", "executor2")
    +      part <- 0 to 1
    +    } {
    +      // The node blacklist should ensure we never check the task 
blacklist.  This is important
    +      // for performance, otherwise we end up changing an O(1) operation 
into a
    +      // O(numPendingTasks) one.
    --- End diff --
    
    I've updated the comment (and reorganized it slightly), so hopefully this is
clearer.  The point is to avoid calling `dequeueTask()` O(numPendingTasks)
times inside the scheduler (that's one of the major problems with the old
blacklisting).


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to