GitHub user sryza commented on a diff in the pull request:

    https://github.com/apache/spark/pull/4168#discussion_r24438965
  
    --- Diff: core/src/main/scala/org/apache/spark/ExecutorAllocationManager.scala ---
    @@ -224,59 +240,90 @@ private[spark] class ExecutorAllocationManager(
       }
     
       /**
    +   * Check to see whether our existing allocation and the requests we've made previously exceed our
    +   * current needs. If so, let the cluster manager know so that it can cancel pending requests that
    +   * are unneeded.
    +   *
    +   * If not, and the add time has expired, see if we can request new executors and refresh the add
    +   * time.
    +   *
    +   * @return the delta in the target number of executors.
    +   */
    +  private def addOrCancelExecutorRequests(now: Long): Int = synchronized {
    +    val currentTarget = targetNumExecutors
    +    val maxNeeded = maxNumExecutorsNeeded
    +
    +    if (maxNeeded < currentTarget) {
    +      // The target number exceeds the number we actually need, so stop adding new
    +      // executors and inform the cluster manager to cancel the extra pending requests.
    +      val newTotalExecutors = math.max(maxNeeded, minNumExecutors)
    +      client.requestTotalExecutors(newTotalExecutors)
    +      numExecutorsToAdd = 1
    +      updateNumExecutorsPending(newTotalExecutors)
    +    } else if (addTime != NOT_SET && now >= addTime) {
    +      val delta = addExecutors(maxNeeded)
    +      logDebug(s"Starting timer to add more executors (to " +
    +        s"expire in $sustainedSchedulerBacklogTimeout seconds)")
    +      addTime += sustainedSchedulerBacklogTimeout * 1000
    +      delta
    +    } else {
    +      0
    +    }
    +  }
    +
    +  /**
        * Request a number of executors from the cluster manager.
        * If the cap on the number of executors is reached, give up and reset the
        * number of executors to add next round instead of continuing to double it.
    -   * Return the number actually requested.
    +   *
    +   * @param maxNumExecutorsNeeded the maximum number of executors all currently running or pending
    +   *                              tasks could fill
    +   * @return the number of additional executors actually requested.
        */
    -  private def addExecutors(): Int = synchronized {
    -    // Do not request more executors if we have already reached the upper bound
    -    val numExistingExecutors = executorIds.size + numExecutorsPending
    -    if (numExistingExecutors >= maxNumExecutors) {
    +  private def addExecutors(maxNumExecutorsNeeded: Int): Int = {
    +    // Do not request more executors if it would put our target over the upper bound
    +    val currentTarget = targetNumExecutors
    +    if (currentTarget >= maxNumExecutors) {
           logDebug(s"Not adding executors because there are already 
${executorIds.size} " +
             s"registered and $numExecutorsPending pending executor(s) (limit 
$maxNumExecutors)")
           numExecutorsToAdd = 1
           return 0
         }
     
    -    // The number of executors needed to satisfy all pending tasks is the number of tasks pending
    -    // divided by the number of tasks each executor can fit, rounded up.
    -    val maxNumExecutorsPending =
    -      (listener.totalPendingTasks() + tasksPerExecutor - 1) / tasksPerExecutor
    -    if (numExecutorsPending >= maxNumExecutorsPending) {
    -      logDebug(s"Not adding executors because there are already 
$numExecutorsPending " +
    -        s"pending and pending tasks could only fill 
$maxNumExecutorsPending")
    -      numExecutorsToAdd = 1
    -      return 0
    -    }
    -
    -    // It's never useful to request more executors than could satisfy all the pending tasks, so
    -    // cap request at that amount.
    -    // Also cap request with respect to the configured upper bound.
    -    val maxNumExecutorsToAdd = math.min(
    -      maxNumExecutorsPending - numExecutorsPending,
    -      maxNumExecutors - numExistingExecutors)
    -    assert(maxNumExecutorsToAdd > 0)
    -
    -    val actualNumExecutorsToAdd = math.min(numExecutorsToAdd, maxNumExecutorsToAdd)
    -
    -    val newTotalExecutors = numExistingExecutors + actualNumExecutorsToAdd
    -    val addRequestAcknowledged = testing || client.requestExecutors(actualNumExecutorsToAdd)
    +    val actualMaxNumExecutors = math.min(maxNumExecutors - executorsPendingToRemove.size,
    --- End diff --
    
    Oy, yup, you're definitely right.  Not sure what I was thinking last night.
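
    Since the diff only shows fragments of the new logic, here is a minimal,
    self-contained Scala sketch of the arithmetic it relies on: the
    demand-derived maximum is the outstanding task count ceiling-divided by
    tasksPerExecutor (the same computation as the removed
    maxNumExecutorsPending), and the target is shrunk toward that demand when
    it is over-provisioned, while staying within [minNumExecutors,
    maxNumExecutors]. The object, method, and parameter names below are
    illustrative only, this is not the actual ExecutorAllocationManager code,
    and it deliberately leaves out the executorsPendingToRemove adjustment
    discussed above.

        import scala.math.{max, min}

        // Standalone illustration; names are made up for this sketch and are not part of Spark.
        object AllocationSketch {

          // Executors needed to run `outstandingTasks` at once:
          // ceil(outstandingTasks / tasksPerExecutor), in integer arithmetic.
          def executorsNeeded(outstandingTasks: Int, tasksPerExecutor: Int): Int =
            (outstandingTasks + tasksPerExecutor - 1) / tasksPerExecutor

          // New target given the current target and demand: if demand has dropped below the
          // target, shrink toward demand (but not below the floor) so surplus pending requests
          // can be cancelled; never exceed the configured ceiling. Growth is handled elsewhere
          // (the add-time path in the diff), so an under-provisioned target is returned unchanged.
          def newTarget(currentTarget: Int, maxNeeded: Int,
              minNumExecutors: Int, maxNumExecutors: Int): Int = {
            val shrunk = if (maxNeeded < currentTarget) max(maxNeeded, minNumExecutors) else currentTarget
            min(shrunk, maxNumExecutors)
          }

          def main(args: Array[String]): Unit = {
            // 23 outstanding tasks at 4 per executor -> ceil(23/4) = 6 executors needed.
            val needed = executorsNeeded(outstandingTasks = 23, tasksPerExecutor = 4)
            println(needed)  // 6
            println(newTarget(currentTarget = 10, maxNeeded = needed,
              minNumExecutors = 2, maxNumExecutors = 12))  // 6 (shrunk toward demand)
            println(newTarget(currentTarget = 4, maxNeeded = needed,
              minNumExecutors = 2, maxNumExecutors = 12))  // 4 (unchanged; growth happens elsewhere)
          }
        }

    The "+ tasksPerExecutor - 1" term is just integer ceiling division, giving
    the same rounded-up result without a round trip through floating point.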

