Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20930#discussion_r178470893

    --- Diff: core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala ---
    @@ -794,6 +794,19 @@ private[spark] class TaskSetManager(
               fetchFailed.bmAddress.host, fetchFailed.bmAddress.executorId))
           }

    +      // Kill any other attempts for this FetchFailed task
    +      for (attemptInfo <- taskAttempts(index) if attemptInfo.running) {
    +        logInfo(s"Killing attempt ${attemptInfo.attemptNumber} for task ${attemptInfo.id} " +
    +          s"in stage ${taskSet.id} (TID ${attemptInfo.taskId}) on ${attemptInfo.host} " +
    +          s"as attempt ${info.attemptNumber} failed with a FetchFailed")
    +        killedByOtherAttempt(index) = true
    +        sched.backend.killTask(
    --- End diff --

    If this is asynchronous, we can't guarantee that no task-success events arrive after the stage has been marked as failed, right?
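    To make the concern concrete: `killTask` only *requests* a kill from the backend, so a success message for an attempt we just marked as killed can still race in afterwards. Below is a minimal, self-contained sketch of one way such a late event could be dropped. This is hypothetical illustration, not the actual TaskSetManager code; all names here are made up for the example.

    ```scala
    import scala.collection.mutable

    object LateSuccessGuardSketch {
      // Task index within the task set -> whether its attempts were killed
      // because a sibling attempt hit a FetchFailed (mirrors the idea behind
      // killedByOtherAttempt in the diff above; the guard itself is invented).
      private val killedByOtherAttempt = mutable.Map.empty[Int, Boolean]

      // Called on the FetchFailed path, before the async killTask request.
      def markKilled(index: Int): Unit = killedByOtherAttempt(index) = true

      // Called when the backend reports a successful attempt. Returns true
      // only if the result should be processed; a success event that raced
      // with the async kill is ignored so the failed stage is not resurrected.
      def shouldProcessSuccess(index: Int): Boolean =
        !killedByOtherAttempt.getOrElse(index, false)

      def main(args: Array[String]): Unit = {
        markKilled(0)                    // FetchFailed kills attempts of task 0
        println(shouldProcessSuccess(0)) // false: late success event dropped
        println(shouldProcessSuccess(1)) // true: unrelated task proceeds
      }
    }
    ```

    The point of the sketch is that the guard has to live on the event-handling side, not the killing side: since the kill is asynchronous, only the receiver of the status update can reliably filter events that were already in flight.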