Github user jiangxb1987 commented on a diff in the pull request: https://github.com/apache/spark/pull/20930#discussion_r178494006 --- Diff: core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala --- @@ -794,6 +794,19 @@ private[spark] class TaskSetManager( fetchFailed.bmAddress.host, fetchFailed.bmAddress.executorId)) } + // Kill any other attempts for this FetchFailed task + for (attemptInfo <- taskAttempts(index) if attemptInfo.running) { + logInfo(s"Killing attempt ${attemptInfo.attemptNumber} for task ${attemptInfo.id} " + + s"in stage ${taskSet.id} (TID ${attemptInfo.taskId}) on ${attemptInfo.host} " + + s"as the attempt ${info.attemptNumber} failed because FetchFailed") + killedByOtherAttempt(index) = true + sched.backend.killTask( --- End diff -- This should not work. Maybe we should just ignore the finished tasks submitted to a failed stage?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org