Ngone51 commented on a change in pull request #30650: URL: https://github.com/apache/spark/pull/30650#discussion_r551123668
########## File path: core/src/main/scala/org/apache/spark/scheduler/TaskSchedulerImpl.scala ########## @@ -661,35 +667,51 @@ private[spark] class TaskSchedulerImpl( } if (launchedAnyTask && taskSet.isBarrier) { + val barrierPendingLaunchTasks = taskSet.barrierPendingLaunchTasks.values.toArray // Check whether the barrier tasks are partially launched. - // TODO SPARK-24818 handle the assert failure case (that can happen when some locality - // requirements are not fulfilled, and we should revert the launched tasks). - if (addressesWithDescs.size != taskSet.numTasks) { - val errorMsg = - s"Fail resource offers for barrier stage ${taskSet.stageId} because only " + - s"${addressesWithDescs.size} out of a total number of ${taskSet.numTasks}" + - s" tasks got resource offers. This happens because barrier execution currently " + - s"does not work gracefully with delay scheduling. We highly recommend you to " + - s"disable delay scheduling by setting spark.locality.wait=0 as a workaround if " + - s"you see this error frequently." - logWarning(errorMsg) - taskSet.abort(errorMsg) - throw new SparkException(errorMsg) - } + if (barrierPendingLaunchTasks.size != taskSet.numTasks) { + barrierPendingLaunchTasks.foreach { task => Review comment: IIUC, the order shouldn't matter. Changed to `appendAll` first. Let's see @tgravescs @jiangxb1987 's opinion. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org