This is an automated email from the ASF dual-hosted git repository. mridulm80 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new b9380589a1c [SPARK-40968] Fix a few wrong/misleading comments in DAGSchedulerSuite b9380589a1c is described below commit b9380589a1caba6ec2586128aa0d76f81090c9a7 Author: JiexingLi <jiexing...@databricks.com> AuthorDate: Wed Nov 2 00:57:20 2022 -0500 [SPARK-40968] Fix a few wrong/misleading comments in DAGSchedulerSuite ### What changes were proposed in this pull request? Fix a few wrong or misleading comments in DAGSchedulerSuite. ### Why are the changes needed? The wrong or misleading comments in DAGSchedulerSuite introduce confusion and make it harder to understand the code. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? No code changes, pure comment changes. Original tests pass. Closes #38371 from JiexingLi/fix-comments. Authored-by: JiexingLi <jiexing...@databricks.com> Signed-off-by: Mridul <mridul<at>gmail.com> --- .../scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 04b335987d2..59e725e2b75 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -1051,7 +1051,8 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti * @param stageId - The current stageId * @param attemptIdx - The current attempt count * @param numShufflePartitions - The number of partitions in the next stage - * @param hostNames - Host on which each task in the task set is executed + * @param hostNames - Host on which each task in the task set is executed. In case no hostNames + * are provided, the tasks will progressively complete on hostA, hostB, etc. 
*/ private def completeShuffleMapStageSuccessfully( stageId: Int, @@ -3088,14 +3089,17 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti submit(finalRdd, Array(0, 1), properties = new Properties()) - // Finish the first 2 shuffle map stages. + // Finish the first shuffle map stages, with shuffle data on hostA and hostB. completeShuffleMapStageSuccessfully(0, 0, 2) assert(mapOutputTracker.findMissingPartitions(shuffleId1) === Some(Seq.empty)) + // Finish the second shuffle map stages, with shuffle data on hostB and hostD. completeShuffleMapStageSuccessfully(1, 0, 2, Seq("hostB", "hostD")) assert(mapOutputTracker.findMissingPartitions(shuffleId2) === Some(Seq.empty)) - // Executor lost on hostB, both of stage 0 and 1 should be reran. + // Executor lost on hostB, both of stage 0 and 1 should be rerun - as part of re-computation + // of stage 2, as we have output on hostB for both stage 0 and stage 1 (see + // completeShuffleMapStageSuccessfully). runEvent(makeCompletionEvent( taskSets(2).tasks(0), FetchFailed(makeBlockManagerId("hostB"), shuffleId2, 0L, 0, 0, "ignored"), @@ -3207,7 +3211,7 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti assert(failure == null, "job should not fail") val failedStages = scheduler.failedStages.toSeq assert(failedStages.length == 2) - // Shuffle blocks of "hostA" is lost, so first task of the `shuffleMapRdd2` needs to retry. + // Shuffle blocks of "hostA" is lost, so first task of the `mapRdd` needs to retry. assert(failedStages.collect { case stage: ShuffleMapStage if stage.shuffleDep.shuffleId == shuffleId => stage }.head.findMissingPartitions() == Seq(0)) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org