ulysses-you commented on a change in pull request #32816: URL: https://github.com/apache/spark/pull/32816#discussion_r662725098
########## File path: sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/AdaptiveSparkPlanExec.scala ########## @@ -252,17 +275,26 @@ case class AdaptiveSparkPlanExec( // plans are updated, we can clear the query stage list because at this point the two plans // are semantically and physically in sync again. val logicalPlan = replaceWithQueryStagesInLogicalPlan(currentLogicalPlan, stagesToReplace) - val (newPhysicalPlan, newLogicalPlan) = reOptimize(logicalPlan) + val (reOptimizePhysicalPlan, newLogicalPlan) = reOptimize(logicalPlan) + val planWithExtraShuffle = rePlanWithExtraShuffle(reOptimizePhysicalPlan) val origCost = costEvaluator.evaluateCost(currentPhysicalPlan) - val newCost = costEvaluator.evaluateCost(newPhysicalPlan) - if (newCost < origCost || - (newCost == origCost && currentPhysicalPlan != newPhysicalPlan)) { + val newCost = costEvaluator.evaluateCost(reOptimizePhysicalPlan) + val extraShuffleCost = costEvaluator.evaluateCost(planWithExtraShuffle) + def updateCurrentPlan(newPhysicalPlan: SparkPlan): Unit = { logOnLevel(s"Plan changed from $currentPhysicalPlan to $newPhysicalPlan") cleanUpTempTags(newPhysicalPlan) currentPhysicalPlan = newPhysicalPlan currentLogicalPlan = newLogicalPlan stagesToReplace = Seq.empty[QueryStageExec] } + + if (extraShuffleCost < newCost || + (extraShuffleCost == newCost && planWithExtraShuffle != reOptimizePhysicalPlan)) { + updateCurrentPlan(planWithExtraShuffle) + } else if (newCost < origCost || + (newCost == origCost && currentPhysicalPlan != reOptimizePhysicalPlan)) { + updateCurrentPlan(reOptimizePhysicalPlan) + } Review comment: @cloud-fan Here we use 3 costs to find the best plan: 1. the plan with the skew join, if skew-join optimization is forced; 2. the re-optimized plan, if skew-join optimization is not forced and the plan has no extra shuffle; 3. the original plan, if the re-optimized plan has an extra shuffle. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org