Github user gczsjdy commented on a diff in the pull request: https://github.com/apache/spark/pull/20099#discussion_r158961453 --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala --- @@ -158,45 +158,65 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] { def smallerSide = if (right.stats.sizeInBytes <= left.stats.sizeInBytes) BuildRight else BuildLeft - val buildRight = canBuildRight && right.stats.hints.broadcast - val buildLeft = canBuildLeft && left.stats.hints.broadcast - - if (buildRight && buildLeft) { + if (canBuildRight && canBuildLeft) { // Broadcast smaller side base on its estimated physical size // if both sides have broadcast hint smallerSide - } else if (buildRight) { + } else if (canBuildRight) { BuildRight - } else if (buildLeft) { + } else if (canBuildLeft) { BuildLeft - } else if (canBuildRight && canBuildLeft) { + } else { // for the last default broadcast nested loop join smallerSide - } else { - throw new AnalysisException("Can not decide which side to broadcast for this join") } } + private def needsBroadcastByHints(joinType: JoinType, left: LogicalPlan, right: LogicalPlan) + : Boolean = { + val buildLeft = canBuildLeft(joinType) && left.stats.hints.broadcast + val buildRight = canBuildRight(joinType) && right.stats.hints.broadcast + buildLeft || buildRight + } + + private def broadcastSideByHints(joinType: JoinType, left: LogicalPlan, right: LogicalPlan) + : BuildSide = { + val buildLeft = canBuildLeft(joinType) && left.stats.hints.broadcast + val buildRight = canBuildRight(joinType) && right.stats.hints.broadcast + broadcastSide(buildLeft, buildRight, left, right) + } + + private def needsBroadcastByConfig(joinType: JoinType, left: LogicalPlan, right: LogicalPlan) + : Boolean = { + val buildLeft = canBuildLeft(joinType) && canBroadcast(left) + val buildRight = canBuildRight(joinType) && canBroadcast(right) + buildLeft || buildRight + } + + private def broadcastSideByConfig(joinType: JoinType, 
left: LogicalPlan, right: LogicalPlan) --- End diff -- Is it better to use `xxxbySize`? `byConfig` might confuse people.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org