Github user xuanyuanking commented on a diff in the pull request: https://github.com/apache/spark/pull/22326#discussion_r216127710 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala --- @@ -1202,15 +1243,38 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { split(joinCondition.map(splitConjunctivePredicates).getOrElse(Nil), left, right) joinType match { - case _: InnerLike | LeftSemi => - // push down the single side only join filter for both sides sub queries - val newLeft = leftJoinConditions. - reduceLeftOption(And).map(Filter(_, left)).getOrElse(left) - val newRight = rightJoinConditions. - reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) - val newJoinCond = commonJoinCondition.reduceLeftOption(And) + case LeftSemi => + val (newLeft, newRight, newJoinCond, others) = getNewChildAndSplitCondForJoin( + j, leftJoinConditions, rightJoinConditions, commonJoinCondition) + // need to add cross join when unevaluable condition exists + val newJoinType = if (others.nonEmpty) { + tryToGetCrossType(commonJoinCondition, j) + } else { + joinType + } - Join(newLeft, newRight, joinType, newJoinCond) + val join = Join(newLeft, newRight, newJoinType, newJoinCond) + if (others.nonEmpty) { + Project(newLeft.output.map(_.toAttribute), Filter(others.reduceLeft(And), join)) --- End diff -- Could I try to answer this? The projection is only used for a left semi join, applied after the cross join in this scenario, to ensure the result contains only left-side attributes.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org