beliefer commented on code in PR #46183: URL: https://github.com/apache/spark/pull/46183#discussion_r1580583432
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/DeduplicateRelations.scala:
##########
@@ -38,28 +38,29 @@ case class RelationWrapper(cls: Class[_], outputAttrIds: Seq[Long])
 object DeduplicateRelations extends Rule[LogicalPlan] {
   override def apply(plan: LogicalPlan): LogicalPlan = {
     val newPlan = renewDuplicatedRelations(mutable.HashSet.empty, plan)._1
-    if (newPlan.find(p => p.resolved && p.missingInput.nonEmpty).isDefined) {
-      // Wait for `ResolveMissingReferences` to resolve missing attributes first
-      return newPlan
-    }
+
+    def noMissingInput(p: LogicalPlan) = !p.exists(_.missingInput.nonEmpty)
+
     newPlan.resolveOperatorsUpWithPruning(
       _.containsAnyPattern(JOIN, LATERAL_JOIN, AS_OF_JOIN, INTERSECT, EXCEPT, UNION, COMMAND),
       ruleId) {
       case p: LogicalPlan if !p.childrenResolved => p
       // To resolve duplicate expression IDs for Join.
-      case j @ Join(left, right, _, _, _) if !j.duplicateResolved =>
+      case j @ Join(left, right, _, _, _) if !j.duplicateResolved && noMissingInput(right) =>
         j.copy(right = dedupRight(left, right))
       // Resolve duplicate output for LateralJoin.
-      case j @ LateralJoin(left, right, _, _) if right.resolved && !j.duplicateResolved =>
+      case j @ LateralJoin(left, right, _, _)
+          if right.resolved && !j.duplicateResolved && noMissingInput(right.plan) =>
         j.copy(right = right.withNewPlan(dedupRight(left, right.plan)))
       // Resolve duplicate output for AsOfJoin.
-      case j @ AsOfJoin(left, right, _, _, _, _, _) if !j.duplicateResolved =>
+      case j @ AsOfJoin(left, right, _, _, _, _, _)
+          if !j.duplicateResolved && noMissingInput(right) =>
         j.copy(right = dedupRight(left, right))
       // intersect/except will be rewritten to join at the beginning of optimizer. Here we need to
       // deduplicate the right side plan, so that we won't produce an invalid self-join later.
-      case i @ Intersect(left, right, _) if !i.duplicateResolved =>
+      case i @ Intersect(left, right, _) if !i.duplicateResolved && noMissingInput(right) =>
         i.copy(right = dedupRight(left, right))
-      case e @ Except(left, right, _) if !e.duplicateResolved =>
+      case e @ Except(left, right, _) if !e.duplicateResolved && noMissingInput(right) =>
         e.copy(right = dedupRight(left, right))
       // Only after we finish by-name resolution for Union
       case u: Union if !u.byName && !u.duplicateResolved =>

Review Comment:
   Got it. Thank you for your explanation.

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org