Github user rxin commented on a diff in the pull request:

    https://github.com/apache/spark/pull/10762#discussion_r51832603
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
 ---
    @@ -1159,6 +1161,47 @@ class Analyzer(
           }
         }
       }
    +
    +  /**
    +   * Removes natural joins by calculating output columns based on output 
from two sides,
    +   * Then apply a Project on a normal Join to eliminate natural join.
    +   */
    +  object ResolveNaturalJoin extends Rule[LogicalPlan] {
    +    override def apply(plan: LogicalPlan): LogicalPlan = plan 
resolveOperators {
    +      // Should not skip unresolved nodes because natural join is always 
unresolved.
    +      case j @ Join(left, right, NaturalJoin(joinType), condition) if 
j.resolvedExceptNatural =>
    +        // find common column names from both sides, should be treated 
like usingColumns
    +        val joinNames = 
left.output.map(_.name).intersect(right.output.map(_.name))
    +        val leftKeys = joinNames.map(keyName => left.output.find(_.name == 
keyName).get)
    +        val rightKeys = joinNames.map(keyName => right.output.find(_.name 
== keyName).get)
    +        val joinPairs = leftKeys.zip(rightKeys)
    +        // Add joinPairs to joinConditions
    +        val newCondition = (condition ++ joinPairs.map {
    +          case (l, r) => EqualTo(l, r)
    +        }).reduceLeftOption(And)
    +        // columns not in joinPairs
    +        val lUniqueOutput = left.output.filterNot(att => 
leftKeys.contains(att))
    +        val rUniqueOutput = right.output.filterNot(att => 
rightKeys.contains(att))
    +        // we should only keep unique columns(depends on joinType) for 
joinCols
    +        val projectList = joinType match {
    +          case LeftOuter =>
    +            leftKeys ++ lUniqueOutput ++ 
rUniqueOutput.map(_.withNullability(true))
    --- End diff --
    
    Never mind, I figured it out.



---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at infrastruct...@apache.org or file a JIRA
ticket with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to