maropu commented on a change in pull request #32476:
URL: https://github.com/apache/spark/pull/32476#discussion_r628894682



##########
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
##########
@@ -418,115 +443,140 @@ case class SortMergeJoinExec(
     // Inline mutable state since not many join operations in a task
     val matches = ctx.addMutableState(clsName, "matches",
       v => s"$v = new $clsName($inMemoryThreshold, $spillThreshold);", 
forceInline = true)
-    // Copy the left keys as class members so they could be used in next 
function call.
-    val matchedKeyVars = copyKeys(ctx, leftKeyVars)
+    // Copy the streamed keys as class members so they could be used in next 
function call.
+    val matchedKeyVars = copyKeys(ctx, streamedKeyVars)
+
+    // Handle the case when streamed rows has any NULL keys.
+    val handleStreamedAnyNull = joinType match {
+      case _: InnerLike =>
+        // Skip streamed row.
+        s"""
+           |$streamedRow = null;
+           |continue;
+         """.stripMargin
+      case LeftOuter | RightOuter =>
+        // Eagerly return streamed row.
+        s"""
+           |if (!$matches.isEmpty()) {
+           |  $matches.clear();
+           |}
+           |return false;

Review comment:
       ```
           // Eagerly return streamed row.
           s"""
              |$matches.clear();
              |return false;
            """.stripMargin
   ```
   ?

##########
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
##########
@@ -353,12 +353,37 @@ case class SortMergeJoinExec(
     }
   }
 
-  override def supportCodegen: Boolean = {
-    joinType.isInstanceOf[InnerLike]
+  private lazy val (streamedPlan, bufferedPlan) = joinType match {
+    case _: InnerLike | LeftOuter => (left, right)
+    case RightOuter => (right, left)
+    case x =>
+      throw new IllegalArgumentException(
+        s"SortMergeJoin.streamedPlan/bufferedPlan should not take $x as the 
JoinType")

Review comment:
       How about this?
   ```
     private lazy val ((streamedPlan, streamedKeys), (bufferedPlan, 
bufferedKeys)) = joinType match {
       case _: InnerLike | LeftOuter => ((left, leftKeys), (right, rightKeys))
       case RightOuter => ((right, rightKeys), (left, leftKeys))
       case x =>
         throw new IllegalArgumentException(
           s"SortMergeJoin.streamedPlan/bufferedPlan should not take $x as the 
JoinType")
     }
   
     private lazy val streamOutput = streamedPlan.output
     private lazy val bufferedOutput = bufferedPlan.output
   ```
   I think we don't need to repeat the `joinType` check.

##########
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
##########
@@ -353,12 +353,37 @@ case class SortMergeJoinExec(
     }
   }
 
-  override def supportCodegen: Boolean = {
-    joinType.isInstanceOf[InnerLike]
+  private lazy val (streamedPlan, bufferedPlan) = joinType match {

Review comment:
       Do we need `lazy` here?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to