sunchao commented on code in PR #55927:
URL: https://github.com/apache/spark/pull/55927#discussion_r3268819175


##########
sql/core/src/test/scala/org/apache/spark/sql/execution/joins/OuterJoinSuite.scala:
##########
@@ -345,4 +346,249 @@ class OuterJoinSuite extends SharedSparkSession with SQLTestData {
     val df2 = join("SHUFFLE_MERGE(t1)")
     checkAnswer(df1, identity, df2.collect().toSeq)
   }
+
+  test("ordinary outer equi-join spreads NULL keys in shuffle partitioning") {
+    val nullableLeft = Seq(
+      (Integer.valueOf(1), "left-1"),
+      (null.asInstanceOf[Integer], "left-null-1"),
+      (null.asInstanceOf[Integer], "left-null-2")).toDF("k", "lv")
+    val nullableRight = Seq(
+      (Integer.valueOf(1), "right-1"),
+      (null.asInstanceOf[Integer], "right-null")).toDF("k", "rv")
+    val joinCondition = (nullableLeft("k") === nullableRight("k")).expr
+    val join = Join(nullableLeft.logicalPlan, nullableRight.logicalPlan,
+      LeftOuter, Some(joinCondition), JoinHint.NONE)
+
+    ExtractEquiJoinKeys.unapply(join).foreach {

Review Comment:
   Updated



##########
sql/core/src/main/scala/org/apache/spark/sql/execution/joins/ShuffledJoin.scala:
##########
@@ -28,6 +29,15 @@ import org.apache.spark.sql.catalyst.plans.physical.{ClusteredDistribution, Dist
 trait ShuffledJoin extends JoinCodegenSupport {
   def isSkewJoin: Boolean
 
+  private lazy val canSpreadNullJoinKeys: Boolean = {
+    // Null-safe equality usually rewrites to non-null shuffle keys. The NullType corner can still
+    // produce NULL shuffle keys, but shuffled join execution already treats those rows as
+    // unmatched, so spreading them does not change the result.
+    val isOuterJoin = joinType == LeftOuter || joinType == RightOuter || joinType == FullOuter
+    conf.getConf(SQLConf.SHUFFLE_SPREAD_NULL_JOIN_KEYS_ENABLED) &&
+      isOuterJoin
+  }

Review Comment:
   Updated



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to