Repository: spark Updated Branches: refs/heads/master 183d4cb71 -> 2dd37d827
[SPARK-21826][SQL] outer broadcast hash join should not throw NPE ## What changes were proposed in this pull request? This is a bug introduced by https://github.com/apache/spark/pull/11274/files#diff-7adb688cbfa583b5711801f196a074bbL274 . The non-equi join condition should only be evaluated when the equi-join condition matches. Previously the generated code evaluated the condition before checking whether a matching row was found, so an outer-join row with no match read from a null matched row and threw an NPE. ## How was this patch tested? Added a regression test to JoinSuite. Author: Wenchen Fan <wenc...@databricks.com> Closes #19036 from cloud-fan/bug. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2dd37d82 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2dd37d82 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2dd37d82 Branch: refs/heads/master Commit: 2dd37d827f2e443dcb3eaf8a95437d179130d55c Parents: 183d4cb Author: Wenchen Fan <wenc...@databricks.com> Authored: Thu Aug 24 16:44:12 2017 +0200 Committer: Herman van Hovell <hvanhov...@databricks.com> Committed: Thu Aug 24 16:44:12 2017 +0200 ---------------------------------------------------------------------- .../execution/joins/BroadcastHashJoinExec.scala | 2 +- .../scala/org/apache/spark/sql/JoinSuite.scala | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/2dd37d82/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala index bfa1e9d..2f52a08 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/BroadcastHashJoinExec.scala @@ -283,8 +283,8 @@ case class BroadcastHashJoinExec( s""" 
|boolean $conditionPassed = true; |${eval.trim} - |${ev.code} |if ($matched != null) { + | ${ev.code} | $conditionPassed = !${ev.isNull} && ${ev.value}; |} """.stripMargin http://git-wip-us.apache.org/repos/asf/spark/blob/2dd37d82/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala index 86fe09b..453052a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/JoinSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql +import scala.collection.JavaConverters._ import scala.collection.mutable.ListBuffer import scala.language.existentials @@ -26,6 +27,7 @@ import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation import org.apache.spark.sql.execution.joins._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.types.StructType class JoinSuite extends QueryTest with SharedSQLContext { import testImplicits._ @@ -767,4 +769,22 @@ class JoinSuite extends QueryTest with SharedSQLContext { } } } + + test("outer broadcast hash join should not throw NPE") { + withTempView("v1", "v2") { + withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "true") { + Seq(2 -> 2).toDF("x", "y").createTempView("v1") + + spark.createDataFrame( + Seq(Row(1, "a")).asJava, + new StructType().add("i", "int", nullable = false).add("j", "string", nullable = false) + ).createTempView("v2") + + checkAnswer( + sql("select x, y, i, j from v1 left join v2 on x = i and y < length(j)"), + Row(2, 2, null, null) + ) + } + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org