This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new e7d9a245 [SPARK-32817][SQL] DPP throws error when broadcast side is empty e7d9a245 is described below commit e7d9a245656655e7bb1df3e04df30eb3cc9e23ad Author: Zhenhua Wang <wzh_...@163.com> AuthorDate: Tue Sep 8 21:36:21 2020 +0900 [SPARK-32817][SQL] DPP throws error when broadcast side is empty ### What changes were proposed in this pull request? In `SubqueryBroadcastExec.relationFuture`, if the `broadcastRelation` is an `EmptyHashedRelation`, then `broadcastRelation.keys()` will throw `UnsupportedOperationException`. ### Why are the changes needed? To fix a bug. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added a new test. Closes #29671 from wzhfy/dpp_empty_broadcast. Authored-by: Zhenhua Wang <wzh_...@163.com> Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> --- .../spark/sql/execution/joins/HashedRelation.scala | 2 +- .../apache/spark/sql/DynamicPartitionPruningSuite.scala | 17 +++++++++++++++++ .../spark/sql/execution/joins/HashedRelationSuite.scala | 6 +++++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index 89836f6..3c5ed40 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -1091,7 +1091,7 @@ case object EmptyHashedRelation extends HashedRelation { override def keyIsUnique: Boolean = true override def keys(): Iterator[InternalRow] = { - throw new UnsupportedOperationException + Iterator.empty } override def close(): Unit = {} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala index ba61be5..55437aa 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala @@ -1344,6 +1344,23 @@ abstract class DynamicPartitionPruningSuiteBase } } } + + test("SPARK-32817: DPP throws error when the broadcast side is empty") { + withSQLConf( + SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true", + SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "true") { + val df = sql( + """ + |SELECT * FROM fact_sk f + |JOIN dim_store s + |ON f.store_id = s.store_id WHERE s.country = 'XYZ' + """.stripMargin) + + checkPartitionPruningPredicate(df, false, true) + + checkAnswer(df, Nil) + } + } } class DynamicPartitionPruningSuiteAEOff extends DynamicPartitionPruningSuiteBase { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala index caa7bdf..84f6299 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/joins/HashedRelationSuite.scala @@ -621,7 +621,7 @@ class HashedRelationSuite extends SharedSparkSession { } } - test("EmptyHashedRelation return null in get / getValue") { + test("EmptyHashedRelation override methods behavior test") { val buildKey = Seq(BoundReference(0, LongType, false)) val hashed = HashedRelation(Seq.empty[InternalRow].toIterator, buildKey, 1, mm) assert(hashed == EmptyHashedRelation) @@ -631,6 +631,10 @@ class HashedRelationSuite extends SharedSparkSession { assert(hashed.get(key) == null) assert(hashed.getValue(0L) == null) assert(hashed.getValue(key) == null) + + assert(hashed.keys().isEmpty) + assert(hashed.keyIsUnique) + assert(hashed.estimatedSize == 0) } test("SPARK-32399: test methods related to key index") { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org