This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new db171551d39 [SPARK-42677][SQL][TESTS] Fix the invalid tests for broadcast hint db171551d39 is described below commit db171551d39dc8876cf170bb6e86b2340c768465 Author: Jiaan Geng <belie...@163.com> AuthorDate: Mon Mar 6 16:16:49 2023 +0800 [SPARK-42677][SQL][TESTS] Fix the invalid tests for broadcast hint ### What changes were proposed in this pull request? Currently, a lot of test cases for the broadcast hint are invalid, because the data size is smaller than the broadcast threshold. ### Why are the changes needed? Fix the invalid tests for broadcast hint. ### Does this PR introduce _any_ user-facing change? 'No'. It only modifies the test cases. ### How was this patch tested? Corrected test cases. Closes #40293 from beliefer/SPARK-42677. Authored-by: Jiaan Geng <belie...@163.com> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- .../org/apache/spark/sql/DataFrameJoinSuite.scala | 38 +++++++++++++--------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index e4f6b4cb40c..56e9520fdab 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -195,22 +195,28 @@ class DataFrameJoinSuite extends QueryTest val df1 = Seq((1, "1"), (2, "2")).toDF("key", "value") val df2 = Seq((1, "1"), (2, "2")).toDF("key", "value") - // equijoin - should be converted into broadcast join - val plan1 = df1.join(broadcast(df2), "key").queryExecution.sparkPlan - assert(plan1.collect { case p: BroadcastHashJoinExec => p }.size === 1) - - // no join key -- should not be a broadcast join - val plan2 = df1.crossJoin(broadcast(df2)).queryExecution.sparkPlan - assert(plan2.collect { case p: BroadcastHashJoinExec => p }.size === 0) - - // planner should not crash without 
a join - broadcast(df1).queryExecution.sparkPlan - - // SPARK-12275: no physical plan for BroadcastHint in some condition - withTempPath { path => - df1.write.parquet(path.getCanonicalPath) - val pf1 = spark.read.parquet(path.getCanonicalPath) - assert(df1.crossJoin(broadcast(pf1)).count() === 4) + withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { + // equijoin - should not be converted into broadcast join without hint + val plan1 = df1.join(df2, "key").queryExecution.sparkPlan + assert(plan1.collect { case p: BroadcastHashJoinExec => p }.size === 0) + + // equijoin - should be converted into broadcast join with hint + val plan2 = df1.join(broadcast(df2), "key").queryExecution.sparkPlan + assert(plan2.collect { case p: BroadcastHashJoinExec => p }.size === 1) + + // no join key -- should not be a broadcast join + val plan3 = df1.crossJoin(broadcast(df2)).queryExecution.sparkPlan + assert(plan3.collect { case p: BroadcastHashJoinExec => p }.size === 0) + + // planner should not crash without a join + broadcast(df1).queryExecution.sparkPlan + + // SPARK-12275: no physical plan for BroadcastHint in some condition + withTempPath { path => + df1.write.parquet(path.getCanonicalPath) + val pf1 = spark.read.parquet(path.getCanonicalPath) + assert(df1.crossJoin(broadcast(pf1)).count() === 4) + } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org