This is an automated email from the ASF dual-hosted git repository. yumwang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new cb0e4198996 [SPARK-39338][SQL] Remove dynamic pruning subquery if pruningKey's references is empty cb0e4198996 is described below commit cb0e41989964dbb3b287f6df8f0e70222e50c5eb Author: Yuming Wang <yumw...@ebay.com> AuthorDate: Wed Jun 1 11:21:13 2022 +0800 [SPARK-39338][SQL] Remove dynamic pruning subquery if pruningKey's references is empty ### What changes were proposed in this pull request? Remove the dynamic pruning subquery if the pruningKey's references are empty. For example: ```sql SELECT f.store_id, f.date_id, s.state_province FROM (SELECT store_id, date_id, product_id FROM fact_stats WHERE date_id <= 1000 UNION ALL SELECT 4 AS store_id, date_id, product_id FROM fact_sk WHERE date_id >= 1300) f JOIN dim_store s ON f.store_id = s.store_id WHERE s.country IN ('US', 'NL') ``` Before this PR | After this PR -- | -- ![image](https://user-images.githubusercontent.com/5399861/170940803-b4d4b93d-96d7-47de-ac22-d13259f3447a.png) | ![image](https://user-images.githubusercontent.com/5399861/170941010-b7eec26e-9f93-4ae6-aa10-aacdde15af0d.png) ### Why are the changes needed? Remove the useless dynamic pruning subquery because it cannot prune any partitions. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #36724 from wangyum/SPARK-39338. 
Authored-by: Yuming Wang <yumw...@ebay.com> Signed-off-by: Yuming Wang <yumw...@ebay.com> --- .../CleanupDynamicPruningFilters.scala | 6 +++-- .../spark/sql/DynamicPartitionPruningSuite.scala | 29 ++++++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala index 65621fb1860..9607ca53964 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/dynamicpruning/CleanupDynamicPruningFilters.scala @@ -50,11 +50,13 @@ object CleanupDynamicPruningFilters extends Rule[LogicalPlan] with PredicateHelp private def removeUnnecessaryDynamicPruningSubquery(plan: LogicalPlan): LogicalPlan = { plan.transformWithPruning(_.containsPattern(DYNAMIC_PRUNING_SUBQUERY)) { case f @ Filter(condition, _) => - val unnecessaryPruningKeys = ExpressionSet(collectEqualityConditionExpressions(condition)) + lazy val unnecessaryPruningKeys = + ExpressionSet(collectEqualityConditionExpressions(condition)) val newCondition = condition.transformWithPruning( _.containsPattern(DYNAMIC_PRUNING_SUBQUERY)) { case dynamicPruning: DynamicPruningSubquery - if unnecessaryPruningKeys.contains(dynamicPruning.pruningKey) => + if dynamicPruning.pruningKey.references.isEmpty || + unnecessaryPruningKeys.contains(dynamicPruning.pruningKey) => TrueLiteral } f.copy(condition = newCondition) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala index 29c178d6b83..b1b9ed04568 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala +++ 
b/sql/core/src/test/scala/org/apache/spark/sql/DynamicPartitionPruningSuite.scala @@ -1587,6 +1587,35 @@ abstract class DynamicPartitionPruningSuiteBase } } } + + test("SPARK-39338: Remove dynamic pruning subquery if pruningKey's references is empty") { + withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") { + val df = sql( + """ + |SELECT f.store_id, + | f.date_id, + | s.state_province + |FROM (SELECT store_id, + | date_id, + | product_id + | FROM fact_stats + | WHERE date_id <= 1000 + | UNION ALL + | SELECT 4 AS store_id, + | date_id, + | product_id + | FROM fact_sk + | WHERE date_id >= 1300) f + |JOIN dim_store s + |ON f.store_id = s.store_id + |WHERE s.country IN ('US', 'NL') + |""".stripMargin) + + checkPartitionPruningPredicate(df, withSubquery = false, withBroadcast = true) + checkAnswer(df, Row(4, 1300, "California") :: Row(1, 1000, "North-Holland") :: Nil) + assert(collectDynamicPruningExpressions(df.queryExecution.executedPlan).size === 1) + } + } } abstract class DynamicPartitionPruningDataSourceSuiteBase --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org