Repository: spark
Updated Branches:
  refs/heads/master f96997ba2 -> 77ba3021c
[SPARK-13869][SQL] Remove redundant conditions while combining filters

## What changes were proposed in this pull request?

**[I'll link it to the JIRA once ASF JIRA is back online]**

This PR modifies the existing `CombineFilters` rule to remove redundant conditions while combining individual filter predicates. For instance, queries of the form `table.where('a === 1 && 'b === 1).where('a === 1 && 'c === 1)` will now be optimized to `table.where('a === 1 && 'b === 1 && 'c === 1)` (instead of `table.where('a === 1 && 'a === 1 && 'b === 1 && 'c === 1)`)

## How was this patch tested?

Unit test in `FilterPushdownSuite`

Author: Sameer Agarwal <sam...@databricks.com>

Closes #11670 from sameeragarwal/combine-filters.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/77ba3021
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/77ba3021
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/77ba3021

Branch: refs/heads/master
Commit: 77ba3021c12dc63cb7d831f964f901e0474acd96
Parents: f96997b
Author: Sameer Agarwal <sam...@databricks.com>
Authored: Wed Mar 16 16:27:46 2016 -0700
Committer: Yin Huai <yh...@databricks.com>
Committed: Wed Mar 16 16:27:46 2016 -0700

----------------------------------------------------------------------
 .../spark/sql/catalyst/optimizer/Optimizer.scala | 15 +++++++++++----
 .../sql/catalyst/optimizer/FilterPushdownSuite.scala | 15 +++++++++++++++
 2 files changed, 26 insertions(+), 4 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/77ba3021/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 76f50a3..3f57b07
100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -818,12 +818,19 @@ object CombineUnions extends Rule[LogicalPlan] {
 }
 
 /**
- * Combines two adjacent [[Filter]] operators into one, merging the
- * conditions into one conjunctive predicate.
+ * Combines two adjacent [[Filter]] operators into one, merging the non-redundant conditions into
+ * one conjunctive predicate.
  */
-object CombineFilters extends Rule[LogicalPlan] {
+object CombineFilters extends Rule[LogicalPlan] with PredicateHelper {
   def apply(plan: LogicalPlan): LogicalPlan = plan transform {
-    case ff @ Filter(fc, nf @ Filter(nc, grandChild)) => Filter(And(nc, fc), grandChild)
+    case ff @ Filter(fc, nf @ Filter(nc, grandChild)) =>
+      (ExpressionSet(splitConjunctivePredicates(fc)) --
+        ExpressionSet(splitConjunctivePredicates(nc))).reduceOption(And) match {
+        case Some(ac) =>
+          Filter(And(ac, nc), grandChild)
+        case None =>
+          nf
+      }
   }
 }
 
http://git-wip-us.apache.org/repos/asf/spark/blob/77ba3021/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
index a636d63..b84ae7c 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/FilterPushdownSuite.scala
@@ -81,6 +81,21 @@ class FilterPushdownSuite extends PlanTest {
     comparePlans(optimized, correctAnswer)
   }
 
+  test("combine redundant filters") {
+    val originalQuery =
+      testRelation
+        .where('a === 1 && 'b === 1)
+        .where('a === 1 && 'c === 1)
+
+    val optimized = Optimize.execute(originalQuery.analyze)
+    val correctAnswer =
+      testRelation
+        .where('a === 1 && 'b === 1 && 'c === 1)
+        .analyze
+
+    comparePlans(optimized, correctAnswer)
+  }
+
   test("can't push without rewrite") {
     val originalQuery =
       testRelation

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org