This is an automated email from the ASF dual-hosted git repository. viirya pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new b3ef76c [SPARK-36721][SQL] Simplify boolean equalities if one side is literal b3ef76c is described below commit b3ef76cfb4c642ab2a57c27f905bf0953a068eb2 Author: Kazuyuki Tanimura <ktanim...@apple.com> AuthorDate: Fri Sep 24 10:53:24 2021 -0700 [SPARK-36721][SQL] Simplify boolean equalities if one side is literal ### What changes were proposed in this pull request? This PR proposes to improve simplifications of `EqualTo/EqualNullSafe` binary comparators when one side is a boolean literal. For example: `EqualTo(predicate, TrueLiteral) => predicate`, `EqualNullSafe(predicate, TrueLiteral) => predicate if !predicate.nullable`. This PR helps push down the filter and reduce unnecessary IO. ### Why are the changes needed? The following query does not push down the filter in the current implementation: ``` SELECT * FROM t WHERE (a AND b) = true ``` although the following equivalent query pushes down the filter as expected. ``` SELECT * FROM t WHERE (a AND b) ``` That is because the first query creates `EqualTo(And(a, b), TrueLiteral)`, which is simply not in a form that we can push down. However, we should be able to get it simplified to `And(a, b)`. It is fair for Spark SQL users to expect `(a AND b) = true` to perform the same as `(a AND b)`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added unit tests ``` build/sbt "testOnly *BinaryComparisonSimplificationSuite -- -z SPARK-36721" ``` Closes #34055 from kazuyukitanimura/SPARK-36721. 
Authored-by: Kazuyuki Tanimura <ktanim...@apple.com> Signed-off-by: Liang-Chi Hsieh <vii...@gmail.com> --- .../spark/sql/catalyst/optimizer/expressions.scala | 11 +++++++++++ .../BinaryComparisonSimplificationSuite.scala | 22 ++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala index 74eb93c..0ec8bad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala @@ -451,6 +451,7 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { * 1) Replace '<=>' with 'true' literal. * 2) Replace '=', '<=', and '>=' with 'true' literal if both operands are non-nullable. * 3) Replace '<' and '>' with 'false' literal if both operands are non-nullable. + * 4) Unwrap '=', '<=>' if one side is a boolean literal */ object SimplifyBinaryComparison extends Rule[LogicalPlan] with PredicateHelper with ConstraintHelper { @@ -488,6 +489,16 @@ object SimplifyBinaryComparison // False with inequality case a GreaterThan b if canSimplifyComparison(a, b, notNullExpressions) => FalseLiteral case a LessThan b if canSimplifyComparison(a, b, notNullExpressions) => FalseLiteral + + // Optimize equalities when one side is Literal in order to help pushing down the filters + case a EqualTo TrueLiteral => a + case TrueLiteral EqualTo b => b + case a EqualTo FalseLiteral => Not(a) + case FalseLiteral EqualTo b => Not(b) + case a EqualNullSafe TrueLiteral if !a.nullable => a + case TrueLiteral EqualNullSafe b if !b.nullable => b + case a EqualNullSafe FalseLiteral if !a.nullable => Not(a) + case FalseLiteral EqualNullSafe b if !b.nullable => Not(b) } } } diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala index 957f029..d4b97f5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala @@ -44,8 +44,13 @@ class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper PruneFilters) :: Nil } + private def checkCondition(rel: LocalRelation, input: Expression, expected: Expression): Unit = + comparePlans(Optimize.execute(rel.where(input).analyze), rel.where(expected).analyze) + val nullableRelation = LocalRelation('a.int.withNullability(true)) val nonNullableRelation = LocalRelation('a.int.withNullability(false)) + val boolRelation = LocalRelation('a.boolean, 'b.boolean) + test("Preserve nullable exprs when constraintPropagation is false") { withSQLConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED.key -> "false") { @@ -194,4 +199,21 @@ class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper Optimize.execute(testRelation.select(Coalesce(Seq('b, 'd)).as("out")).analyze), testRelation.select(Coalesce(Seq('b, 'd)).as("out")).analyze) } + + test("SPARK-36721: Simplify boolean equalities if one side is literal") { + checkCondition(boolRelation, And('a, 'b) === TrueLiteral, And('a, 'b)) + checkCondition(boolRelation, TrueLiteral === And('a, 'b), And('a, 'b)) + checkCondition(boolRelation, And('a, 'b) === FalseLiteral, Or(Not('a), Not('b))) + checkCondition(boolRelation, FalseLiteral === And('a, 'b), Or(Not('a), Not('b))) + checkCondition(boolRelation, IsNull('a) <=> TrueLiteral, IsNull('a)) + checkCondition(boolRelation, TrueLiteral <=> IsNull('a), IsNull('a)) + checkCondition(boolRelation, IsNull('a) <=> FalseLiteral, IsNotNull('a)) + 
checkCondition(boolRelation, FalseLiteral <=> IsNull('a), IsNotNull('a)) + + // Should not optimize for nullable <=> Literal + checkCondition(boolRelation, And('a, 'b) <=> TrueLiteral, And('a, 'b) <=> TrueLiteral) + checkCondition(boolRelation, TrueLiteral <=> And('a, 'b), TrueLiteral <=> And('a, 'b)) + checkCondition(boolRelation, And('a, 'b) <=> FalseLiteral, And('a, 'b) <=> FalseLiteral) + checkCondition(boolRelation, FalseLiteral <=> And('a, 'b), FalseLiteral <=> And('a, 'b)) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org