This is an automated email from the ASF dual-hosted git repository.

viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new b3ef76c  [SPARK-36721][SQL] Simplify boolean equalities if one side is 
literal
b3ef76c is described below

commit b3ef76cfb4c642ab2a57c27f905bf0953a068eb2
Author: Kazuyuki Tanimura <ktanim...@apple.com>
AuthorDate: Fri Sep 24 10:53:24 2021 -0700

    [SPARK-36721][SQL] Simplify boolean equalities if one side is literal
    
    ### What changes were proposed in this pull request?
    This PR proposes to improve simplifications of `EqualTo/EqualNullSafe` 
binary comparators when one side is a boolean literal.
    For example: `EqualTo(predicate, TrueLiteral) => predicate`, 
`EqualNullSafe(predicate, TrueLiteral) => predicate if !predicate.nullable`
    This PR helps push down the filter and reduce unnecessary I/O.
    
    ### Why are the changes needed?
    The following query does not push down the filter in the current 
implementation
    ```
    SELECT * FROM t WHERE (a AND b) = true
    ```
    although the following equivalent query pushes down the filter as expected.
    ```
    SELECT * FROM t WHERE (a AND b)
    ```
    That is because the first query creates `EqualTo(And(a, b), TrueLiteral)`, 
which is simply not in a form that we can push down. However, we should be 
able to simplify it to `And(a, b)`.
    It is fair for Spark SQL users to expect `(a AND b) = true` to perform the 
same as `(a AND b)`.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Added unit tests
    ```
    build/sbt "testOnly *BinaryComparisonSimplificationSuite -- -z SPARK-36721"
    ```
    
    Closes #34055 from kazuyukitanimura/SPARK-36721.
    
    Authored-by: Kazuyuki Tanimura <ktanim...@apple.com>
    Signed-off-by: Liang-Chi Hsieh <vii...@gmail.com>
---
 .../spark/sql/catalyst/optimizer/expressions.scala | 11 +++++++++++
 .../BinaryComparisonSimplificationSuite.scala      | 22 ++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
index 74eb93c..0ec8bad 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/expressions.scala
@@ -451,6 +451,7 @@ object BooleanSimplification extends Rule[LogicalPlan] with 
PredicateHelper {
  * 1) Replace '<=>' with 'true' literal.
  * 2) Replace '=', '<=', and '>=' with 'true' literal if both operands are 
non-nullable.
  * 3) Replace '<' and '>' with 'false' literal if both operands are 
non-nullable.
+ * 4) Unwrap '=', '<=>' if one side is a boolean literal
  */
 object SimplifyBinaryComparison
   extends Rule[LogicalPlan] with PredicateHelper with ConstraintHelper {
@@ -488,6 +489,16 @@ object SimplifyBinaryComparison
         // False with inequality
         case a GreaterThan b if canSimplifyComparison(a, b, 
notNullExpressions) => FalseLiteral
         case a LessThan b if canSimplifyComparison(a, b, notNullExpressions) 
=> FalseLiteral
+
+        // Optimize equalities when one side is Literal in order to help 
pushing down the filters
+        case a EqualTo TrueLiteral => a
+        case TrueLiteral EqualTo b => b
+        case a EqualTo FalseLiteral => Not(a)
+        case FalseLiteral EqualTo b => Not(b)
+        case a EqualNullSafe TrueLiteral if !a.nullable => a
+        case TrueLiteral EqualNullSafe b if !b.nullable => b
+        case a EqualNullSafe FalseLiteral if !a.nullable => Not(a)
+        case FalseLiteral EqualNullSafe b if !b.nullable => Not(b)
       }
   }
 }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala
index 957f029..d4b97f5 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala
@@ -44,8 +44,13 @@ class BinaryComparisonSimplificationSuite extends PlanTest 
with PredicateHelper
         PruneFilters) :: Nil
   }
 
+  private def checkCondition(rel: LocalRelation, input: Expression, expected: 
Expression): Unit =
+    comparePlans(Optimize.execute(rel.where(input).analyze), 
rel.where(expected).analyze)
+
   val nullableRelation = LocalRelation('a.int.withNullability(true))
   val nonNullableRelation = LocalRelation('a.int.withNullability(false))
+  val boolRelation = LocalRelation('a.boolean, 'b.boolean)
+
 
   test("Preserve nullable exprs when constraintPropagation is false") {
     withSQLConf(SQLConf.CONSTRAINT_PROPAGATION_ENABLED.key -> "false") {
@@ -194,4 +199,21 @@ class BinaryComparisonSimplificationSuite extends PlanTest 
with PredicateHelper
       Optimize.execute(testRelation.select(Coalesce(Seq('b, 
'd)).as("out")).analyze),
       testRelation.select(Coalesce(Seq('b, 'd)).as("out")).analyze)
   }
+
+  test("SPARK-36721: Simplify boolean equalities if one side is literal") {
+    checkCondition(boolRelation, And('a, 'b) === TrueLiteral, And('a, 'b))
+    checkCondition(boolRelation, TrueLiteral === And('a, 'b), And('a, 'b))
+    checkCondition(boolRelation, And('a, 'b) === FalseLiteral, Or(Not('a), 
Not('b)))
+    checkCondition(boolRelation, FalseLiteral === And('a, 'b), Or(Not('a), 
Not('b)))
+    checkCondition(boolRelation, IsNull('a) <=> TrueLiteral, IsNull('a))
+    checkCondition(boolRelation, TrueLiteral <=> IsNull('a), IsNull('a))
+    checkCondition(boolRelation, IsNull('a) <=> FalseLiteral, IsNotNull('a))
+    checkCondition(boolRelation, FalseLiteral <=> IsNull('a), IsNotNull('a))
+
+    // Should not optimize for nullable <=> Literal
+    checkCondition(boolRelation, And('a, 'b) <=> TrueLiteral, And('a, 'b) <=> 
TrueLiteral)
+    checkCondition(boolRelation, TrueLiteral <=> And('a, 'b), TrueLiteral <=> 
And('a, 'b))
+    checkCondition(boolRelation, And('a, 'b) <=> FalseLiteral, And('a, 'b) <=> 
FalseLiteral)
+    checkCondition(boolRelation, FalseLiteral <=> And('a, 'b), FalseLiteral 
<=> And('a, 'b))
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to