Repository: spark
Updated Branches:
  refs/heads/master ee56fc343 -> ef10f452e


[SPARK-21652][SQL][FOLLOW-UP] Fix rule conflict caused by 
InferFiltersFromConstraints

## What changes were proposed in this pull request?
The optimizer rule `InferFiltersFromConstraints` could cause our batch 
`Operator Optimizations` to exceed the max iteration limit (i.e., 100), so 
that the final plan might not be properly optimized. The rule 
`InferFiltersFromConstraints` could conflict with the other Filter/Join 
predicate reduction rules. Thus, we need to separate 
`InferFiltersFromConstraints` from the other rules.

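This PR separates `InferFiltersFromConstraints` from the main batch 
`Operator Optimizations`. The remaining rules now run to a fixed point, then 
`InferFiltersFromConstraints` runs once in its own batch, and then the 
remaining rules run to a fixed point again.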

## How was this patch tested?
The existing test cases.

Author: gatorsmile <gatorsm...@gmail.com>

Closes #19149 from gatorsmile/inferFilterRule.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ef10f452
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ef10f452
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ef10f452

Branch: refs/heads/master
Commit: ef10f452e62c77d0434e80f7266f6685eb1bcb2c
Parents: ee56fc3
Author: gatorsmile <gatorsm...@gmail.com>
Authored: Tue Dec 19 09:05:47 2017 -0800
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Tue Dec 19 09:05:47 2017 -0800

----------------------------------------------------------------------
 .../sql/catalyst/optimizer/Optimizer.scala      | 115 +++++++++++--------
 1 file changed, 64 insertions(+), 51 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ef10f452/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
index 5acadf8..6a4d1e9 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala
@@ -47,7 +47,62 @@ abstract class Optimizer(sessionCatalog: SessionCatalog)
   protected def fixedPoint = FixedPoint(SQLConf.get.optimizerMaxIterations)
 
   def batches: Seq[Batch] = {
-    Batch("Eliminate Distinct", Once, EliminateDistinct) ::
+    val operatorOptimizationRuleSet =
+      Seq(
+        // Operator push down
+        PushProjectionThroughUnion,
+        ReorderJoin,
+        EliminateOuterJoin,
+        PushPredicateThroughJoin,
+        PushDownPredicate,
+        LimitPushDown,
+        ColumnPruning,
+        InferFiltersFromConstraints,
+        // Operator combine
+        CollapseRepartition,
+        CollapseProject,
+        CollapseWindow,
+        CombineFilters,
+        CombineLimits,
+        CombineUnions,
+        // Constant folding and strength reduction
+        NullPropagation,
+        ConstantPropagation,
+        FoldablePropagation,
+        OptimizeIn,
+        ConstantFolding,
+        ReorderAssociativeOperator,
+        LikeSimplification,
+        BooleanSimplification,
+        SimplifyConditionals,
+        RemoveDispensableExpressions,
+        SimplifyBinaryComparison,
+        PruneFilters,
+        EliminateSorts,
+        SimplifyCasts,
+        SimplifyCaseConversionExpressions,
+        RewriteCorrelatedScalarSubquery,
+        EliminateSerialization,
+        RemoveRedundantAliases,
+        RemoveRedundantProject,
+        SimplifyCreateStructOps,
+        SimplifyCreateArrayOps,
+        SimplifyCreateMapOps,
+        CombineConcats) ++
+        extendedOperatorOptimizationRules
+
+    val operatorOptimizationBatch: Seq[Batch] = {
+      val rulesWithoutInferFiltersFromConstraints =
+        operatorOptimizationRuleSet.filterNot(_ == InferFiltersFromConstraints)
+      Batch("Operator Optimization before Inferring Filters", fixedPoint,
+        rulesWithoutInferFiltersFromConstraints: _*) ::
+      Batch("Infer Filters", Once,
+        InferFiltersFromConstraints) ::
+      Batch("Operator Optimization after Inferring Filters", fixedPoint,
+        rulesWithoutInferFiltersFromConstraints: _*) :: Nil
+    }
+
+    (Batch("Eliminate Distinct", Once, EliminateDistinct) ::
     // Technically some of the rules in Finish Analysis are not optimizer 
rules and belong more
     // in the analyzer, because they are needed for correctness (e.g. 
ComputeCurrentTime).
     // However, because we also use the analyzer to canonicalized queries (for 
view definition),
@@ -81,68 +136,26 @@ abstract class Optimizer(sessionCatalog: SessionCatalog)
       ReplaceDistinctWithAggregate) ::
     Batch("Aggregate", fixedPoint,
       RemoveLiteralFromGroupExpressions,
-      RemoveRepetitionFromGroupExpressions) ::
-    Batch("Operator Optimizations", fixedPoint, Seq(
-      // Operator push down
-      PushProjectionThroughUnion,
-      ReorderJoin,
-      EliminateOuterJoin,
-      InferFiltersFromConstraints,
-      BooleanSimplification,
-      PushPredicateThroughJoin,
-      PushDownPredicate,
-      LimitPushDown,
-      ColumnPruning,
-      // Operator combine
-      CollapseRepartition,
-      CollapseProject,
-      CollapseWindow,
-      CombineFilters,
-      CombineLimits,
-      CombineUnions,
-      // Constant folding and strength reduction
-      NullPropagation,
-      ConstantPropagation,
-      FoldablePropagation,
-      OptimizeIn,
-      ConstantFolding,
-      ReorderAssociativeOperator,
-      LikeSimplification,
-      BooleanSimplification,
-      SimplifyConditionals,
-      RemoveDispensableExpressions,
-      SimplifyBinaryComparison,
-      PruneFilters,
-      EliminateSorts,
-      SimplifyCasts,
-      SimplifyCaseConversionExpressions,
-      RewriteCorrelatedScalarSubquery,
-      EliminateSerialization,
-      RemoveRedundantAliases,
-      RemoveRedundantProject,
-      SimplifyCreateStructOps,
-      SimplifyCreateArrayOps,
-      SimplifyCreateMapOps,
-      CombineConcats) ++
-      extendedOperatorOptimizationRules: _*) ::
+      RemoveRepetitionFromGroupExpressions) :: Nil ++
+    operatorOptimizationBatch) :+
     Batch("Join Reorder", Once,
-      CostBasedJoinReorder) ::
+      CostBasedJoinReorder) :+
     Batch("Decimal Optimizations", fixedPoint,
-      DecimalAggregates) ::
+      DecimalAggregates) :+
     Batch("Object Expressions Optimization", fixedPoint,
       EliminateMapObjects,
-      CombineTypedFilters) ::
+      CombineTypedFilters) :+
     Batch("LocalRelation", fixedPoint,
       ConvertToLocalRelation,
-      PropagateEmptyRelation) ::
+      PropagateEmptyRelation) :+
     // The following batch should be executed after batch "Join Reorder" and 
"LocalRelation".
     Batch("Check Cartesian Products", Once,
-      CheckCartesianProducts) ::
+      CheckCartesianProducts) :+
     Batch("RewriteSubquery", Once,
       RewritePredicateSubquery,
       ColumnPruning,
       CollapseProject,
-      RemoveRedundantProject) :: Nil
+      RemoveRedundantProject)
   }
 
   /**


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to