Repository: spark Updated Branches: refs/heads/master ee56fc343 -> ef10f452e
[SPARK-21652][SQL][FOLLOW-UP] Fix rule conflict caused by InferFiltersFromConstraints ## What changes were proposed in this pull request? The optimizer rule `InferFiltersFromConstraints` could cause our batch `Operator Optimizations` to exceed the max iteration limit (i.e., 100), so that the final plan might not be properly optimized. The rule `InferFiltersFromConstraints` could conflict with the other Filter/Join predicate reduction rules. Thus, we need to separate `InferFiltersFromConstraints` from the other rules. This PR separates `InferFiltersFromConstraints` from the main batch `Operator Optimizations`. ## How was this patch tested? The existing test cases. Author: gatorsmile <gatorsm...@gmail.com> Closes #19149 from gatorsmile/inferFilterRule. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ef10f452 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ef10f452 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ef10f452 Branch: refs/heads/master Commit: ef10f452e62c77d0434e80f7266f6685eb1bcb2c Parents: ee56fc3 Author: gatorsmile <gatorsm...@gmail.com> Authored: Tue Dec 19 09:05:47 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Tue Dec 19 09:05:47 2017 -0800 ---------------------------------------------------------------------- .../sql/catalyst/optimizer/Optimizer.scala | 115 +++++++++++-------- 1 file changed, 64 insertions(+), 51 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ef10f452/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 5acadf8..6a4d1e9 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -47,7 +47,62 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) protected def fixedPoint = FixedPoint(SQLConf.get.optimizerMaxIterations) def batches: Seq[Batch] = { - Batch("Eliminate Distinct", Once, EliminateDistinct) :: + val operatorOptimizationRuleSet = + Seq( + // Operator push down + PushProjectionThroughUnion, + ReorderJoin, + EliminateOuterJoin, + PushPredicateThroughJoin, + PushDownPredicate, + LimitPushDown, + ColumnPruning, + InferFiltersFromConstraints, + // Operator combine + CollapseRepartition, + CollapseProject, + CollapseWindow, + CombineFilters, + CombineLimits, + CombineUnions, + // Constant folding and strength reduction + NullPropagation, + ConstantPropagation, + FoldablePropagation, + OptimizeIn, + ConstantFolding, + ReorderAssociativeOperator, + LikeSimplification, + BooleanSimplification, + SimplifyConditionals, + RemoveDispensableExpressions, + SimplifyBinaryComparison, + PruneFilters, + EliminateSorts, + SimplifyCasts, + SimplifyCaseConversionExpressions, + RewriteCorrelatedScalarSubquery, + EliminateSerialization, + RemoveRedundantAliases, + RemoveRedundantProject, + SimplifyCreateStructOps, + SimplifyCreateArrayOps, + SimplifyCreateMapOps, + CombineConcats) ++ + extendedOperatorOptimizationRules + + val operatorOptimizationBatch: Seq[Batch] = { + val rulesWithoutInferFiltersFromConstraints = + operatorOptimizationRuleSet.filterNot(_ == InferFiltersFromConstraints) + Batch("Operator Optimization before Inferring Filters", fixedPoint, + rulesWithoutInferFiltersFromConstraints: _*) :: + Batch("Infer Filters", Once, + InferFiltersFromConstraints) :: + Batch("Operator Optimization after Inferring Filters", fixedPoint, + rulesWithoutInferFiltersFromConstraints: _*) :: Nil + } + + (Batch("Eliminate Distinct", Once, EliminateDistinct) :: // Technically some 
of the rules in Finish Analysis are not optimizer rules and belong more // in the analyzer, because they are needed for correctness (e.g. ComputeCurrentTime). // However, because we also use the analyzer to canonicalized queries (for view definition), @@ -81,68 +136,26 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) ReplaceDistinctWithAggregate) :: Batch("Aggregate", fixedPoint, RemoveLiteralFromGroupExpressions, - RemoveRepetitionFromGroupExpressions) :: - Batch("Operator Optimizations", fixedPoint, Seq( - // Operator push down - PushProjectionThroughUnion, - ReorderJoin, - EliminateOuterJoin, - InferFiltersFromConstraints, - BooleanSimplification, - PushPredicateThroughJoin, - PushDownPredicate, - LimitPushDown, - ColumnPruning, - // Operator combine - CollapseRepartition, - CollapseProject, - CollapseWindow, - CombineFilters, - CombineLimits, - CombineUnions, - // Constant folding and strength reduction - NullPropagation, - ConstantPropagation, - FoldablePropagation, - OptimizeIn, - ConstantFolding, - ReorderAssociativeOperator, - LikeSimplification, - BooleanSimplification, - SimplifyConditionals, - RemoveDispensableExpressions, - SimplifyBinaryComparison, - PruneFilters, - EliminateSorts, - SimplifyCasts, - SimplifyCaseConversionExpressions, - RewriteCorrelatedScalarSubquery, - EliminateSerialization, - RemoveRedundantAliases, - RemoveRedundantProject, - SimplifyCreateStructOps, - SimplifyCreateArrayOps, - SimplifyCreateMapOps, - CombineConcats) ++ - extendedOperatorOptimizationRules: _*) :: + RemoveRepetitionFromGroupExpressions) :: Nil ++ + operatorOptimizationBatch) :+ Batch("Join Reorder", Once, - CostBasedJoinReorder) :: + CostBasedJoinReorder) :+ Batch("Decimal Optimizations", fixedPoint, - DecimalAggregates) :: + DecimalAggregates) :+ Batch("Object Expressions Optimization", fixedPoint, EliminateMapObjects, - CombineTypedFilters) :: + CombineTypedFilters) :+ Batch("LocalRelation", fixedPoint, ConvertToLocalRelation, - PropagateEmptyRelation) 
:: + PropagateEmptyRelation) :+ // The following batch should be executed after batch "Join Reorder" and "LocalRelation". Batch("Check Cartesian Products", Once, - CheckCartesianProducts) :: + CheckCartesianProducts) :+ Batch("RewriteSubquery", Once, RewritePredicateSubquery, ColumnPruning, CollapseProject, - RemoveRedundantProject) :: Nil + RemoveRedundantProject) } /** --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org