tanelk commented on pull request #31024:
URL: https://github.com/apache/spark/pull/31024#issuecomment-757121187


   > It would be nice to do this recursively.
   > Currently you split the outer `and` filters and sort them. each of these 
can have an `or` filter as a child and it would be beneficial to split and sort 
those too. Inside `or` the filter selectivity rule must be the other way around.
   
   I was thinking something like this.
   
   ```diff
   diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala
   index 805f9afeba..345c30896f 100644
   --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala
   +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala
   @@ -28,7 +28,10 @@ import org.apache.spark.sql.catalyst.rules.Rule
    object PredicateReorder extends Rule[LogicalPlan] with PredicateHelper {
      // Get priority from a Expression. Expressions with higher priority are 
executed in preference
      // to expressions with lower priority.
   -  private def getPriority(exp: Expression, filterEstimation: 
Option[FilterEstimation]) = exp match {
   +  private def getPriority(
   +      exp: Expression,
   +      filterEstimation: Option[FilterEstimation],
   +      conjuction: Boolean) = exp match {
        case e: Expression if 
e.find(_.isInstanceOf[SubqueryExpression]).isDefined => 1.0
        case e: Expression if 
e.find(_.isInstanceOf[UserDefinedExpression]).isDefined => 2.0
        case e: Expression if e.find(_.isInstanceOf[MultiLikeBase]).isDefined ||
   @@ -38,8 +41,11 @@ object PredicateReorder extends Rule[LogicalPlan] with 
PredicateHelper {
            case c: CaseWhen => c.branches.size
          }.max
          3.0 + (1.0 / maxSize)
   -    case e =>
   +    case e if conjuction =>
          filterEstimation.flatMap(_.calculateFilterSelectivity(e, 
false).map(5.0 - _)).getOrElse(4.0)
   +    case e if !conjuction =>
   +      filterEstimation.flatMap(_.calculateFilterSelectivity(e, 
false).map(4.0 + _)).getOrElse(4.0)
   +
      }
   
      private def reorderPredicates(e: Expression, estimation: 
Option[FilterEstimation]): Expression = {
   @@ -47,10 +53,16 @@ object PredicateReorder extends Rule[LogicalPlan] with 
PredicateHelper {
          case _: Or =>
            splitDisjunctivePredicates(e)
              .map(reorderPredicates(_, estimation))
   +          .map(e => (e, getPriority(e, estimation, conjuction = false)))
   +          .sortWith(_._2 > _._2)
   +          .map(_._1)
              .reduceLeft(Or)
          case _: And =>
            splitConjunctivePredicates(e)
   -          .map(e => (e, getPriority(e, estimation))).sortWith(_._2 > 
_._2).map(_._1)
   +          .map(reorderPredicates(_, estimation))
   +          .map(e => (e, getPriority(e, estimation, conjuction = true)))
   +          .sortWith(_._2 > _._2)
   +          .map(_._1)
              .reduceLeft(And)
          case _ => e
        }
   ```


----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to