tanelk commented on pull request #31024: URL: https://github.com/apache/spark/pull/31024#issuecomment-757121187
> It would be nice to do this recursively. > Currently you split the outer `and` filters and sort them. each of these can have an `or` filter as a child and it would be beneficial to split and sort those too. Inside `or` the filter selectivity rule must be the other way around. I was thinking something like this. ```diff diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala index 805f9afeba..345c30896f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PredicateReorder.scala @@ -28,7 +28,10 @@ import org.apache.spark.sql.catalyst.rules.Rule object PredicateReorder extends Rule[LogicalPlan] with PredicateHelper { // Get priority from a Expression. Expressions with higher priority are executed in preference // to expressions with lower priority. - private def getPriority(exp: Expression, filterEstimation: Option[FilterEstimation]) = exp match { + private def getPriority( + exp: Expression, + filterEstimation: Option[FilterEstimation], + conjuction: Boolean) = exp match { case e: Expression if e.find(_.isInstanceOf[SubqueryExpression]).isDefined => 1.0 case e: Expression if e.find(_.isInstanceOf[UserDefinedExpression]).isDefined => 2.0 case e: Expression if e.find(_.isInstanceOf[MultiLikeBase]).isDefined || @@ -38,8 +41,11 @@ object PredicateReorder extends Rule[LogicalPlan] with PredicateHelper { case c: CaseWhen => c.branches.size }.max 3.0 + (1.0 / maxSize) - case e => + case e if conjuction => filterEstimation.flatMap(_.calculateFilterSelectivity(e, false).map(5.0 - _)).getOrElse(4.0) + case e if !conjuction => + filterEstimation.flatMap(_.calculateFilterSelectivity(e, false).map(4.0 + _)).getOrElse(4.0) + } private def reorderPredicates(e: Expression, estimation: Option[FilterEstimation]): Expression = { @@ -47,10 +53,16 @@ object PredicateReorder extends Rule[LogicalPlan] with PredicateHelper { case _: Or => splitDisjunctivePredicates(e) .map(reorderPredicates(_, estimation)) + .map(e => (e, getPriority(e, estimation, conjuction = false))) + .sortWith(_._2 > _._2) + .map(_._1) .reduceLeft(Or) case _: And => splitConjunctivePredicates(e) - .map(e => (e, getPriority(e, estimation))).sortWith(_._2 > _._2).map(_._1) + .map(reorderPredicates(_, estimation)) + .map(e => (e, getPriority(e, estimation, conjuction = true))) + .sortWith(_._2 > _._2) + .map(_._1) .reduceLeft(And) case _ => e } ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org