AngersZhuuuu commented on a change in pull request #24973: [SPARK-28169] Fix Partition table partition PushDown failed by "OR" expression URL: https://github.com/apache/spark/pull/24973#discussion_r297953462
########## File path: sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala ########## @@ -237,21 +237,75 @@ private[hive] trait HiveStrategies { * applied. */ object HiveTableScans extends Strategy { + + def constructBinaryOperators(left:Expression, right: Expression, op_type: String): Expression ={ + (left == null, right == null) match { + case (true, true) => null + case (true, false) => right + case (false, true) => left + case (false, false) => + if(op_type == "or") + Or(left, right) + else if (op_type == "and") + And(left, right) + else + null + } + } + + def resolveAndExpression(expr: Expression, partitionKeyIds: AttributeSet): Expression = { + expr match { + case and: And => + constructBinaryOperators(resolveAndExpression(and.left, partitionKeyIds), resolveAndExpression(and.right, partitionKeyIds), "and") + case _ => + resolvePredicatesExpression(expr, partitionKeyIds) + } + } + + def resolveOrExpression(or: Or, partitionKeyIds: AttributeSet): Expression = { + (or.left.isInstanceOf[Or],or.right.isInstanceOf[Or]) match { + case (true, true) => constructBinaryOperators(resolveOrExpression(or.left.asInstanceOf[Or], partitionKeyIds) , resolveOrExpression(or.right.asInstanceOf[Or], partitionKeyIds), "or") + case (true, false) => constructBinaryOperators(resolveOrExpression(or.left.asInstanceOf[Or], partitionKeyIds) , resolveAndExpression(or.right, partitionKeyIds), "or") + case (false, true) => constructBinaryOperators(resolveAndExpression(or.left, partitionKeyIds) , resolveOrExpression(or.right.asInstanceOf[Or], partitionKeyIds), "or") + case (false, false) => constructBinaryOperators(resolveAndExpression(or.left, partitionKeyIds) , resolveAndExpression(or.right, partitionKeyIds), "or") + } + } + + def resolvePredicatesExpression(expr: Expression, partitionKeyIds: AttributeSet): Expression ={ + if(!expr.references.isEmpty && expr.references.subsetOf(partitionKeyIds)) + expr + else + null + } + + def extractPushDownPredicate(predicates: 
Seq[Expression], partitionKeyIds: AttributeSet): Seq[Expression] ={ + predicates.map { + case or: Or => + resolveOrExpression(or, partitionKeyIds) + case predicate => + resolvePredicatesExpression(predicate, partitionKeyIds) + } + } + def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match { case PhysicalOperation(projectList, predicates, relation: HiveTableRelation) => // Filter out all predicates that only deal with partition keys, these are given to the // hive table scan operator to be used for partition pruning. val partitionKeyIds = AttributeSet(relation.partitionCols) - val (pruningPredicates, otherPredicates) = predicates.partition { predicate => + val (_, otherPredicates) = predicates.partition { predicate => { !predicate.references.isEmpty && - predicate.references.subsetOf(partitionKeyIds) + predicate.references.subsetOf(partitionKeyIds) } + } + + val extractedPruningPredicates = extractPushDownPredicate(predicates, partitionKeyIds) + .filter(_ != null) Review comment: @HyukjinKwon What I do is extract the conditions that involve only partition keys. With the old code: ` val (pruningPredicates, otherPredicates) = predicates.partition { predicate => !predicate.references.isEmpty && predicate.references.subsetOf(partitionKeyIds) }` if a predicate expression also references any non-partition key, the whole predicate is not pushed down to HiveTableScanExec. So what I do to fix this situation is extract all the conditions on partition keys from such expressions and push them to HiveTableScanExec; HiveTableScanExec will handle the complex combined expressions. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org