Github user dbtsai commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22684#discussion_r224179579
  
    --- Diff: 
sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala ---
    @@ -90,32 +107,51 @@ private[orc] object OrcFilters extends Logging {
     
         expression match {
           case And(left, right) =>
    -        // At here, it is not safe to just convert one side if we do not 
understand the
    -        // other side. Here is an example used to explain the reason.
    +        // At here, it is not safe to just convert one side and remove the 
other side
    +        // if we do not understand what the parent filters are.
    +        //
    +        // Here is an example used to explain the reason.
             // Let's say we have NOT(a = 2 AND b in ('1')) and we do not 
understand how to
             // convert b in ('1'). If we only convert a = 2, we will end up 
with a filter
             // NOT(a = 2), which will generate wrong results.
    -        // Pushing one side of AND down is only safe to do at the top 
level.
    -        // You can see ParquetRelation's initializeLocalJobFunc method as 
an example.
    -        for {
    -          _ <- buildSearchArgument(dataTypeMap, left, newBuilder)
    -          _ <- buildSearchArgument(dataTypeMap, right, newBuilder)
    -          lhs <- buildSearchArgument(dataTypeMap, left, builder.startAnd())
    -          rhs <- buildSearchArgument(dataTypeMap, right, lhs)
    -        } yield rhs.end()
    +        //
    +        // Pushing one side of AND down is only safe to do at the top 
level or in the child
    +        // AND before hitting NOT or OR conditions, and in this case, the 
unsupported predicate
    +        // can be safely removed.
    +        val leftBuilderOption = createBuilder(dataTypeMap, left,
    +          newBuilder, canPartialPushDownConjuncts)
    +        val rightBuilderOption =
    +          createBuilder(dataTypeMap, right, newBuilder, 
canPartialPushDownConjuncts)
    +        (leftBuilderOption, rightBuilderOption) match {
    +          case (Some(_), Some(_)) =>
    +            for {
    +              lhs <- createBuilder(dataTypeMap, left,
    +                builder.startAnd(), canPartialPushDownConjuncts)
    +              rhs <- createBuilder(dataTypeMap, right, lhs, 
canPartialPushDownConjuncts)
    +            } yield rhs.end()
    +
    +          case (Some(_), None) if canPartialPushDownConjuncts =>
    +            createBuilder(dataTypeMap, left, builder, 
canPartialPushDownConjuncts)
    +
    +          case (None, Some(_)) if canPartialPushDownConjuncts =>
    +            createBuilder(dataTypeMap, right, builder, 
canPartialPushDownConjuncts)
    +
    +          case _ => None
    +        }
     
           case Or(left, right) =>
             for {
    -          _ <- buildSearchArgument(dataTypeMap, left, newBuilder)
    -          _ <- buildSearchArgument(dataTypeMap, right, newBuilder)
    -          lhs <- buildSearchArgument(dataTypeMap, left, builder.startOr())
    -          rhs <- buildSearchArgument(dataTypeMap, right, lhs)
    +          _ <- createBuilder(dataTypeMap, left, newBuilder, 
canPartialPushDownConjuncts = false)
    +          _ <- createBuilder(dataTypeMap, right, newBuilder, 
canPartialPushDownConjuncts = false)
    +          lhs <- createBuilder(dataTypeMap, left,
    +            builder.startOr(), canPartialPushDownConjuncts = false)
    +          rhs <- createBuilder(dataTypeMap, right, lhs, 
canPartialPushDownConjuncts = false)
             } yield rhs.end()
     
           case Not(child) =>
             for {
    -          _ <- buildSearchArgument(dataTypeMap, child, newBuilder)
    -          negate <- buildSearchArgument(dataTypeMap, child, 
builder.startNot())
    +          _ <- createBuilder(dataTypeMap, child, newBuilder, 
canPartialPushDownConjuncts = false)
    +          negate <- createBuilder(dataTypeMap, child, builder.startNot(), 
false)
    --- End diff --
    
    ditto


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to