Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/23176#discussion_r237382322
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala ---
    @@ -367,11 +367,29 @@ case class InSet(child: Expression, hset: Set[Any]) extends UnaryExpression with
       }
     
       @transient lazy val set: Set[Any] = child.dataType match {
    -    case _: AtomicType => hset
    +    case t: AtomicType if !t.isInstanceOf[BinaryType] => hset
         case _: NullType => hset
         case _ =>
    +      val ord = TypeUtils.getInterpretedOrdering(child.dataType)
    +      val ordering = if (hasNull) {
    +        new Ordering[Any] {
    +          override def compare(x: Any, y: Any): Int = {
    +            if (x == null && y == null) {
    +              0
    +            } else if (x == null) {
    +              -1
    +            } else if (y == null) {
    +              1
    +            } else {
    +              ord.compare(x, y)
    +            }
    +          }
    +        }
    +      } else {
    +        ord
    +      }
           // for structs use interpreted ordering to be able to compare UnsafeRows with non-UnsafeRows
    -      TreeSet.empty(TypeUtils.getInterpretedOrdering(child.dataType)) ++ hset
    +      TreeSet.empty(ordering) ++ hset
    --- End diff --
    
    Shall we just filter out nulls when building the tree set, instead of wrapping the ordering to make it null-aware?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to