Github user kiszk commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21102#discussion_r205930794
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala ---
    @@ -3805,3 +3801,339 @@ object ArrayUnion {
         new GenericArrayData(arrayBuffer)
       }
     }
    +
    +/**
    + * Returns an array of the elements in the intersect of x and y, without duplicates
    + */
    +@ExpressionDescription(
    +  usage = """
    +  _FUNC_(array1, array2) - Returns an array of the elements in the intersection of array1 and
    +    array2, without duplicates.
    +  """,
    +  examples = """
    +    Examples:Fun
    +      > SELECT _FUNC_(array(1, 2, 3), array(1, 3, 5));
    +       array(1, 3)
    +  """,
    +  since = "2.4.0")
    +case class ArrayIntersect(left: Expression, right: Expression) extends ArraySetLike {
    +  override def dataType: DataType = ArrayType(elementType,
    +    left.dataType.asInstanceOf[ArrayType].containsNull &&
    +      right.dataType.asInstanceOf[ArrayType].containsNull)
    +
    +  var hsInt: OpenHashSet[Int] = _
    +  var hsResultInt: OpenHashSet[Int] = _
    +  var hsLong: OpenHashSet[Long] = _
    +  var hsResultLong: OpenHashSet[Long] = _
    +
    +  def assignInt(array: ArrayData, idx: Int, resultArray: ArrayData, pos: Int): Boolean = {
    +    val elem = array.getInt(idx)
    +    if (hsInt.contains(elem) && !hsResultInt.contains(elem)) {
    +      if (resultArray != null) {
    +        resultArray.setInt(pos, elem)
    +      }
    +      hsResultInt.add(elem)
    +      true
    +    } else {
    +      false
    +    }
    +  }
    +
    +  def assignLong(array: ArrayData, idx: Int, resultArray: ArrayData, pos: Int): Boolean = {
    +    val elem = array.getLong(idx)
    +    if (hsLong.contains(elem) && !hsResultLong.contains(elem)) {
    +      if (resultArray != null) {
    +        resultArray.setLong(pos, elem)
    +      }
    +      hsResultLong.add(elem)
    +      true
    +    } else {
    +      false
    +    }
    +  }
    +
    +  def evalIntLongPrimitiveType(
    +      array1: ArrayData,
    +      array2: ArrayData,
    +      resultArray: ArrayData,
    +      initFoundNullElement: Boolean,
    +      isLongType: Boolean): (Int, Boolean) = {
    +    // store elements into resultArray
    +    var i = 0
    +    var foundNullElement = initFoundNullElement
    +    if (resultArray == null) {
    +      // hsInt or hsLong is updated only once since it is not changed
    +      while (i < array1.numElements()) {
    --- End diff --
    
    You are right, fixed.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to