Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21102#discussion_r205930794 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -3805,3 +3801,339 @@ object ArrayUnion { new GenericArrayData(arrayBuffer) } } + +/** + * Returns an array of the elements in the intersect of x and y, without duplicates + */ +@ExpressionDescription( + usage = """ + _FUNC_(array1, array2) - Returns an array of the elements in the intersection of array1 and + array2, without duplicates. + """, + examples = """ + Examples:Fun + > SELECT _FUNC_(array(1, 2, 3), array(1, 3, 5)); + array(1, 3) + """, + since = "2.4.0") +case class ArrayIntersect(left: Expression, right: Expression) extends ArraySetLike { + override def dataType: DataType = ArrayType(elementType, + left.dataType.asInstanceOf[ArrayType].containsNull && + right.dataType.asInstanceOf[ArrayType].containsNull) + + var hsInt: OpenHashSet[Int] = _ + var hsResultInt: OpenHashSet[Int] = _ + var hsLong: OpenHashSet[Long] = _ + var hsResultLong: OpenHashSet[Long] = _ + + def assignInt(array: ArrayData, idx: Int, resultArray: ArrayData, pos: Int): Boolean = { + val elem = array.getInt(idx) + if (hsInt.contains(elem) && !hsResultInt.contains(elem)) { + if (resultArray != null) { + resultArray.setInt(pos, elem) + } + hsResultInt.add(elem) + true + } else { + false + } + } + + def assignLong(array: ArrayData, idx: Int, resultArray: ArrayData, pos: Int): Boolean = { + val elem = array.getLong(idx) + if (hsLong.contains(elem) && !hsResultLong.contains(elem)) { + if (resultArray != null) { + resultArray.setLong(pos, elem) + } + hsResultLong.add(elem) + true + } else { + false + } + } + + def evalIntLongPrimitiveType( + array1: ArrayData, + array2: ArrayData, + resultArray: ArrayData, + initFoundNullElement: Boolean, + isLongType: Boolean): (Int, Boolean) = { + // store elements into resultArray + var i = 0 + var foundNullElement = initFoundNullElement + if (resultArray == null) { + // hsInt or hsLong is updated only once since it is not changed + while (i < array1.numElements()) { --- End diff -- You are right, fixed.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org