Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21061#discussion_r192330635 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -1882,3 +1882,311 @@ case class ArrayRepeat(left: Expression, right: Expression) } } + +object ArraySetLike { + val kindUnion = 1 + + private val MAX_ARRAY_LENGTH: Int = ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH + + def toArrayDataInt(hs: OpenHashSet[Int]): ArrayData = { + val array = new Array[Int](hs.size) + var pos = hs.nextPos(0) + var i = 0 + while (pos != OpenHashSet.INVALID_POS) { + array(i) = hs.getValue(pos) + pos = hs.nextPos(pos + 1) + i += 1 + } + + val numBytes = 4L * array.length + val unsafeArraySizeInBytes = UnsafeArrayData.calculateHeaderPortionInBytes(array.length) + + org.apache.spark.unsafe.array.ByteArrayMethods.roundNumberOfBytesToNearestWord(numBytes) + // Since UnsafeArrayData.fromPrimitiveArray() uses long[], max elements * 8 bytes can be used + if (unsafeArraySizeInBytes <= Integer.MAX_VALUE * 8) { + UnsafeArrayData.fromPrimitiveArray(array) + } else { + new GenericArrayData(array) + } + } + + def toArrayDataLong(hs: OpenHashSet[Long]): ArrayData = { + val array = new Array[Long](hs.size) + var pos = hs.nextPos(0) + var i = 0 + while (pos != OpenHashSet.INVALID_POS) { + array(i) = hs.getValue(pos) + pos = hs.nextPos(pos + 1) + i += 1 + } + + val numBytes = 8L * array.length + val unsafeArraySizeInBytes = UnsafeArrayData.calculateHeaderPortionInBytes(array.length) + + org.apache.spark.unsafe.array.ByteArrayMethods.roundNumberOfBytesToNearestWord(numBytes) + // Since UnsafeArrayData.fromPrimitiveArray() uses long[], max elements * 8 bytes can be used + if (unsafeArraySizeInBytes <= Integer.MAX_VALUE * 8) { --- End diff -- Here `8` is `sizeof(long)`, i.e. the size in bytes of a Java primitive `long`.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org