Github user kiszk commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21061#discussion_r196671471
  
    --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala ---
    @@ -2355,3 +2355,297 @@ case class ArrayRemove(left: Expression, right: Expression)
     
       override def prettyName: String = "array_remove"
     }
    +
    +object ArraySetLike {
    +  def useGenericArrayData(elementSize: Int, length: Int): Boolean = {
    +    // Use the same calculation in UnsafeArrayData.fromPrimitiveArray()
    +    val headerInBytes = UnsafeArrayData.calculateHeaderPortionInBytes(length)
    +    val valueRegionInBytes = elementSize.toLong * length
    +    val totalSizeInLongs = (headerInBytes + valueRegionInBytes + 7) / 8
    +    totalSizeInLongs > Integer.MAX_VALUE / 8
    +  }
    +
    +  def throwUnionLengthOverflowException(length: Int): Unit = {
    +    throw new RuntimeException(s"Unsuccessful try to union arrays with $length " +
    +      s"elements due to exceeding the array size limit " +
    +      s"${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.")
    +  }
    +}
    +
    +
    +abstract class ArraySetLike extends BinaryArrayExpressionWithImplicitCast {
    +  override def dataType: DataType = left.dataType
    +
    +  override def checkInputDataTypes(): TypeCheckResult = {
    +    val typeCheckResult = super.checkInputDataTypes()
    +    if (typeCheckResult.isSuccess) {
    +      TypeUtils.checkForOrderingExpr(dataType.asInstanceOf[ArrayType].elementType,
    +        s"function $prettyName")
    +    } else {
    +      typeCheckResult
    +    }
    +  }
    +
    +  protected def cn = left.dataType.asInstanceOf[ArrayType].containsNull ||
    +    right.dataType.asInstanceOf[ArrayType].containsNull
    +
    +  @transient protected lazy val ordering: Ordering[Any] =
    +    TypeUtils.getInterpretedOrdering(elementType)
    +
    +  @transient protected lazy val elementTypeSupportEquals = elementType match {
    +    case BinaryType => false
    +    case _: AtomicType => true
    +    case _ => false
    +  }
    +}
    +
    +/**
    + * Returns an array of the elements in the union of x and y, without duplicates
    + */
    +@ExpressionDescription(
    +  usage = """
    +    _FUNC_(array1, array2) - Returns an array of the elements in the union of array1 and array2,
    +      without duplicates.
    +  """,
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(array(1, 2, 3), array(1, 3, 5));
    +       array(1, 2, 3, 5)
    +  """,
    +  since = "2.4.0")
    +case class ArrayUnion(left: Expression, right: Expression) extends ArraySetLike {
    +
    +  override def nullSafeEval(input1: Any, input2: Any): Any = {
    +    val array1 = input1.asInstanceOf[ArrayData]
    +    val array2 = input2.asInstanceOf[ArrayData]
    +
    +    if (elementTypeSupportEquals && !cn) {
    --- End diff ---
    
    Ah, are you suggesting that we check whether none of the elements is `null` during the first loop (e.g. while counting the result array size), even when `containsNull = true`?
    If we can ensure there is no `null` element in an array with `containsNull = true`, we can avoid boxing.
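    For example, here is a rough sketch of that idea (assuming `IntegerType` elements; `unionIntArrays` is a hypothetical helper for illustration, not code from this PR): scan for `null` first, and only fall back to the boxed `GenericArrayData` path when a `null` is actually present.
    
    ```scala
    import scala.collection.mutable
    
    import org.apache.spark.sql.catalyst.expressions.UnsafeArrayData
    import org.apache.spark.sql.catalyst.util.{ArrayData, GenericArrayData}
    
    // Hypothetical helper (not from this PR): union of two IntegerType arrays that
    // scans for nulls first, then takes a primitive (non-boxing) path when no null
    // is actually present, even if containsNull = true.
    def unionIntArrays(array1: ArrayData, array2: ArrayData): ArrayData = {
      def hasNull(a: ArrayData): Boolean = {
        var i = 0
        while (i < a.numElements()) {
          if (a.isNullAt(i)) return true
          i += 1
        }
        false
      }
    
      if (!hasNull(array1) && !hasNull(array2)) {
        // Primitive path: elements stay as Int, so no boxing happens.
        val seen = mutable.LinkedHashSet.empty[Int]
        Seq(array1, array2).foreach { a =>
          var i = 0
          while (i < a.numElements()) { seen += a.getInt(i); i += 1 }
        }
        UnsafeArrayData.fromPrimitiveArray(seen.toArray)
      } else {
        // Boxed fallback that can represent null elements.
        val seen = mutable.LinkedHashSet.empty[Any]
        Seq(array1, array2).foreach { a =>
          var i = 0
          while (i < a.numElements()) {
            seen += (if (a.isNullAt(i)) null else a.getInt(i))
            i += 1
          }
        }
        new GenericArrayData(seen.toArray)
      }
    }
    ```
    
    The non-null branch keeps the elements as primitive `Int`s and can build the result with `UnsafeArrayData.fromPrimitiveArray`, so boxing is only paid when a `null` is actually seen.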

