Github user kiszk commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21061#discussion_r182532583
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
    @@ -417,3 +418,156 @@ case class ArrayMax(child: Expression) extends 
UnaryExpression with ImplicitCast
     
       override def prettyName: String = "array_max"
     }
    +
    +abstract class ArraySetUtils extends BinaryExpression with 
ExpectsInputTypes {
    +  val kindUnion = 1
    +  def typeId: Int
    +
    +  def array1: Expression
    +  def array2: Expression
    +
    +  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
ArrayType)
    +
    +  override def checkInputDataTypes(): TypeCheckResult = {
    +    val r = super.checkInputDataTypes()
    +    if ((r == TypeCheckResult.TypeCheckSuccess) &&
    +      (array1.dataType.asInstanceOf[ArrayType].elementType !=
    +        array2.dataType.asInstanceOf[ArrayType].elementType)) {
    +      TypeCheckResult.TypeCheckFailure("Element type in both arrays must 
be the same")
    +    } else {
    +      r
    +    }
    +  }
    +
    +  override def dataType: DataType = array1.dataType
    +
    +  private def elementType = dataType.asInstanceOf[ArrayType].elementType
    +  private def cn1 = array1.dataType.asInstanceOf[ArrayType].containsNull
    +  private def cn2 = array2.dataType.asInstanceOf[ArrayType].containsNull
    +
    +  override def nullSafeEval(input1: Any, input2: Any): Any = {
    +    val ary1 = input1.asInstanceOf[ArrayData]
    +    val ary2 = input2.asInstanceOf[ArrayData]
    +
    +    if (!cn1 && !cn2) {
    +      elementType match {
    +        case IntegerType =>
    +          // avoid boxing of primitive int array elements
    +          val hs = new OpenHashSet[Int]
    +          var i = 0
    +          while (i < ary1.numElements()) {
    +            hs.add(ary1.getInt(i))
    +            i += 1
    +          }
    +          i = 0
    +          while (i < ary2.numElements()) {
    --- End diff --
    
    
[Here](https://github.com/apache/spark/pull/21103/files#diff-9853dcf5ce3d2ac1e94d473197ff5768R510) is the final version of `ArraySetUtils` that supports three functions.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to