Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/21061#discussion_r182690674 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -417,3 +418,156 @@ case class ArrayMax(child: Expression) extends UnaryExpression with ImplicitCast override def prettyName: String = "array_max" } + +abstract class ArraySetUtils extends BinaryExpression with ExpectsInputTypes { + val kindUnion = 1 + def typeId: Int + + def array1: Expression + def array2: Expression + + override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, ArrayType) + + override def checkInputDataTypes(): TypeCheckResult = { + val r = super.checkInputDataTypes() + if ((r == TypeCheckResult.TypeCheckSuccess) && + (array1.dataType.asInstanceOf[ArrayType].elementType != + array2.dataType.asInstanceOf[ArrayType].elementType)) { + TypeCheckResult.TypeCheckFailure("Element type in both arrays must be the same") + } else { + r + } + } + + override def dataType: DataType = array1.dataType + + private def elementType = dataType.asInstanceOf[ArrayType].elementType + private def cn1 = array1.dataType.asInstanceOf[ArrayType].containsNull + private def cn2 = array2.dataType.asInstanceOf[ArrayType].containsNull + + override def nullSafeEval(input1: Any, input2: Any): Any = { + val ary1 = input1.asInstanceOf[ArrayData] + val ary2 = input2.asInstanceOf[ArrayData] + + if (!cn1 && !cn2) { + elementType match { + case IntegerType => + // avoid boxing of primitive int array elements + val hs = new OpenHashSet[Int] + var i = 0 + while (i < ary1.numElements()) { + hs.add(ary1.getInt(i)) + i += 1 + } + i = 0 + while (i < ary2.numElements()) { --- End diff -- Thank you for your good suggestion. I will create a new abstract method for this part, which will be overridden by each of the three subclasses.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org