Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/22439#discussion_r217997181 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/complexTypeCreator.scala --- @@ -75,87 +75,60 @@ case class CreateArray(children: Seq[Expression]) extends Expression { private [sql] object GenArrayData { /** - * Return Java code pieces based on DataType and isPrimitive to allocate ArrayData class + * Return Java code pieces based on DataType and array size to allocate ArrayData class * * @param ctx a [[CodegenContext]] * @param elementType data type of underlying array elements * @param elementsCode concatenated set of [[ExprCode]] for each element of an underlying array * @param isMapKey if true, throw an exception when the element is null - * @return (code pre-assignments, concatenated assignments to each array elements, - * code post-assignments, arrayData name) + * @param functionName string to include in the error message + * @return (array allocation, concatenated assignments to each array elements, arrayData name) */ def genCodeToCreateArrayData( ctx: CodegenContext, elementType: DataType, elementsCode: Seq[ExprCode], - isMapKey: Boolean): (String, String, String, String) = { + isMapKey: Boolean, + functionName: String): (String, String, String) = { val arrayDataName = ctx.freshName("arrayData") - val numElements = elementsCode.length + val numElements = s"${elementsCode.length}L" - if (!CodeGenerator.isPrimitiveType(elementType)) { - val arrayName = ctx.freshName("arrayObject") - val genericArrayClass = classOf[GenericArrayData].getName + val initialization = CodeGenerator.createArrayData( + arrayDataName, elementType, numElements, s" $functionName failed.") - val assignments = elementsCode.zipWithIndex.map { case (eval, i) => + val assignments = elementsCode.zipWithIndex.map { case (eval, i) => + val setArrayElement = CodeGenerator.setArrayElement( + arrayDataName, elementType, i.toString, eval.value) + 
+ val assignment = if (eval.isNull == FalseLiteral) { + s"\n$setArrayElement\n" + } else { val isNullAssignment = if (!isMapKey) { - s"$arrayName[$i] = null;" + s"$arrayDataName.setNullAt($i);" } else { "throw new RuntimeException(\"Cannot use null as map key!\");" } - eval.code + s""" - if (${eval.isNull}) { - $isNullAssignment - } else { - $arrayName[$i] = ${eval.value}; - } - """ - } - val assignmentString = ctx.splitExpressionsWithCurrentInputs( - expressions = assignments, - funcName = "apply", - extraArguments = ("Object[]", arrayName) :: Nil) - - (s"Object[] $arrayName = new Object[$numElements];", - assignmentString, - s"final ArrayData $arrayDataName = new $genericArrayClass($arrayName);", - arrayDataName) - } else { - val arrayName = ctx.freshName("array") - val unsafeArraySizeInBytes = - UnsafeArrayData.calculateHeaderPortionInBytes(numElements) + - ByteArrayMethods.roundNumberOfBytesToNearestWord(elementType.defaultSize * numElements) - val baseOffset = Platform.BYTE_ARRAY_OFFSET - - val primitiveValueTypeName = CodeGenerator.primitiveTypeName(elementType) - val assignments = elementsCode.zipWithIndex.map { case (eval, i) => - val isNullAssignment = if (!isMapKey) { - s"$arrayDataName.setNullAt($i);" + + if (eval.isNull == TrueLiteral) { --- End diff -- I added this condition to reduce the size of the generated Java bytecode, because I saw generated code of the following form: ``` if (true) { ... } else { ... } ``` I am neutral on whether to keep or remove this condition.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org