Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/19728#discussion_r150469019 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala --- @@ -63,15 +63,28 @@ case class Concat(children: Seq[Expression]) extends Expression with ImplicitCas override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val evals = children.map(_.genCode(ctx)) - val inputs = evals.map { eval => - s"${eval.isNull} ? null : ${eval.value}" - }.mkString(", ") - ev.copy(evals.map(_.code).mkString("\n") + s""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = UTF8String.concat($inputs); - if (${ev.value} == null) { - ${ev.isNull} = true; + val argNums = evals.length + val args = ctx.freshName("argLen") + ctx.addMutableState("UTF8String[]", args, "") + + val inputs = evals.zipWithIndex.map { case (eval, index) => + if (eval.isNull != "true") { + s""" + ${eval.code} + if (!${eval.isNull}) { + $args[$index] = ${eval.value}; + } --- End diff -- When the next row is processed, `$args = new UTF8String[$argNums]` is executed again in the `apply()` method. In other words, the current implementation does not reuse `UTF8String[]` between different rows.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org