Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19777#discussion_r151951926
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
 ---
    @@ -125,19 +125,43 @@ case class ConcatWs(children: Seq[Expression])
         if (children.forall(_.dataType == StringType)) {
           // All children are strings. In that case we can construct a fixed 
size array.
           val evals = children.map(_.genCode(ctx))
    -
    -      val inputs = evals.map { eval =>
    -        s"${eval.isNull} ? (UTF8String) null : ${eval.value}"
    -      }.mkString(", ")
    -
    -      ev.copy(evals.map(_.code).mkString("\n") + s"""
    -        UTF8String ${ev.value} = UTF8String.concatWs($inputs);
    +      val separator = evals.head
    +      val strings = evals.tail
    +      val numArgs = strings.length
    +      val args = ctx.freshName("args")
    +
    +      val inputs = strings.zipWithIndex.map { case (eval, index) =>
    +        if (eval.isNull != "true") {
    +          s"""
    +             ${eval.code}
    +             if (!${eval.isNull}) {
    +               $args[$index] = ${eval.value};
    +             }
    +           """
    +        } else {
    +          ""
    +        }
    +      }
    +      val codes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) {
    +        ctx.splitExpressions(inputs, "valueConcatWs",
    +          ("InternalRow", ctx.INPUT_ROW) :: ("UTF8String[]", args) :: Nil)
    +      } else {
    +        inputs.mkString("\n")
    +      }
    +      ev.copy(s"""
    +        UTF8String[] $args = new UTF8String[$numArgs];
    +        ${separator.code}
    +        $codes
    +        UTF8String ${ev.value} = UTF8String.concatWs(${separator.value}, 
$args);
             boolean ${ev.isNull} = ${ev.value} == null;
           """)
         } else {
           val array = ctx.freshName("array")
    +      ctx.addMutableState("UTF8String[]", array, "")
           val varargNum = ctx.freshName("varargNum")
    +      ctx.addMutableState("int", varargNum, "")
           val idxInVararg = ctx.freshName("idxInVararg")
    +      ctx.addMutableState("int", idxInVararg, "")
    --- End diff --
    
    Can we do better? I think we can avoid these global variables.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to