Repository: spark Updated Branches: refs/heads/branch-2.2 53a6076b2 -> 710d618f3
[SPARK-22498][SQL] Fix 64KB JVM bytecode limit problem with concat ## What changes were proposed in this pull request? This PR changes `concat` code generation to place generated code for expression for arguments into separated methods if these size could be large. This PR resolved the case of `concat` with a lot of argument ## How was this patch tested? Added new test cases into `StringExpressionsSuite` Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com> Closes #19728 from kiszk/SPARK-22498. (cherry picked from commit d54bfec2e07f2eb934185402f915558fe27b9312) Signed-off-by: Wenchen Fan <wenc...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/710d618f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/710d618f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/710d618f Branch: refs/heads/branch-2.2 Commit: 710d618f3f3a424363e225cefa80a6e70bd8f255 Parents: 53a6076 Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com> Authored: Sat Nov 18 19:40:06 2017 +0100 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Sat Nov 18 19:40:18 2017 +0100 ---------------------------------------------------------------------- .../expressions/stringExpressions.scala | 30 ++++++++++++++------ .../expressions/StringExpressionsSuite.scala | 6 ++++ 2 files changed, 27 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/710d618f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index a5e253b..0a22332 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -62,15 +62,27 @@ case class Concat(children: Seq[Expression]) extends Expression with ImplicitCas override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val evals = children.map(_.genCode(ctx)) - val inputs = evals.map { eval => - s"${eval.isNull} ? null : ${eval.value}" - }.mkString(", ") - ev.copy(evals.map(_.code).mkString("\n") + s""" - boolean ${ev.isNull} = false; - UTF8String ${ev.value} = UTF8String.concat($inputs); - if (${ev.value} == null) { - ${ev.isNull} = true; - } + val args = ctx.freshName("args") + + val inputs = evals.zipWithIndex.map { case (eval, index) => + s""" + ${eval.code} + if (!${eval.isNull}) { + $args[$index] = ${eval.value}; + } + """ + } + val codes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) { + ctx.splitExpressions(inputs, "valueConcat", + ("InternalRow", ctx.INPUT_ROW) :: ("UTF8String[]", args) :: Nil) + } else { + inputs.mkString("\n") + } + ev.copy(s""" + UTF8String[] $args = new UTF8String[${evals.length}]; + $codes + UTF8String ${ev.value} = UTF8String.concat($args); + boolean ${ev.isNull} = ${ev.value} == null; """) } } http://git-wip-us.apache.org/repos/asf/spark/blob/710d618f/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 26978a0..044e9b8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -46,6 +46,12 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { // scalastyle:on } + test("SPARK-22498: Concat should not generate codes beyond 64KB") { + val N = 5000 + val strs = (1 to N).map(x => s"s$x") + checkEvaluation(Concat(strs.map(Literal.create(_, StringType))), strs.mkString, EmptyRow) + } + test("concat_ws") { def testConcatWs(expected: String, sep: String, inputs: Any*): Unit = { val inputExprs = inputs.map { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org