Repository: spark
Updated Branches:
  refs/heads/branch-2.2 53a6076b2 -> 710d618f3


[SPARK-22498][SQL] Fix 64KB JVM bytecode limit problem with concat

## What changes were proposed in this pull request?

This PR changes `concat` code generation to place generated code for expression 
for arguments into separated methods if these size could be large.
This PR resolved the case of `concat` with a lot of argument

## How was this patch tested?

Added new test cases into `StringExpressionsSuite`

Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com>

Closes #19728 from kiszk/SPARK-22498.

(cherry picked from commit d54bfec2e07f2eb934185402f915558fe27b9312)
Signed-off-by: Wenchen Fan <wenc...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/710d618f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/710d618f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/710d618f

Branch: refs/heads/branch-2.2
Commit: 710d618f3f3a424363e225cefa80a6e70bd8f255
Parents: 53a6076
Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com>
Authored: Sat Nov 18 19:40:06 2017 +0100
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Sat Nov 18 19:40:18 2017 +0100

----------------------------------------------------------------------
 .../expressions/stringExpressions.scala         | 30 ++++++++++++++------
 .../expressions/StringExpressionsSuite.scala    |  6 ++++
 2 files changed, 27 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/710d618f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index a5e253b..0a22332 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -62,15 +62,27 @@ case class Concat(children: Seq[Expression]) extends 
Expression with ImplicitCas
 
   override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
     val evals = children.map(_.genCode(ctx))
-    val inputs = evals.map { eval =>
-      s"${eval.isNull} ? null : ${eval.value}"
-    }.mkString(", ")
-    ev.copy(evals.map(_.code).mkString("\n") + s"""
-      boolean ${ev.isNull} = false;
-      UTF8String ${ev.value} = UTF8String.concat($inputs);
-      if (${ev.value} == null) {
-        ${ev.isNull} = true;
-      }
+    val args = ctx.freshName("args")
+
+    val inputs = evals.zipWithIndex.map { case (eval, index) =>
+      s"""
+        ${eval.code}
+        if (!${eval.isNull}) {
+          $args[$index] = ${eval.value};
+        }
+      """
+    }
+    val codes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) {
+      ctx.splitExpressions(inputs, "valueConcat",
+        ("InternalRow", ctx.INPUT_ROW) :: ("UTF8String[]", args) :: Nil)
+    } else {
+      inputs.mkString("\n")
+    }
+    ev.copy(s"""
+      UTF8String[] $args = new UTF8String[${evals.length}];
+      $codes
+      UTF8String ${ev.value} = UTF8String.concat($args);
+      boolean ${ev.isNull} = ${ev.value} == null;
     """)
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/710d618f/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
index 26978a0..044e9b8 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala
@@ -46,6 +46,12 @@ class StringExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
     // scalastyle:on
   }
 
+  test("SPARK-22498: Concat should not generate codes beyond 64KB") {
+    val N = 5000
+    val strs = (1 to N).map(x => s"s$x")
+    checkEvaluation(Concat(strs.map(Literal.create(_, StringType))), 
strs.mkString, EmptyRow)
+  }
+
   test("concat_ws") {
     def testConcatWs(expected: String, sep: String, inputs: Any*): Unit = {
       val inputExprs = inputs.map {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to