Repository: spark
Updated Branches:
  refs/heads/master 0bdb4e516 -> ba0e79f57


[SPARK-22772][SQL] Use splitExpressionsWithCurrentInputs to split codes in elt

## What changes were proposed in this pull request?

In SPARK-22550, which fixes the 64KB JVM bytecode limit problem with `elt`, 
`buildCodeBlocks` is used to split the generated code. However, we should use 
`splitExpressionsWithCurrentInputs` instead, because it handles both normal and 
whole-stage codegen (splitting is not supported in whole-stage codegen yet, so 
in that case it simply doesn't split the code).

## How was this patch tested?

Existing tests.

Author: Liang-Chi Hsieh <vii...@gmail.com>

Closes #19964 from viirya/SPARK-22772.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba0e79f5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba0e79f5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba0e79f5

Branch: refs/heads/master
Commit: ba0e79f57caa279773fb014b7883ee5d69dd0a68
Parents: 0bdb4e5
Author: Liang-Chi Hsieh <vii...@gmail.com>
Authored: Wed Dec 13 13:54:16 2017 -0800
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Wed Dec 13 13:54:16 2017 -0800

----------------------------------------------------------------------
 .../expressions/codegen/CodeGenerator.scala     |  2 +-
 .../expressions/stringExpressions.scala         | 81 ++++++++++----------
 2 files changed, 43 insertions(+), 40 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/ba0e79f5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index 257c3f1..b1d9311 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -878,7 +878,7 @@ class CodegenContext {
    *
    * @param expressions the codes to evaluate expressions.
    */
-  def buildCodeBlocks(expressions: Seq[String]): Seq[String] = {
+  private def buildCodeBlocks(expressions: Seq[String]): Seq[String] = {
     val blocks = new ArrayBuffer[String]()
     val blockBuilder = new StringBuilder()
     var length = 0

http://git-wip-us.apache.org/repos/asf/spark/blob/ba0e79f5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index 47f0b57..8c4d2fd 100755
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -289,53 +289,56 @@ case class Elt(children: Seq[Expression])
     val index = indexExpr.genCode(ctx)
     val strings = stringExprs.map(_.genCode(ctx))
     val indexVal = ctx.freshName("index")
+    val indexMatched = ctx.freshName("eltIndexMatched")
+
     val stringVal = ctx.freshName("stringVal")
+    ctx.addMutableState(ctx.javaType(dataType), stringVal)
+
     val assignStringValue = strings.zipWithIndex.map { case (eval, index) =>
       s"""
-        case ${index + 1}:
-          ${eval.code}
-          $stringVal = ${eval.isNull} ? null : ${eval.value};
-          break;
-      """
+         |if ($indexVal == ${index + 1}) {
+         |  ${eval.code}
+         |  $stringVal = ${eval.isNull} ? null : ${eval.value};
+         |  $indexMatched = true;
+         |  continue;
+         |}
+      """.stripMargin
     }
 
-    val cases = ctx.buildCodeBlocks(assignStringValue)
-    val codes = if (cases.length == 1) {
-      s"""
-        UTF8String $stringVal = null;
-        switch ($indexVal) {
-          ${cases.head}
-        }
-       """
-    } else {
-      var prevFunc = "null"
-      for (c <- cases.reverse) {
-        val funcName = ctx.freshName("eltFunc")
-        val funcBody = s"""
-         private UTF8String $funcName(InternalRow ${ctx.INPUT_ROW}, int 
$indexVal) {
-           UTF8String $stringVal = null;
-           switch ($indexVal) {
-             $c
-             default:
-               return $prevFunc;
-           }
-           return $stringVal;
-         }
-        """
-        val fullFuncName = ctx.addNewFunction(funcName, funcBody)
-        prevFunc = s"$fullFuncName(${ctx.INPUT_ROW}, $indexVal)"
-      }
-      s"UTF8String $stringVal = $prevFunc;"
-    }
+    val codes = ctx.splitExpressionsWithCurrentInputs(
+      expressions = assignStringValue,
+      funcName = "eltFunc",
+      extraArguments = ("int", indexVal) :: Nil,
+      returnType = ctx.JAVA_BOOLEAN,
+      makeSplitFunction = body =>
+        s"""
+           |${ctx.JAVA_BOOLEAN} $indexMatched = false;
+           |do {
+           |  $body
+           |} while (false);
+           |return $indexMatched;
+         """.stripMargin,
+      foldFunctions = _.map { funcCall =>
+        s"""
+           |$indexMatched = $funcCall;
+           |if ($indexMatched) {
+           |  continue;
+           |}
+         """.stripMargin
+      }.mkString)
 
     ev.copy(
       s"""
-      ${index.code}
-      final int $indexVal = ${index.value};
-      $codes
-      UTF8String ${ev.value} = $stringVal;
-      final boolean ${ev.isNull} = ${ev.value} == null;
-    """)
+         |${index.code}
+         |final int $indexVal = ${index.value};
+         |${ctx.JAVA_BOOLEAN} $indexMatched = false;
+         |$stringVal = null;
+         |do {
+         |  $codes
+         |} while (false);
+         |final UTF8String ${ev.value} = $stringVal;
+         |final boolean ${ev.isNull} = ${ev.value} == null;
+       """.stripMargin)
   }
 }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to