Repository: spark Updated Branches: refs/heads/branch-1.6 a50ef3d9a -> 23f9faa40
[SPARK-16845][SQL][BRANCH-1.6] GeneratedClass$SpecificOrdering` grows beyond 64 KB ## What changes were proposed in this pull request? This is a backport pr of #15480 into `branch-1.6`. ## How was this patch tested? Existing tests. Author: Liwei Lin <lwl...@gmail.com> Closes #17158 from ueshin/issues/SPARK-16845_1.6. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/23f9faa4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/23f9faa4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/23f9faa4 Branch: refs/heads/branch-1.6 Commit: 23f9faa40d1a853233cd8b0666185eb7a9ad589a Parents: a50ef3d Author: Liwei Lin <lwl...@gmail.com> Authored: Mon Mar 6 13:12:45 2017 -0800 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Mon Mar 6 13:12:45 2017 -0800 ---------------------------------------------------------------------- .../expressions/codegen/CodeGenerator.scala | 32 +++++++++++++++++--- .../expressions/codegen/GenerateOrdering.scala | 27 +++++++++++++++-- .../catalyst/expressions/OrderingSuite.scala | 10 ++++++ 3 files changed, 62 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/23f9faa4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 0dec50e..0e9d9fd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -364,6 +364,27 @@ class CodeGenContext { * @param expressions the codes to evaluate expressions. */ def splitExpressions(row: String, expressions: Seq[String]): String = { + splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil) + } + + /** + * Splits the generated code of expressions into multiple functions, because function has + * 64kb code size limit in JVM + * + * @param expressions the codes to evaluate expressions. + * @param funcName the split function name base. + * @param arguments the list of (type, name) of the arguments of the split function. + * @param returnType the return type of the split function. + * @param makeSplitFunction makes split function body, e.g. add preparation or cleanup. + * @param foldFunctions folds the split function calls. + */ + def splitExpressions( + expressions: Seq[String], + funcName: String, + arguments: Seq[(String, String)], + returnType: String = "void", + makeSplitFunction: String => String = identity, + foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() for (code <- expressions) { @@ -380,19 +401,20 @@ class CodeGenContext { // inline execution if only one block blocks.head } else { - val apply = freshName("apply") + val func = freshName(funcName) + val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ") val functions = blocks.zipWithIndex.map { case (body, i) => - val name = s"${apply}_$i" + val name = s"${func}_$i" val code = s""" - |private void $name(InternalRow $row) { - | $body + |private $returnType $name($argString) { + | ${makeSplitFunction(body)} |} """.stripMargin addNewFunction(name, code) name } - functions.map(name => s"$name($row);").mkString("\n") + foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})")) } } http://git-wip-us.apache.org/repos/asf/spark/blob/23f9faa4/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index 1ecebbae..6135fd7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -103,8 +103,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } } """ - }.mkString("\n") - comparisons + } + + ctx.splitExpressions( + expressions = comparisons, + funcName = "compare", + arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")), + returnType = "int", + makeSplitFunction = { body => + s""" + InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. + $body + return 0; + """ + }, + foldFunctions = { funCalls => + funCalls.zipWithIndex.map { case (funCall, i) => + val comp = ctx.freshName("comp") + s""" + int $comp = $funCall; + if ($comp != 0) { + return $comp; + } + """ + }.mkString + }) } protected def create(ordering: Seq[SortOrder]): BaseOrdering = { http://git-wip-us.apache.org/repos/asf/spark/blob/23f9faa4/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index 7ad8657..64537d1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -121,4 +121,14 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") { + val sortOrder = Literal("abc").asc + + // this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845 + GenerateOrdering.generate(Array.fill(40)(sortOrder)) + + // verify that we can support up to 5000 ordering comparisons, which should be sufficient + GenerateOrdering.generate(Array.fill(5000)(sortOrder)) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org