Repository: spark Updated Branches: refs/heads/master dd59a4be3 -> fc2944630
[SPARK-22699][SQL] GenerateSafeProjection should not use global variables for struct ## What changes were proposed in this pull request? GenerateSafeProjection is defining a mutable state for each struct, which is not needed. This is bad for the well known issues related to constant pool limits. The PR replace the global variable with a local one. ## How was this patch tested? added UT Author: Marco Gaido <marcogaid...@gmail.com> Closes #19914 from mgaido91/SPARK-22699. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc294463 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc294463 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc294463 Branch: refs/heads/master Commit: fc294463007c184715d604c718c0e913c83969f3 Parents: dd59a4b Author: Marco Gaido <marcogaid...@gmail.com> Authored: Thu Dec 7 21:18:27 2017 +0800 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Thu Dec 7 21:18:27 2017 +0800 ---------------------------------------------------------------------- .../codegen/GenerateSafeProjection.scala | 18 ++++++++---------- .../codegen/GeneratedProjectionSuite.scala | 11 +++++++++++ 2 files changed, 19 insertions(+), 10 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/fc294463/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala index 44e7148..3dcbb51 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala @@ -49,8 +49,6 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val tmpInput = ctx.freshName("tmpInput") val output = ctx.freshName("safeRow") val values = ctx.freshName("values") - // These expressions could be split into multiple functions - ctx.addMutableState("Object[]", values, s"$values = null;") val rowClass = classOf[GenericInternalRow].getName @@ -66,15 +64,15 @@ object GenerateSafeProjection extends CodeGenerator[Seq[Expression], Projection] val allFields = ctx.splitExpressions( expressions = fieldWriters, funcName = "writeFields", - arguments = Seq("InternalRow" -> tmpInput) + arguments = Seq("InternalRow" -> tmpInput, "Object[]" -> values) ) - val code = s""" - final InternalRow $tmpInput = $input; - $values = new Object[${schema.length}]; - $allFields - final InternalRow $output = new $rowClass($values); - $values = null; - """ + val code = + s""" + |final InternalRow $tmpInput = $input; + |final Object[] $values = new Object[${schema.length}]; + |$allFields + |final InternalRow $output = new $rowClass($values); + """.stripMargin ExprCode(code, "false", output) } http://git-wip-us.apache.org/repos/asf/spark/blob/fc294463/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index 0cd0d88..6031bdf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -208,4 +208,15 @@ class GeneratedProjectionSuite extends SparkFunSuite { unsafeProj.apply(InternalRow(InternalRow(UTF8String.fromString("b")))) assert(row.getStruct(0, 1).getString(0).toString == "a") } + + test("SPARK-22699: GenerateSafeProjection should not use global variables for struct") { + val safeProj = GenerateSafeProjection.generate( + Seq(BoundReference(0, new StructType().add("i", IntegerType), true))) + val globalVariables = safeProj.getClass.getDeclaredFields + // We need always 3 variables: + // - one is a reference to this + // - one is the references object + // - one is the mutableRow + assert(globalVariables.length == 3) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org