Repository: spark
Updated Branches:
  refs/heads/master dd59a4be3 -> fc2944630


[SPARK-22699][SQL] GenerateSafeProjection should not use global variables for 
struct

## What changes were proposed in this pull request?

GenerateSafeProjection is defining a mutable state for each struct, which is 
not needed. This is bad for the well known issues related to constant pool 
limits.
The PR replace the global variable with a local one.

## How was this patch tested?

added UT

Author: Marco Gaido <marcogaid...@gmail.com>

Closes #19914 from mgaido91/SPARK-22699.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc294463
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc294463
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc294463

Branch: refs/heads/master
Commit: fc294463007c184715d604c718c0e913c83969f3
Parents: dd59a4b
Author: Marco Gaido <marcogaid...@gmail.com>
Authored: Thu Dec 7 21:18:27 2017 +0800
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Thu Dec 7 21:18:27 2017 +0800

----------------------------------------------------------------------
 .../codegen/GenerateSafeProjection.scala          | 18 ++++++++----------
 .../codegen/GeneratedProjectionSuite.scala        | 11 +++++++++++
 2 files changed, 19 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/fc294463/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
index 44e7148..3dcbb51 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateSafeProjection.scala
@@ -49,8 +49,6 @@ object GenerateSafeProjection extends 
CodeGenerator[Seq[Expression], Projection]
     val tmpInput = ctx.freshName("tmpInput")
     val output = ctx.freshName("safeRow")
     val values = ctx.freshName("values")
-    // These expressions could be split into multiple functions
-    ctx.addMutableState("Object[]", values, s"$values = null;")
 
     val rowClass = classOf[GenericInternalRow].getName
 
@@ -66,15 +64,15 @@ object GenerateSafeProjection extends 
CodeGenerator[Seq[Expression], Projection]
     val allFields = ctx.splitExpressions(
       expressions = fieldWriters,
       funcName = "writeFields",
-      arguments = Seq("InternalRow" -> tmpInput)
+      arguments = Seq("InternalRow" -> tmpInput, "Object[]" -> values)
     )
-    val code = s"""
-      final InternalRow $tmpInput = $input;
-      $values = new Object[${schema.length}];
-      $allFields
-      final InternalRow $output = new $rowClass($values);
-      $values = null;
-    """
+    val code =
+      s"""
+         |final InternalRow $tmpInput = $input;
+         |final Object[] $values = new Object[${schema.length}];
+         |$allFields
+         |final InternalRow $output = new $rowClass($values);
+       """.stripMargin
 
     ExprCode(code, "false", output)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/fc294463/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
index 0cd0d88..6031bdf 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
@@ -208,4 +208,15 @@ class GeneratedProjectionSuite extends SparkFunSuite {
     unsafeProj.apply(InternalRow(InternalRow(UTF8String.fromString("b"))))
     assert(row.getStruct(0, 1).getString(0).toString == "a")
   }
+
+  test("SPARK-22699: GenerateSafeProjection should not use global variables 
for struct") {
+    val safeProj = GenerateSafeProjection.generate(
+      Seq(BoundReference(0, new StructType().add("i", IntegerType), true)))
+    val globalVariables = safeProj.getClass.getDeclaredFields
+    // We need always 3 variables:
+    // - one is a reference to this
+    // - one is the references object
+    // - one is the mutableRow
+    assert(globalVariables.length == 3)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to