Repository: spark
Updated Branches:
  refs/heads/branch-1.5 2803e8b2e -> e5fbe4f24


[SPARK-10038] [SQL] fix bug in generated unsafe projection when there is binary 
in ArrayData

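In Java, the syntax for allocating an array whose elements are themselves arrays
differs slightly from that for other element types: an array of binary values
must be created as `new byte[n][]` rather than with the generic `new T[n]` form.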

cc cloud-fan

Author: Davies Liu <dav...@databricks.com>

Closes #8250 from davies/array_binary.

(cherry picked from commit 5af3838d2e59ed83766f85634e26918baa53819f)
Signed-off-by: Reynold Xin <r...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e5fbe4f2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e5fbe4f2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e5fbe4f2

Branch: refs/heads/branch-1.5
Commit: e5fbe4f24aa805d78546e5a11122aa60dde83709
Parents: 2803e8b
Author: Davies Liu <dav...@databricks.com>
Authored: Mon Aug 17 23:27:55 2015 -0700
Committer: Reynold Xin <r...@databricks.com>
Committed: Mon Aug 17 23:28:02 2015 -0700

----------------------------------------------------------------------
 .../codegen/GenerateUnsafeProjection.scala      | 12 ++++++++---
 .../codegen/GeneratedProjectionSuite.scala      | 21 +++++++++++++++++++-
 2 files changed, 29 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e5fbe4f2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
index b2fb913..b570fe8 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala
@@ -224,7 +224,7 @@ object GenerateUnsafeProjection extends 
CodeGenerator[Seq[Expression], UnsafePro
 
     // go through the input array to calculate how many bytes we need.
     val calculateNumBytes = elementType match {
-      case _ if (ctx.isPrimitiveType(elementType)) =>
+      case _ if ctx.isPrimitiveType(elementType) =>
         // Should we do word align?
         val elementSize = elementType.defaultSize
         s"""
@@ -237,6 +237,7 @@ object GenerateUnsafeProjection extends 
CodeGenerator[Seq[Expression], UnsafePro
       case _ =>
         val writer = getWriter(elementType)
         val elementSize = s"$writer.getSize($elements[$index])"
+        // TODO(davies): avoid the copy
         val unsafeType = elementType match {
           case _: StructType => "UnsafeRow"
           case _: ArrayType => "UnsafeArrayData"
@@ -249,8 +250,13 @@ object GenerateUnsafeProjection extends 
CodeGenerator[Seq[Expression], UnsafePro
           case _ => ""
         }
 
+        val newElements = if (elementType == BinaryType) {
+          s"new byte[$numElements][]"
+        } else {
+          s"new $unsafeType[$numElements]"
+        }
         s"""
-          final $unsafeType[] $elements = new $unsafeType[$numElements];
+          final $unsafeType[] $elements = $newElements;
           for (int $index = 0; $index < $numElements; $index++) {
             ${convertedElement.code}
             if (!${convertedElement.isNull}) {
@@ -262,7 +268,7 @@ object GenerateUnsafeProjection extends 
CodeGenerator[Seq[Expression], UnsafePro
     }
 
     val writeElement = elementType match {
-      case _ if (ctx.isPrimitiveType(elementType)) =>
+      case _ if ctx.isPrimitiveType(elementType) =>
         // Should we do word align?
         val elementSize = elementType.defaultSize
         s"""

http://git-wip-us.apache.org/repos/asf/spark/blob/e5fbe4f2/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
index 8c7ee87..098944a 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala
@@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions.codegen
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.types.{StringType, IntegerType, StructField, 
StructType}
+import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.types.UTF8String
 
 /**
@@ -79,4 +79,23 @@ class GeneratedProjectionSuite extends SparkFunSuite {
     val row2 = mutableProj(result)
     assert(result === row2)
   }
+
+  test("generated unsafe projection with array of binary") {
+    val row = InternalRow(
+      Array[Byte](1, 2),
+      new GenericArrayData(Array(Array[Byte](1, 2), null, Array[Byte](3, 4))))
+    val fields = (BinaryType :: ArrayType(BinaryType) :: Nil).toArray[DataType]
+
+    val unsafeProj = UnsafeProjection.create(fields)
+    val unsafeRow: UnsafeRow = unsafeProj(row)
+    assert(java.util.Arrays.equals(unsafeRow.getBinary(0), Array[Byte](1, 2)))
+    assert(java.util.Arrays.equals(unsafeRow.getArray(1).getBinary(0), 
Array[Byte](1, 2)))
+    assert(unsafeRow.getArray(1).isNullAt(1))
+    assert(unsafeRow.getArray(1).getBinary(1) === null)
+    assert(java.util.Arrays.equals(unsafeRow.getArray(1).getBinary(2), 
Array[Byte](3, 4)))
+
+    val safeProj = FromUnsafeProjection(fields)
+    val row2 = safeProj(unsafeRow)
+    assert(row2 === row)
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to