Repository: spark Updated Branches: refs/heads/branch-1.5 2803e8b2e -> e5fbe4f24
[SPARK-10038] [SQL] fix bug in generated unsafe projection when there is binary in ArrayData The type for array of array in Java is slightly different than array of others. cc cloud-fan Author: Davies Liu <dav...@databricks.com> Closes #8250 from davies/array_binary. (cherry picked from commit 5af3838d2e59ed83766f85634e26918baa53819f) Signed-off-by: Reynold Xin <r...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e5fbe4f2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e5fbe4f2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e5fbe4f2 Branch: refs/heads/branch-1.5 Commit: e5fbe4f24aa805d78546e5a11122aa60dde83709 Parents: 2803e8b Author: Davies Liu <dav...@databricks.com> Authored: Mon Aug 17 23:27:55 2015 -0700 Committer: Reynold Xin <r...@databricks.com> Committed: Mon Aug 17 23:28:02 2015 -0700 ---------------------------------------------------------------------- .../codegen/GenerateUnsafeProjection.scala | 12 ++++++++--- .../codegen/GeneratedProjectionSuite.scala | 21 +++++++++++++++++++- 2 files changed, 29 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/e5fbe4f2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index b2fb913..b570fe8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -224,7 +224,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro // go through the input array to calculate how many bytes we need. val calculateNumBytes = elementType match { - case _ if (ctx.isPrimitiveType(elementType)) => + case _ if ctx.isPrimitiveType(elementType) => // Should we do word align? val elementSize = elementType.defaultSize s""" @@ -237,6 +237,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro case _ => val writer = getWriter(elementType) val elementSize = s"$writer.getSize($elements[$index])" + // TODO(davies): avoid the copy val unsafeType = elementType match { case _: StructType => "UnsafeRow" case _: ArrayType => "UnsafeArrayData" @@ -249,8 +250,13 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro case _ => "" } + val newElements = if (elementType == BinaryType) { + s"new byte[$numElements][]" + } else { + s"new $unsafeType[$numElements]" + } s""" - final $unsafeType[] $elements = new $unsafeType[$numElements]; + final $unsafeType[] $elements = $newElements; for (int $index = 0; $index < $numElements; $index++) { ${convertedElement.code} if (!${convertedElement.isNull}) { @@ -262,7 +268,7 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro } val writeElement = elementType match { - case _ if (ctx.isPrimitiveType(elementType)) => + case _ if ctx.isPrimitiveType(elementType) => // Should we do word align? val elementSize = elementType.defaultSize s""" http://git-wip-us.apache.org/repos/asf/spark/blob/e5fbe4f2/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala index 8c7ee87..098944a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GeneratedProjectionSuite.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.expressions.codegen import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.types.{StringType, IntegerType, StructField, StructType} +import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String /** @@ -79,4 +79,23 @@ class GeneratedProjectionSuite extends SparkFunSuite { val row2 = mutableProj(result) assert(result === row2) } + + test("generated unsafe projection with array of binary") { + val row = InternalRow( + Array[Byte](1, 2), + new GenericArrayData(Array(Array[Byte](1, 2), null, Array[Byte](3, 4)))) + val fields = (BinaryType :: ArrayType(BinaryType) :: Nil).toArray[DataType] + + val unsafeProj = UnsafeProjection.create(fields) + val unsafeRow: UnsafeRow = unsafeProj(row) + assert(java.util.Arrays.equals(unsafeRow.getBinary(0), Array[Byte](1, 2))) + assert(java.util.Arrays.equals(unsafeRow.getArray(1).getBinary(0), Array[Byte](1, 2))) + assert(unsafeRow.getArray(1).isNullAt(1)) + assert(unsafeRow.getArray(1).getBinary(1) === null) + assert(java.util.Arrays.equals(unsafeRow.getArray(1).getBinary(2), Array[Byte](3, 4))) + + val safeProj = FromUnsafeProjection(fields) + val row2 = safeProj(unsafeRow) + assert(row2 === row) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org