spark git commit: [SPARK-23047][PYTHON][SQL] Change MapVector to NullableMapVector in ArrowColumnVector

2018-01-17 Thread gurwls223
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 79ccd0cad -> 6e509fde3


[SPARK-23047][PYTHON][SQL] Change MapVector to NullableMapVector in 
ArrowColumnVector

## What changes were proposed in this pull request?
This PR replaces usages of `MapVector` in the Spark codebase with 
`NullableMapVector`.

`MapVector` is an internal Arrow class that is not supposed to be used 
directly. We should use `NullableMapVector` instead.

## How was this patch tested?

Existing tests, plus a new "non nullable struct" test in `ArrowColumnVectorSuite`.

Author: Li Jin 

Closes #20239 from icexelloss/arrow-map-vector.

(cherry picked from commit 4e6f8fb150ae09c7d1de6beecb2b98e5afa5da19)
Signed-off-by: hyukjinkwon 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6e509fde
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6e509fde
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6e509fde

Branch: refs/heads/branch-2.3
Commit: 6e509fde3f056316f46c71b672a7d69adb1b4f8e
Parents: 79ccd0c
Author: Li Jin 
Authored: Thu Jan 18 07:26:43 2018 +0900
Committer: hyukjinkwon 
Committed: Thu Jan 18 07:26:57 2018 +0900

--
 .../spark/sql/vectorized/ArrowColumnVector.java | 13 ++++++++++---
 .../vectorized/ArrowColumnVectorSuite.scala | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6e509fde/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
--
diff --git 
a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java 
b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
index 7083332..eb69001 100644
--- 
a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
+++ 
b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
@@ -247,8 +247,8 @@ public final class ArrowColumnVector extends ColumnVector {
 
   childColumns = new ArrowColumnVector[1];
   childColumns[0] = new ArrowColumnVector(listVector.getDataVector());
-} else if (vector instanceof MapVector) {
-  MapVector mapVector = (MapVector) vector;
+} else if (vector instanceof NullableMapVector) {
+  NullableMapVector mapVector = (NullableMapVector) vector;
   accessor = new StructAccessor(mapVector);
 
   childColumns = new ArrowColumnVector[mapVector.size()];
@@ -553,9 +553,16 @@ public final class ArrowColumnVector extends ColumnVector {
 }
   }
 
+  /**
+   * Any call to "get" method will throw UnsupportedOperationException.
+   *
+   * Access struct values in a ArrowColumnVector doesn't use this accessor. Instead, it uses getStruct() method defined
+   * in the parent class. Any call to "get" method in this class is a bug in the code.
+   *
+   */
   private static class StructAccessor extends ArrowVectorAccessor {
 
-StructAccessor(MapVector vector) {
+StructAccessor(NullableMapVector vector) {
   super(vector);
 }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/6e509fde/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
index 7304803..5343266 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
@@ -322,6 +322,42 @@ class ArrowColumnVectorSuite extends SparkFunSuite {
 allocator.close()
   }
 
+  test("non nullable struct") {
+val allocator = ArrowUtils.rootAllocator.newChildAllocator("struct", 0, Long.MaxValue)
+val schema = new StructType().add("int", IntegerType).add("long", LongType)
+val vector = ArrowUtils.toArrowField("struct", schema, nullable = false, null)
+  .createVector(allocator).asInstanceOf[NullableMapVector]
+
+vector.allocateNew()
+val intVector = vector.getChildByOrdinal(0).asInstanceOf[IntVector]
+val longVector = vector.getChildByOrdinal(1).asInstanceOf[BigIntVector]
+
+vector.setIndexDefined(0)
+intVector.setSafe(0, 1)
+longVector.setSafe(0, 1L)
+
+vector.setIndexDefined(1)
+intVector.setSafe(1, 2)
+longVector.setNull(1)
+
+vector.setValueCount(2)
+
+val columnVector = new ArrowColumnVector(vector)
+assert(columnVector.dataType === schema)
+assert(columnVector.numNulls === 0)
+
+val row0 = columnVector.getStruct(0, 2)
+assert(row0.getInt(0) === 1)
+assert(row0.getLong(1) === 1L)
+
+val row1 = columnVector.getStruct(1, 2)

spark git commit: [SPARK-23047][PYTHON][SQL] Change MapVector to NullableMapVector in ArrowColumnVector

2018-01-17 Thread gurwls223
Repository: spark
Updated Branches:
  refs/heads/master e946c63dd -> 4e6f8fb15


[SPARK-23047][PYTHON][SQL] Change MapVector to NullableMapVector in 
ArrowColumnVector

## What changes were proposed in this pull request?
This PR replaces usages of `MapVector` in the Spark codebase with 
`NullableMapVector`.

`MapVector` is an internal Arrow class that is not supposed to be used 
directly. We should use `NullableMapVector` instead.

## How was this patch tested?

Existing tests, plus a new "non nullable struct" test in `ArrowColumnVectorSuite`.

Author: Li Jin 

Closes #20239 from icexelloss/arrow-map-vector.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e6f8fb1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e6f8fb1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e6f8fb1

Branch: refs/heads/master
Commit: 4e6f8fb150ae09c7d1de6beecb2b98e5afa5da19
Parents: e946c63
Author: Li Jin 
Authored: Thu Jan 18 07:26:43 2018 +0900
Committer: hyukjinkwon 
Committed: Thu Jan 18 07:26:43 2018 +0900

--
 .../spark/sql/vectorized/ArrowColumnVector.java | 13 ++++++++++---
 .../vectorized/ArrowColumnVectorSuite.scala | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/4e6f8fb1/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
--
diff --git 
a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java 
b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
index 7083332..eb69001 100644
--- 
a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
+++ 
b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ArrowColumnVector.java
@@ -247,8 +247,8 @@ public final class ArrowColumnVector extends ColumnVector {
 
   childColumns = new ArrowColumnVector[1];
   childColumns[0] = new ArrowColumnVector(listVector.getDataVector());
-} else if (vector instanceof MapVector) {
-  MapVector mapVector = (MapVector) vector;
+} else if (vector instanceof NullableMapVector) {
+  NullableMapVector mapVector = (NullableMapVector) vector;
   accessor = new StructAccessor(mapVector);
 
   childColumns = new ArrowColumnVector[mapVector.size()];
@@ -553,9 +553,16 @@ public final class ArrowColumnVector extends ColumnVector {
 }
   }
 
+  /**
+   * Any call to "get" method will throw UnsupportedOperationException.
+   *
+   * Access struct values in a ArrowColumnVector doesn't use this accessor. Instead, it uses getStruct() method defined
+   * in the parent class. Any call to "get" method in this class is a bug in the code.
+   *
+   */
   private static class StructAccessor extends ArrowVectorAccessor {
 
-StructAccessor(MapVector vector) {
+StructAccessor(NullableMapVector vector) {
   super(vector);
 }
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/4e6f8fb1/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
index 7304803..5343266 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ArrowColumnVectorSuite.scala
@@ -322,6 +322,42 @@ class ArrowColumnVectorSuite extends SparkFunSuite {
 allocator.close()
   }
 
+  test("non nullable struct") {
+val allocator = ArrowUtils.rootAllocator.newChildAllocator("struct", 0, Long.MaxValue)
+val schema = new StructType().add("int", IntegerType).add("long", LongType)
+val vector = ArrowUtils.toArrowField("struct", schema, nullable = false, null)
+  .createVector(allocator).asInstanceOf[NullableMapVector]
+
+vector.allocateNew()
+val intVector = vector.getChildByOrdinal(0).asInstanceOf[IntVector]
+val longVector = vector.getChildByOrdinal(1).asInstanceOf[BigIntVector]
+
+vector.setIndexDefined(0)
+intVector.setSafe(0, 1)
+longVector.setSafe(0, 1L)
+
+vector.setIndexDefined(1)
+intVector.setSafe(1, 2)
+longVector.setNull(1)
+
+vector.setValueCount(2)
+
+val columnVector = new ArrowColumnVector(vector)
+assert(columnVector.dataType === schema)
+assert(columnVector.numNulls === 0)
+
+val row0 = columnVector.getStruct(0, 2)
+assert(row0.getInt(0) === 1)
+assert(row0.getLong(1) === 1L)
+
+val row1 = columnVector.getStruct(1, 2)
+assert(row1.getInt(0) === 2)
+assert(row1.isNullAt(1))
+
+columnVector.close()
+allocato