Repository: hive
Updated Branches:
  refs/heads/branch-1 32069e334 -> 8a59b85a6


HIVE-13632: Hive failing on insert empty array into parquet table. (Yongzhi Chen, reviewed by Sergio Pena)

Conflicts:
        ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8a59b85a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8a59b85a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8a59b85a

Branch: refs/heads/branch-1
Commit: 8a59b85a6beadee51d6bdb57ced55d46a6ed9556
Parents: 32069e3
Author: Yongzhi Chen <ych...@apache.org>
Authored: Thu Apr 28 14:52:16 2016 -0400
Committer: Yongzhi Chen <ych...@apache.org>
Committed: Thu May 5 11:05:44 2016 -0400

----------------------------------------------------------------------
 .../serde/AbstractParquetMapInspector.java      |  4 +-
 .../serde/ParquetHiveArrayInspector.java        |  4 +-
 .../ql/io/parquet/write/DataWritableWriter.java | 90 ++++++++++----------
 .../ql/io/parquet/TestDataWritableWriter.java   | 29 +++++++
 .../serde/TestAbstractParquetMapInspector.java  |  4 +-
 .../serde/TestParquetHiveArrayInspector.java    |  4 +-
 .../parquet_array_map_emptynullvals.q           | 20 +++++
 .../parquet_array_map_emptynullvals.q.out       | 87 +++++++++++++++++++
 8 files changed, 189 insertions(+), 53 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java
index 49bf1c5..e80206e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java
@@ -60,7 +60,7 @@ public abstract class AbstractParquetMapInspector implements SettableMapObjectIn
 
     if (data instanceof ArrayWritable) {
       final Writable[] mapArray = ((ArrayWritable) data).get();
-      if (mapArray == null || mapArray.length == 0) {
+      if (mapArray == null) {
         return null;
       }
 
@@ -90,7 +90,7 @@ public abstract class AbstractParquetMapInspector implements SettableMapObjectIn
     if (data instanceof ArrayWritable) {
       final Writable[] mapArray = ((ArrayWritable) data).get();
 
-      if (mapArray == null || mapArray.length == 0) {
+      if (mapArray == null) {
         return -1;
       } else {
         return mapArray.length;

http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java
index 05e92b5..55614a3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java
@@ -83,7 +83,7 @@ public class ParquetHiveArrayInspector implements SettableListObjectInspector {
 
     if (data instanceof ArrayWritable) {
       final Writable[] array = ((ArrayWritable) data).get();
-      if (array == null || array.length == 0) {
+      if (array == null) {
         return -1;
       }
 
@@ -105,7 +105,7 @@ public class ParquetHiveArrayInspector implements SettableListObjectInspector {
 
     if (data instanceof ArrayWritable) {
       final Writable[] array = ((ArrayWritable) data).get();
-      if (array == null || array.length == 0) {
+      if (array == null) {
         return null;
       }
 

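The two inspector changes above make the same contract change: only a null backing array now means "no data" (size -1 / null container), while an empty array is reported as a genuine zero-length list or map. A minimal, self-contained sketch of that contract using Hadoop's ArrayWritable directly (the class and method names below are illustrative, not Hive's):

import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

public class EmptyVsNullListDemo {
  // Mirrors the patched getListLength()/getMapSize() checks: only a null
  // backing array reports "unknown" (-1); an empty array reports its true
  // size, 0.
  static int listLength(ArrayWritable data) {
    final Writable[] array = data.get();
    return (array == null) ? -1 : array.length;
  }

  public static void main(String[] args) {
    ArrayWritable empty = new ArrayWritable(IntWritable.class, new Writable[0]);
    System.out.println(listLength(empty)); // prints 0 (was -1 before the fix)
  }
}
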
http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
index c195c3e..24ad948 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java
@@ -163,28 +163,28 @@ public class DataWritableWriter {
   private void writeArray(final Object value, final ListObjectInspector inspector, final GroupType type) {
     // Get the internal array structure
     GroupType repeatedType = type.getType(0).asGroupType();
-
     recordConsumer.startGroup();
-    recordConsumer.startField(repeatedType.getName(), 0);
 
     List<?> arrayValues = inspector.getList(value);
-    ObjectInspector elementInspector = inspector.getListElementObjectInspector();
-
-    Type elementType = repeatedType.getType(0);
-    String elementName = elementType.getName();
-
-    for (Object element : arrayValues) {
-      recordConsumer.startGroup();
-      if (element != null) {
-        recordConsumer.startField(elementName, 0);
-        writeValue(element, elementInspector, elementType);
-        recordConsumer.endField(elementName, 0);
+    if (arrayValues != null && arrayValues.size() > 0) {
+      recordConsumer.startField(repeatedType.getName(), 0);
+      ObjectInspector elementInspector = inspector.getListElementObjectInspector();
+      Type elementType = repeatedType.getType(0);
+      String elementName = elementType.getName();
+
+      for (Object element : arrayValues) {
+        recordConsumer.startGroup();
+        if (element != null) {
+          recordConsumer.startField(elementName, 0);
+          writeValue(element, elementInspector, elementType);
+          recordConsumer.endField(elementName, 0);
+        }
+        recordConsumer.endGroup();
       }
-      recordConsumer.endGroup();
-    }
 
-    recordConsumer.endField(repeatedType.getName(), 0);
-    recordConsumer.endGroup();
+      recordConsumer.endField(repeatedType.getName(), 0);
+    }
+    recordConsumer.endGroup();
   }
 
   /**
@@ -206,39 +206,39 @@ public class DataWritableWriter {
     GroupType repeatedType = type.getType(0).asGroupType();
 
     recordConsumer.startGroup();
-    recordConsumer.startField(repeatedType.getName(), 0);
 
     Map<?, ?> mapValues = inspector.getMap(value);
-
-    Type keyType = repeatedType.getType(0);
-    String keyName = keyType.getName();
-    ObjectInspector keyInspector = inspector.getMapKeyObjectInspector();
-
-    Type valuetype = repeatedType.getType(1);
-    String valueName = valuetype.getName();
-    ObjectInspector valueInspector = inspector.getMapValueObjectInspector();
-
-    for (Map.Entry<?, ?> keyValue : mapValues.entrySet()) {
-      recordConsumer.startGroup();
-      if (keyValue != null) {
-        // write key element
-        Object keyElement = keyValue.getKey();
-        recordConsumer.startField(keyName, 0);
-        writeValue(keyElement, keyInspector, keyType);
-        recordConsumer.endField(keyName, 0);
-
-        // write value element
-        Object valueElement = keyValue.getValue();
-        if (valueElement != null) {
-          recordConsumer.startField(valueName, 1);
-          writeValue(valueElement, valueInspector, valuetype);
-          recordConsumer.endField(valueName, 1);
+    if (mapValues != null && mapValues.size() > 0) {
+      recordConsumer.startField(repeatedType.getName(), 0);
+      Type keyType = repeatedType.getType(0);
+      String keyName = keyType.getName();
+      ObjectInspector keyInspector = inspector.getMapKeyObjectInspector();
+      Type valuetype = repeatedType.getType(1);
+      String valueName = valuetype.getName();
+      ObjectInspector valueInspector = inspector.getMapValueObjectInspector();
+
+      for (Map.Entry<?, ?> keyValue : mapValues.entrySet()) {
+        recordConsumer.startGroup();
+        if (keyValue != null) {
+          // write key element
+          Object keyElement = keyValue.getKey();
+          recordConsumer.startField(keyName, 0);
+          writeValue(keyElement, keyInspector, keyType);
+          recordConsumer.endField(keyName, 0);
+
+          // write value element
+          Object valueElement = keyValue.getValue();
+          if (valueElement != null) {
+            recordConsumer.startField(valueName, 1);
+            writeValue(valueElement, valueInspector, valuetype);
+            recordConsumer.endField(valueName, 1);
+          }
         }
+        recordConsumer.endGroup();
       }
-      recordConsumer.endGroup();
-    }
 
-    recordConsumer.endField(repeatedType.getName(), 0);
+      recordConsumer.endField(repeatedType.getName(), 0);
+    }
     recordConsumer.endGroup();
   }
 

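The writeArray/writeMap rewrite above moves the startField/endField pair inside a non-empty guard. Parquet's record consumer rejects a repeated field that is started but given no values, which is exactly what the old code produced for an empty array or map; the patched code emits only the enclosing group in that case. A toy, runnable trace of the resulting call sequence (LoggingConsumer and demoWriteArray are hypothetical stand-ins, not Hive or Parquet classes):

import java.util.Collections;
import java.util.List;

public class EmptyArrayWriteDemo {
  // Stand-in for Parquet's RecordConsumer: just logs the calls we care about.
  static class LoggingConsumer {
    void startGroup()                     { System.out.println("startGroup"); }
    void endGroup()                       { System.out.println("endGroup"); }
    void startField(String name, int idx) { System.out.println("startField " + name); }
    void endField(String name, int idx)   { System.out.println("endField " + name); }
  }

  // Mirrors the shape of the patched writeArray(): the repeated field is
  // started only when there is at least one element to write.
  static void demoWriteArray(LoggingConsumer consumer, List<?> arrayValues) {
    consumer.startGroup();
    if (arrayValues != null && !arrayValues.isEmpty()) {
      consumer.startField("bag", 0);
      for (Object element : arrayValues) {
        consumer.startGroup();
        // per-element writeValue() elided
        consumer.endGroup();
      }
      consumer.endField("bag", 0);
    }
    consumer.endGroup();
  }

  public static void main(String[] args) {
    // For an empty list this prints only startGroup/endGroup, the same
    // sequence the new testEmptyArrays() below asserts against.
    demoWriteArray(new LoggingConsumer(), Collections.emptyList());
  }
}
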
http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java
index 7049139..934ae9f 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java
@@ -411,6 +411,35 @@ public class TestDataWritableWriter {
   }
 
   @Test
+  public void testEmptyArrays() throws Exception {
+    String columnNames = "arrayCol";
+    String columnTypes = "array<int>";
+
+    String fileSchema = "message hive_schema {\n"
+        + "  optional group arrayCol (LIST) {\n"
+        + "    repeated group array {\n"
+        + "      optional int32 array_element;\n"
+        + "    }\n"
+        + "  }\n"
+        + "}\n";
+
+    ArrayWritable hiveRecord = createGroup(
+        new ArrayWritable(Writable.class) // Empty array
+    );
+
+    // Write record to Parquet format
+    writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord));
+
+    // Verify record was written correctly to Parquet
+    startMessage();
+      startField("arrayCol", 0);
+        startGroup();
+        endGroup();
+      endField("arrayCol", 0);
+    endMessage();
+  }
+
+  @Test
   public void testArrayOfArrays() throws Exception {
     String columnNames = "array_of_arrays";
     String columnTypes = "array<array<int>>";

http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java
index f5d9cb4..6af8c53 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java
@@ -62,8 +62,8 @@ public class TestAbstractParquetMapInspector extends TestCase {
   @Test
   public void testEmptyContainer() {
     final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new ArrayWritable[0]);
-    assertEquals("Wrong size", -1, inspector.getMapSize(map));
-    assertNull("Should be null", inspector.getMap(map));
+    assertEquals("Wrong size", 0, inspector.getMapSize(map));
+    assertNotNull("Should not be null", inspector.getMap(map));
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java
index 0ce654d..9e0c1ff 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java
@@ -51,8 +51,8 @@ public class TestParquetHiveArrayInspector extends TestCase {
   @Test
   public void testEmptyContainer() {
     final ArrayWritable list = new ArrayWritable(ArrayWritable.class, new ArrayWritable[0]);
-    assertEquals("Wrong size", -1, inspector.getListLength(list));
-    assertNull("Should be null", inspector.getList(list));
+    assertEquals("Wrong size", 0, inspector.getListLength(list));
+    assertNotNull("Should not be null", inspector.getList(list));
     assertNull("Should be null", inspector.getListElement(list, 0));
   }
 

http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q b/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q
new file mode 100644
index 0000000..eeae5cf
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q
@@ -0,0 +1,20 @@
+drop table if exists testSets;
+drop table if exists testSets2;
+create table testSets (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet;
+
+insert into table testSets select 'abcd', array(), map() from src limit 1;
+
+create table testSets2 (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet;
+insert into table testSets2 select * from testSets;
+select * from testSets2;
+drop table testSets;
+drop table testSets2;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out b/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out
new file mode 100644
index 0000000..4608607
--- /dev/null
+++ b/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out
@@ -0,0 +1,87 @@
+PREHOOK: query: drop table if exists testSets
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists testSets
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists testSets2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists testSets2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table testSets (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testSets
+POSTHOOK: query: create table testSets (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testSets
+PREHOOK: query: insert into table testSets select 'abcd', array(), map() from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@testsets
+POSTHOOK: query: insert into table testSets select 'abcd', array(), map() from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@testsets
+POSTHOOK: Lineage: testsets.arrayvalues EXPRESSION []
+POSTHOOK: Lineage: testsets.key SIMPLE []
+POSTHOOK: Lineage: testsets.mapvalues EXPRESSION []
+PREHOOK: query: create table testSets2 (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@testSets2
+POSTHOOK: query: create table testSets2 (
+key string,
+arrayValues array<string>,
+mapValues map<string,string>)
+stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@testSets2
+PREHOOK: query: insert into table testSets2 select * from testSets
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testsets
+PREHOOK: Output: default@testsets2
+POSTHOOK: query: insert into table testSets2 select * from testSets
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testsets
+POSTHOOK: Output: default@testsets2
+POSTHOOK: Lineage: testsets2.arrayvalues SIMPLE [(testsets)testsets.FieldSchema(name:arrayvalues, type:array<string>, comment:null), ]
+POSTHOOK: Lineage: testsets2.key SIMPLE [(testsets)testsets.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: testsets2.mapvalues SIMPLE [(testsets)testsets.FieldSchema(name:mapvalues, type:map<string,string>, comment:null), ]
+PREHOOK: query: select * from testSets2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@testsets2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from testSets2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@testsets2
+#### A masked pattern was here ####
+abcd   []      {}
+PREHOOK: query: drop table testSets
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@testsets
+PREHOOK: Output: default@testsets
+POSTHOOK: query: drop table testSets
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@testsets
+POSTHOOK: Output: default@testsets
+PREHOOK: query: drop table testSets2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@testsets2
+PREHOOK: Output: default@testsets2
+POSTHOOK: query: drop table testSets2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@testsets2
+POSTHOOK: Output: default@testsets2
