Repository: hive Updated Branches: refs/heads/branch-1 32069e334 -> 8a59b85a6
HIVE-13632: Hive failing on insert empty array into parquet table. (Yongzhi Chen, reviewed by Sergio Pena) Conflicts: ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/8a59b85a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/8a59b85a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/8a59b85a Branch: refs/heads/branch-1 Commit: 8a59b85a6beadee51d6bdb57ced55d46a6ed9556 Parents: 32069e3 Author: Yongzhi Chen <ych...@apache.org> Authored: Thu Apr 28 14:52:16 2016 -0400 Committer: Yongzhi Chen <ych...@apache.org> Committed: Thu May 5 11:05:44 2016 -0400 ---------------------------------------------------------------------- .../serde/AbstractParquetMapInspector.java | 4 +- .../serde/ParquetHiveArrayInspector.java | 4 +- .../ql/io/parquet/write/DataWritableWriter.java | 90 ++++++++++---------- .../ql/io/parquet/TestDataWritableWriter.java | 29 +++++++ .../serde/TestAbstractParquetMapInspector.java | 4 +- .../serde/TestParquetHiveArrayInspector.java | 4 +- .../parquet_array_map_emptynullvals.q | 20 +++++ .../parquet_array_map_emptynullvals.q.out | 87 +++++++++++++++++++ 8 files changed, 189 insertions(+), 53 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java index 49bf1c5..e80206e 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/AbstractParquetMapInspector.java @@ -60,7 +60,7 @@ public abstract class AbstractParquetMapInspector implements SettableMapObjectIn if (data instanceof ArrayWritable) { final Writable[] mapArray = ((ArrayWritable) data).get(); - if (mapArray == null || mapArray.length == 0) { + if (mapArray == null) { return null; } @@ -90,7 +90,7 @@ public abstract class AbstractParquetMapInspector implements SettableMapObjectIn if (data instanceof ArrayWritable) { final Writable[] mapArray = ((ArrayWritable) data).get(); - if (mapArray == null || mapArray.length == 0) { + if (mapArray == null) { return -1; } else { return mapArray.length; http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java index 05e92b5..55614a3 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveArrayInspector.java @@ -83,7 +83,7 @@ public class ParquetHiveArrayInspector implements SettableListObjectInspector { if (data instanceof ArrayWritable) { final Writable[] array = ((ArrayWritable) data).get(); - if (array == null || array.length == 0) { + if (array == null) { return -1; } @@ -105,7 +105,7 @@ public class ParquetHiveArrayInspector implements SettableListObjectInspector { if (data instanceof ArrayWritable) { final Writable[] array = ((ArrayWritable) data).get(); - if (array == null || array.length == 0) { + if (array == null) { return null; } http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java index c195c3e..24ad948 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/write/DataWritableWriter.java @@ -163,28 +163,28 @@ public class DataWritableWriter { private void writeArray(final Object value, final ListObjectInspector inspector, final GroupType type) { // Get the internal array structure GroupType repeatedType = type.getType(0).asGroupType(); - recordConsumer.startGroup(); - recordConsumer.startField(repeatedType.getName(), 0); List<?> arrayValues = inspector.getList(value); - ObjectInspector elementInspector = inspector.getListElementObjectInspector(); - - Type elementType = repeatedType.getType(0); - String elementName = elementType.getName(); - - for (Object element : arrayValues) { - recordConsumer.startGroup(); - if (element != null) { - recordConsumer.startField(elementName, 0); - writeValue(element, elementInspector, elementType); - recordConsumer.endField(elementName, 0); + if (arrayValues != null && arrayValues.size() > 0 ) { + recordConsumer.startField(repeatedType.getName(), 0); + ObjectInspector elementInspector = inspector.getListElementObjectInspector(); + Type elementType = repeatedType.getType(0); + String elementName = elementType.getName(); + + for (Object element : arrayValues) { + recordConsumer.startGroup(); + if (element != null) { + recordConsumer.startField(elementName, 0); + writeValue(element, elementInspector, elementType); + recordConsumer.endField(elementName, 0); + } + recordConsumer.endGroup(); } - recordConsumer.endGroup(); - } - recordConsumer.endField(repeatedType.getName(), 0); - recordConsumer.endGroup(); + recordConsumer.endField(repeatedType.getName(), 0); + } + recordConsumer.endGroup(); } /** @@ -206,39 +206,39 @@ public class DataWritableWriter { GroupType repeatedType = type.getType(0).asGroupType(); recordConsumer.startGroup(); - recordConsumer.startField(repeatedType.getName(), 0); Map<?, ?> mapValues = inspector.getMap(value); - - Type keyType = repeatedType.getType(0); - String keyName = keyType.getName(); - ObjectInspector keyInspector = inspector.getMapKeyObjectInspector(); - - Type valuetype = repeatedType.getType(1); - String valueName = valuetype.getName(); - ObjectInspector valueInspector = inspector.getMapValueObjectInspector(); - - for (Map.Entry<?, ?> keyValue : mapValues.entrySet()) { - recordConsumer.startGroup(); - if (keyValue != null) { - // write key element - Object keyElement = keyValue.getKey(); - recordConsumer.startField(keyName, 0); - writeValue(keyElement, keyInspector, keyType); - recordConsumer.endField(keyName, 0); - - // write value element - Object valueElement = keyValue.getValue(); - if (valueElement != null) { - recordConsumer.startField(valueName, 1); - writeValue(valueElement, valueInspector, valuetype); - recordConsumer.endField(valueName, 1); + if (mapValues != null && mapValues.size() > 0) { + recordConsumer.startField(repeatedType.getName(), 0); + Type keyType = repeatedType.getType(0); + String keyName = keyType.getName(); + ObjectInspector keyInspector = inspector.getMapKeyObjectInspector(); + Type valuetype = repeatedType.getType(1); + String valueName = valuetype.getName(); + ObjectInspector valueInspector = inspector.getMapValueObjectInspector(); + + for (Map.Entry<?, ?> keyValue : mapValues.entrySet()) { + recordConsumer.startGroup(); + if (keyValue != null) { + // write key element + Object keyElement = keyValue.getKey(); + recordConsumer.startField(keyName, 0); + writeValue(keyElement, keyInspector, keyType); + recordConsumer.endField(keyName, 0); + + // write value element + Object valueElement = keyValue.getValue(); + if (valueElement != null) { + recordConsumer.startField(valueName, 1); + writeValue(valueElement, valueInspector, valuetype); + recordConsumer.endField(valueName, 1); + } } + recordConsumer.endGroup(); } - recordConsumer.endGroup(); - } - recordConsumer.endField(repeatedType.getName(), 0); + recordConsumer.endField(repeatedType.getName(), 0); + } recordConsumer.endGroup(); } http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java index 7049139..934ae9f 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestDataWritableWriter.java @@ -411,6 +411,35 @@ public class TestDataWritableWriter { } @Test + public void testEmptyArrays() throws Exception { + String columnNames = "arrayCol"; + String columnTypes = "array<int>"; + + String fileSchema = "message hive_schema {\n" + + " optional group arrayCol (LIST) {\n" + + " repeated group array {\n" + + " optional int32 array_element;\n" + + " }\n" + + " }\n" + + "}\n"; + + ArrayWritable hiveRecord = createGroup( + new ArrayWritable(Writable.class) // Empty array + ); + + // Write record to Parquet format + writeParquetRecord(fileSchema, getParquetWritable(columnNames, columnTypes, hiveRecord)); + + // Verify record was written correctly to Parquet + startMessage(); + startField("arrayCol", 0); + startGroup(); + endGroup(); + endField("arrayCol", 0); + endMessage(); + } + + @Test public void testArrayOfArrays() throws Exception { String columnNames = "array_of_arrays"; String columnTypes = "array<array<int>>"; http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java index f5d9cb4..6af8c53 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestAbstractParquetMapInspector.java @@ -62,8 +62,8 @@ public class TestAbstractParquetMapInspector extends TestCase { @Test public void testEmptyContainer() { final ArrayWritable map = new ArrayWritable(ArrayWritable.class, new ArrayWritable[0]); - assertEquals("Wrong size", -1, inspector.getMapSize(map)); - assertNull("Should be null", inspector.getMap(map)); + assertEquals("Wrong size", 0, inspector.getMapSize(map)); + assertNotNull("Should not be null", inspector.getMap(map)); } @Test http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java ---------------------------------------------------------------------- diff --git a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java index 0ce654d..9e0c1ff 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/serde/TestParquetHiveArrayInspector.java @@ -51,8 +51,8 @@ public class TestParquetHiveArrayInspector extends TestCase { @Test public void testEmptyContainer() { final ArrayWritable list = new ArrayWritable(ArrayWritable.class, new ArrayWritable[0]); - assertEquals("Wrong size", -1, inspector.getListLength(list)); - assertNull("Should be null", inspector.getList(list)); + assertEquals("Wrong size", 0, inspector.getListLength(list)); + assertNotNull("Should not be null", inspector.getList(list)); assertNull("Should be null", inspector.getListElement(list, 0)); } http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q b/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q new file mode 100644 index 0000000..eeae5cf --- /dev/null +++ b/ql/src/test/queries/clientpositive/parquet_array_map_emptynullvals.q @@ -0,0 +1,20 @@ +drop table if exists testSets; +drop table if exists testSets2; +create table testSets ( +key string, +arrayValues array<string>, +mapValues map<string,string>) +stored as parquet; + +insert into table testSets select 'abcd', array(), map() from src limit 1; + +create table testSets2 ( +key string, +arrayValues array<string>, +mapValues map<string,string>) +stored as parquet; +insert into table testSets2 select * from testSets; +select * from testSets2; +drop table testSets; +drop table testSets2; + http://git-wip-us.apache.org/repos/asf/hive/blob/8a59b85a/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out b/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out new file mode 100644 index 0000000..4608607 --- /dev/null +++ b/ql/src/test/results/clientpositive/parquet_array_map_emptynullvals.q.out @@ -0,0 +1,87 @@ +PREHOOK: query: drop table if exists testSets +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists testSets +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists testSets2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists testSets2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table testSets ( +key string, +arrayValues array<string>, +mapValues map<string,string>) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testSets +POSTHOOK: query: create table testSets ( +key string, +arrayValues array<string>, +mapValues map<string,string>) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testSets +PREHOOK: query: insert into table testSets select 'abcd', array(), map() from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@testsets +POSTHOOK: query: insert into table testSets select 'abcd', array(), map() from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@testsets +POSTHOOK: Lineage: testsets.arrayvalues EXPRESSION [] +POSTHOOK: Lineage: testsets.key SIMPLE [] +POSTHOOK: Lineage: testsets.mapvalues EXPRESSION [] +PREHOOK: query: create table testSets2 ( +key string, +arrayValues array<string>, +mapValues map<string,string>) +stored as parquet +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@testSets2 +POSTHOOK: query: create table testSets2 ( +key string, +arrayValues array<string>, +mapValues map<string,string>) +stored as parquet +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@testSets2 +PREHOOK: query: insert into table testSets2 select * from testSets +PREHOOK: type: QUERY +PREHOOK: Input: default@testsets +PREHOOK: Output: default@testsets2 +POSTHOOK: query: insert into table testSets2 select * from testSets +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testsets +POSTHOOK: Output: default@testsets2 +POSTHOOK: Lineage: testsets2.arrayvalues SIMPLE [(testsets)testsets.FieldSchema(name:arrayvalues, type:array<string>, comment:null), ] +POSTHOOK: Lineage: testsets2.key SIMPLE [(testsets)testsets.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: testsets2.mapvalues SIMPLE [(testsets)testsets.FieldSchema(name:mapvalues, type:map<string,string>, comment:null), ] +PREHOOK: query: select * from testSets2 +PREHOOK: type: QUERY +PREHOOK: Input: default@testsets2 +#### A masked pattern was here #### +POSTHOOK: query: select * from testSets2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@testsets2 +#### A masked pattern was here #### +abcd [] {} +PREHOOK: query: drop table testSets +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testsets +PREHOOK: Output: default@testsets +POSTHOOK: query: drop table testSets +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testsets +POSTHOOK: Output: default@testsets +PREHOOK: query: drop table testSets2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@testsets2 +PREHOOK: Output: default@testsets2 +POSTHOOK: query: drop table testSets2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@testsets2 +POSTHOOK: Output: default@testsets2