From Preetham Poluparthi <[email protected]>:

Preetham Poluparthi has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20525?usp=email )


Change subject: WIP: parquet read null fix
......................................................................

WIP: parquet read null fix

Change-Id: Ib4cd841fa40d4fdd5bd330d2127a6437e5bd6565
---
M asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm
M asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm
M asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm
M asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-simple/parquet-simple.04.adm
M asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectConverter.java
M asterixdb/asterix-om/src/main/java/org/apache/asterix/om/lazy/NullLazyVisitablePointable.java
7 files changed, 47 insertions(+), 15 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/25/20525/1

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm
index 763b652..84a38f7 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.05.adm
@@ -1,5 +1,5 @@
-{ "id": 8, "nested": { "List": [ 100, 300 ] }, "obj_array": [ { "first": 
"first" }, {  }, { "first": "second" } ], "a": {  }, "arr": [ [ 1, 2 ] ] }
-{ "id": 10, "name": "Virat", "nested": { "List": [  ] }, "obj_array": [ { 
"first": "first" }, { "first": "second" } ], "a": {  }, "c": {  }, "f": [  ], 
"arr": [ [ 1, 2 ], [  ] ] }
-{ "id": 28, "name": "Virat", "nested": { "List": [  ], "A": "a" }, 
"obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { "b": 1 }, 
"c": { "d": 1 }, "f": [ 1.0 ], "arr": [ [ 1, 2 ], [  ] ] }
-{ "id": 34, "nested": { "randomK": "randomV" }, "obj_array": [ { "first": 
"first" }, { "first": "second" } ], "c": { "e": 1 }, "f": [ 2.0, 3.0 ], "arr": 
[ [  ] ] }
-{ "id": 37, "name": "Kohli", "nested": { "List": [ 1, 2, 3 ], "A": "a" }, 
"obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { "b": 1 }, 
"c": { "d": 1, "e": 1 }, "f": [ 3.5999999046325684, 4.0 ], "arr": [ [ 1, 2, 3 ] 
] }
+{ "id": 8, "nested": { "List": [ 100, 300 ], "A": null, "randomK": null }, 
"obj_array": [ { "first": "first" }, { "first": null }, { "first": "second" } 
], "a": { "b": null }, "arr": [ [ 1, 2 ] ], "c": null, "f": null, "name": null }
+{ "id": 10, "name": "Virat", "nested": { "List": [  ], "A": null, "randomK": 
null }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { 
"b": null }, "c": { "d": null, "e": null }, "f": [  ], "arr": [ [ 1, 2 ], [  ] 
] }
+{ "id": 28, "name": "Virat", "nested": { "List": [  ], "A": "a", "randomK": 
null }, "obj_array": [ { "first": "first" }, { "first": "second" } ], "a": { 
"b": 1 }, "c": { "d": 1, "e": null }, "f": [ 1.0 ], "arr": [ [ 1, 2 ], [  ] ] }
+{ "id": 34, "nested": { "randomK": "randomV", "A": null, "List": null }, 
"obj_array": [ { "first": "first" }, { "first": "second" } ], "c": { "e": 1, 
"d": null }, "f": [ 2.0, 3.0 ], "arr": [ [  ] ], "a": null, "name": null }
+{ "id": 37, "name": "Kohli", "nested": { "List": [ 1, 2, 3 ], "A": "a", 
"randomK": null }, "obj_array": [ { "first": "first" }, { "first": "second" } 
], "a": { "b": 1 }, "c": { "d": 1, "e": 1 }, "f": [ 3.5999999046325684, 4.0 ], 
"arr": [ [ 1, 2, 3 ] ] }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm
index 628b82f..30e6687 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null-type/parquet-null-type.08.adm
@@ -1,5 +1,5 @@
-{ "arr": [ [ 1, 2 ] ], "a": {  }, "id": 8, "nested": { "List": [ 100, 300 ] }, 
"obj_array": [ { "first": "first" }, {  }, { "first": "second" } ] }
-{ "arr": [ [ 1, 2 ], [  ] ], "a": {  }, "c": {  }, "f": [  ], "name": "Virat", 
"id": 10, "nested": { "List": [  ] }, "obj_array": [ { "first": "first" }, { 
"first": "second" } ] }
-{ "arr": [ [ 1, 2 ], [  ] ], "a": { "b": 1 }, "c": { "d": 1 }, "f": [ 1.0 ], 
"name": "Virat", "id": 28, "nested": { "A": "a", "List": [  ] }, "obj_array": [ 
{ "first": "first" }, { "first": "second" } ] }
-{ "arr": [ [  ] ], "c": { "e": 1 }, "f": [ 2.0, 3.0 ], "id": 34, "nested": { 
"randomK": "randomV" }, "obj_array": [ { "first": "first" }, { "first": 
"second" } ] }
-{ "arr": [ [ 1, 2, 3 ] ], "a": { "b": 1 }, "c": { "d": 1, "e": 1 }, "f": [ 
3.6, 4.0 ], "name": "Kohli", "id": 37, "nested": { "A": "a", "List": [ 1, 2, 3 
] }, "obj_array": [ { "first": "first" }, { "first": "second" } ] }
+{ "arr": [ [ 1, 2 ] ], "a": { "b": null }, "id": 8, "nested": { "List": [ 100, 
300 ], "A": null, "randomK": null }, "obj_array": [ { "first": "first" }, { 
"first": null }, { "first": "second" } ], "c": null, "f": null, "name": null }
+{ "arr": [ [ 1, 2 ], [  ] ], "a": { "b": null }, "c": { "d": null, "e": null 
}, "f": [  ], "name": "Virat", "id": 10, "nested": { "List": [  ], "A": null, 
"randomK": null }, "obj_array": [ { "first": "first" }, { "first": "second" } ] 
}
+{ "arr": [ [ 1, 2 ], [  ] ], "a": { "b": 1 }, "c": { "d": 1, "e": null }, "f": 
[ 1.0 ], "name": "Virat", "id": 28, "nested": { "A": "a", "List": [  ], 
"randomK": null }, "obj_array": [ { "first": "first" }, { "first": "second" } ] 
}
+{ "arr": [ [  ] ], "c": { "e": 1, "d": null }, "f": [ 2.0, 3.0 ], "id": 34, 
"nested": { "randomK": "randomV", "A": null, "List": null }, "obj_array": [ { 
"first": "first" }, { "first": "second" } ], "a": null, "name": null }
+{ "arr": [ [ 1, 2, 3 ] ], "a": { "b": 1 }, "c": { "d": 1, "e": 1 }, "f": [ 
3.6, 4.0 ], "name": "Kohli", "id": 37, "nested": { "A": "a", "List": [ 1, 2, 3 
], "randomK": null }, "obj_array": [ { "first": "first" }, { "first": "second" 
} ] }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm
index 29ca9ec..5122755 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-null2/parquet-null2.04.adm
@@ -1,3 +1,3 @@
 { "c": { "col2": { "centuries": [  ], "name": "aqay awil", "id": 1 } } }
 { "c": { "col2": { "centuries": [  ], "id": 2 } } }
-{ "c": { "col2": { "centuries": [  ], "id": 3 } } }
+{ "c": { "col2": { "centuries": [  ], "id": 3, "name": null } } }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-simple/parquet-simple.04.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-simple/parquet-simple.04.adm
index bf567b2..3bb97b9 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-simple/parquet-simple.04.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-simple/parquet-simple.04.adm
@@ -1 +1 @@
-{ "id": "123" }
\ No newline at end of file
+{ "id": "123", "name": null }
\ No newline at end of file
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm
index 4fd973e..42c906a 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/copy-to/parquet-type-hierarchy/parquet-type-hierarchy.05.adm
@@ -1,4 +1,4 @@
-{ "ratings": [  ], "id": 2 }
+{ "ratings": [  ], "id": 2, "rating": null }
 { "ratings": [  ], "rating": 1.0, "id": 5 }
 { "ratings": [ 1 ], "rating": 2.0, "id": 8 }
 { "ratings": [ 1, 2, 3 ], "rating": 3.0, "id": 10 }
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectConverter.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectConverter.java
index 6b63a7b..f86f9f0 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectConverter.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/hdfs/parquet/converter/nested/ObjectConverter.java
@@ -19,6 +19,8 @@
 package 
org.apache.asterix.external.input.record.reader.hdfs.parquet.converter.nested;

 import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;

 import org.apache.asterix.builders.IARecordBuilder;
 import 
org.apache.asterix.external.input.filter.embedder.IExternalFilterValueEmbedder;
@@ -26,6 +28,7 @@
 import 
org.apache.asterix.external.input.record.reader.hdfs.parquet.converter.IFieldValue;
 import 
org.apache.asterix.external.input.record.reader.hdfs.parquet.converter.ParquetConverterContext;
 import 
org.apache.asterix.external.input.record.reader.hdfs.parquet.converter.primitve.PrimitiveConverterProvider;
+import org.apache.asterix.om.lazy.NullLazyVisitablePointable;
 import org.apache.asterix.om.pointables.base.DefaultOpenFieldType;
 import org.apache.asterix.om.types.ATypeTag;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
@@ -42,14 +45,21 @@
      */
     private boolean ignore = false;

+    private Set<String> fieldNames;
+    private GroupType parquetType;
+
     public ObjectConverter(AbstractComplexConverter parent, int index, 
GroupType parquetType,
             ParquetConverterContext context) throws IOException {
         super(parent, index, parquetType, context);
+        fieldNames = new HashSet<>();
+        this.parquetType = parquetType;
     }

     public ObjectConverter(AbstractComplexConverter parent, String 
stringFieldName, int index, GroupType parquetType,
             ParquetConverterContext context) throws IOException {
         super(parent, stringFieldName, index, parquetType, context);
+        fieldNames = new HashSet<>();
+        this.parquetType = parquetType;
     }

     @Override
@@ -63,12 +73,32 @@
         } else {
             ignore = checkValueEmbedder(valueEmbedder);
         }
+
+        for (int i = 0; i < parquetType.getFieldCount(); i++) {
+            fieldNames.add(parquetType.getFieldName(i));
+        }
     }

     @Override
     public void end() {
         closeDirectRepeatedChildren();
         if (!ignore) {
+            IExternalFilterValueEmbedder valueEmbedder = 
context.getValueEmbedder();
+            for (String fieldNameStr : fieldNames) {
+                try {
+                    if (valueEmbedder.shouldEmbed(fieldNameStr, 
ATypeTag.NULL)) {
+                        
builder.addField(context.getSerializedFieldName(fieldNameStr),
+                                valueEmbedder.getEmbeddedValue());
+                    } else {
+                        
builder.addField(context.getSerializedFieldName(fieldNameStr),
+                                NullLazyVisitablePointable.INSTANCE);
+                    }
+                } catch (HyracksDataException e) {
+                    throw new IllegalStateException(e);
+                } catch (IOException e) {
+                    throw new RuntimeException(e);
+                }
+            }
             writeToParent();
             context.getValueEmbedder().exitObject();
         }
@@ -92,8 +122,10 @@
         }
         IExternalFilterValueEmbedder valueEmbedder = 
context.getValueEmbedder();
         IValueReference fieldName = value.getFieldName();
+        String fieldNameStr = value.getStringFieldName();
+        fieldNames.remove(fieldNameStr);
         try {
-            if (valueEmbedder.shouldEmbed(value.getStringFieldName(), 
value.getTypeTag())) {
+            if (valueEmbedder.shouldEmbed(fieldNameStr, value.getTypeTag())) {
                 builder.addField(fieldName, valueEmbedder.getEmbeddedValue());
             } else {
                 builder.addField(fieldName, getValue());
diff --git 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/lazy/NullLazyVisitablePointable.java
 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/lazy/NullLazyVisitablePointable.java
index 95e0f5b..9aa7afa 100644
--- 
a/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/lazy/NullLazyVisitablePointable.java
+++ 
b/asterixdb/asterix-om/src/main/java/org/apache/asterix/om/lazy/NullLazyVisitablePointable.java
@@ -20,7 +20,7 @@

 import org.apache.asterix.om.types.ATypeTag;

-class NullLazyVisitablePointable extends FlatLazyVisitablePointable {
+public class NullLazyVisitablePointable extends FlatLazyVisitablePointable {
     public static final AbstractLazyVisitablePointable INSTANCE = new 
NullLazyVisitablePointable();

     public NullLazyVisitablePointable() {

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20525?usp=email
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email

Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: phoenix
Gerrit-Change-Id: Ib4cd841fa40d4fdd5bd330d2127a6437e5bd6565
Gerrit-Change-Number: 20525
Gerrit-PatchSet: 1
Gerrit-Owner: Preetham Poluparthi <[email protected]>

Reply via email to