[ 
https://issues.apache.org/jira/browse/DRILL-4824?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16506642#comment-16506642
 ] 

ASF GitHub Bot commented on DRILL-4824:
---------------------------------------

ilooner closed pull request #580: DRILL-4824: JSON with complex nested data 
produces incorrect output w…
URL: https://github.com/apache/drill/pull/580
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
index 1168e37455..0487e06c94 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonReader.java
@@ -91,9 +91,7 @@ public void testFieldSelectionBug() throws Exception {
           .baselineColumns("col_1", "col_2")
           .baselineValues(
               mapOf(),
-              mapOf(
-                  "inner_1", listOf(),
-                  "inner_3", mapOf()))
+              mapOf())
           .baselineValues(
               mapOf("inner_object_field_1", "2"),
               mapOf(
@@ -104,8 +102,7 @@ public void testFieldSelectionBug() throws Exception {
               mapOf(),
               mapOf(
                   "inner_1", listOf("4", "5", "6"),
-                  "inner_2", "3",
-                  "inner_3", mapOf()))
+                  "inner_2", "3"))
           .go();
     } finally {
       test("alter session set `store.json.all_text_mode` = false");
@@ -128,7 +125,7 @@ public void testSplitAndTransferFailure() throws Exception {
         .sqlQuery("select flatten(config) as flat from 
cp.`/store/json/null_list_v2.json`")
         .ordered()
         .baselineColumns("flat")
-        .baselineValues(mapOf("repeated_varchar", listOf()))
+        .baselineValues(mapOf())
         .baselineValues(mapOf("repeated_varchar", listOf(testVal)))
         .go();
 
diff --git 
a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java 
b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
index e76e674d50..d998e6aad2 100644
--- 
a/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
+++ 
b/exec/vector/src/main/java/org/apache/drill/exec/vector/complex/MapVector.java
@@ -317,6 +317,12 @@ public Object getObject(int index) {
         if (v != null && index < v.getAccessor().getValueCount()) {
           Object value = v.getAccessor().getObject(index);
           if (value != null) {
+            if ((v.getAccessor().getObject(index) instanceof Map
+                    && ((Map) v.getAccessor().getObject(index)).size() == 0)
+                || (v.getAccessor().getObject(index) instanceof List
+                    && ((List) v.getAccessor().getObject(index)).size() == 0)) 
{
+              continue;
+            }
             vv.put(child, value);
           }
         }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> Null maps / lists and non-provided state support for JSON fields. Numeric 
> types promotion.
> ------------------------------------------------------------------------------------------
>
>                 Key: DRILL-4824
>                 URL: https://issues.apache.org/jira/browse/DRILL-4824
>             Project: Apache Drill
>          Issue Type: Improvement
>          Components: Storage - JSON
>    Affects Versions: 1.0.0
>            Reporter: Roman Kulyk
>            Assignee: Volodymyr Vysotskyi
>            Priority: Major
>
> The output is incorrect for a JSON file with complex nested data.
> _JSON:_
> {code:none|title=example.json|borderStyle=solid}
> {
>         "Field1" : {
>         }
> }
> {
>         "Field1" : {
>                 "InnerField1": {"key1":"value1"},
>                 "InnerField2": {"key2":"value2"}
>         }
> }
> {
>         "Field1" : {
>                 "InnerField3" : ["value3", "value4"],
>                 "InnerField4" : ["value5", "value6"]
>         }
> }
> {code}
> _Query:_
> {code:sql}
> select Field1 from dfs.`/tmp/example.json`
> {code}
> _Incorrect result:_
> {code:none}
> +---------------------------+
> |          Field1           |
> +---------------------------+
> {"InnerField1":{},"InnerField2":{},"InnerField3":[],"InnerField4":[]}
> {"InnerField1":{"key1":"value1"},"InnerField2":{"key2":"value2"},"InnerField3":[],"InnerField4":[]}
> {"InnerField1":{},"InnerField2":{},"InnerField3":["value3","value4"],"InnerField4":["value5","value6"]}
> +--------------------------+
> {code}
> There is no need to output missing fields. With a deeply nested 
> structure, the result becomes unreadable for the user.
> _Correct result:_
> {code:none}
> +--------------------------+
> |         Field1           |
> +--------------------------+
> |{}                                                                     
> {"InnerField1":{"key1":"value1"},"InnerField2":{"key2":"value2"}}
> {"InnerField3":["value3","value4"],"InnerField4":["value5","value6"]}
> +--------------------------+
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to