Repository: arrow
Updated Branches:
  refs/heads/master 2f8449337 -> 3d2e4df21


ARROW-337: UnionListWriter.list() is doing more than it should, this …

…can cause data corruption

The general idea is to use the "inner" writer's position to update the offset. 
This involves making sure various writers do indeed update their positions.

UnionListWriter.startList() should explicitly set the inner writer position in 
case setPosition() was called to move the union list writer's position

Author: adeneche <adene...@dremio.com>

Closes #183 from adeneche/ARROW-337 and squashes the following commits:

1ae7e00 [adeneche] updated TestComplexWriter to ensure position is set properly 
by the various writers
7d5aefc [adeneche] ARROW-337: UnionListWriter.list() is doing more than it 
should, this can cause data corruption


Project: http://git-wip-us.apache.org/repos/asf/arrow/repo
Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/3d2e4df2
Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/3d2e4df2
Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/3d2e4df2

Branch: refs/heads/master
Commit: 3d2e4df219d6b06a3d78821bbca6ba17188908c2
Parents: 2f84493
Author: adeneche <adene...@dremio.com>
Authored: Wed Oct 26 12:09:26 2016 -0700
Committer: Julien Le Dem <jul...@dremio.com>
Committed: Wed Oct 26 12:09:26 2016 -0700

----------------------------------------------------------------------
 .../AbstractPromotableFieldWriter.java          |   2 +
 .../src/main/codegen/templates/MapWriters.java  |   1 +
 .../main/codegen/templates/UnionListWriter.java |  32 +--
 .../org/apache/arrow/vector/TestListVector.java |   4 -
 .../complex/writer/TestComplexWriter.java       | 201 +++++++++++++------
 5 files changed, 154 insertions(+), 86 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/arrow/blob/3d2e4df2/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java 
b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
index d21dcd0..60dd0c7 100644
--- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
+++ b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java
@@ -58,6 +58,7 @@ abstract class AbstractPromotableFieldWriter extends 
AbstractFieldWriter {
   @Override
   public void end() {
     getWriter(MinorType.MAP).end();
+    setPosition(idx() + 1);
   }
 
   @Override
@@ -68,6 +69,7 @@ abstract class AbstractPromotableFieldWriter extends 
AbstractFieldWriter {
   @Override
   public void endList() {
     getWriter(MinorType.LIST).endList();
+    setPosition(idx() + 1);
   }
 
   <#list vv.types as type><#list type.minor as minor><#assign name = 
minor.class?cap_first />

http://git-wip-us.apache.org/repos/asf/arrow/blob/3d2e4df2/java/vector/src/main/codegen/templates/MapWriters.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/MapWriters.java 
b/java/vector/src/main/codegen/templates/MapWriters.java
index 51327b4..f41b600 100644
--- a/java/vector/src/main/codegen/templates/MapWriters.java
+++ b/java/vector/src/main/codegen/templates/MapWriters.java
@@ -185,6 +185,7 @@ public class ${mode}MapWriter extends AbstractFieldWriter {
 
   @Override
   public void end() {
+    setPosition(idx()+1);
   }
 
   <#list vv.types as type><#list type.minor as minor>

http://git-wip-us.apache.org/repos/asf/arrow/blob/3d2e4df2/java/vector/src/main/codegen/templates/UnionListWriter.java
----------------------------------------------------------------------
diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java 
b/java/vector/src/main/codegen/templates/UnionListWriter.java
index 04531a7..bb39fe8 100644
--- a/java/vector/src/main/codegen/templates/UnionListWriter.java
+++ b/java/vector/src/main/codegen/templates/UnionListWriter.java
@@ -101,11 +101,7 @@ public class UnionListWriter extends AbstractFieldWriter {
   public ${name}Writer <#if uncappedName == 
"int">integer<#else>${uncappedName}</#if>(String name) {
 //    assert inMap;
     mapName = name;
-    final int nextOffset = offsets.getAccessor().get(idx() + 1);
-    vector.getMutator().setNotNull(idx());
-    writer.setPosition(nextOffset);
-    ${name}Writer ${uncappedName}Writer = writer.<#if uncappedName == 
"int">integer<#else>${uncappedName}</#if>(name);
-    return ${uncappedName}Writer;
+    return writer.<#if uncappedName == 
"int">integer<#else>${uncappedName}</#if>(name);
   }
 
   </#if>
@@ -120,18 +116,11 @@ public class UnionListWriter extends AbstractFieldWriter {
 
   @Override
   public ListWriter list() {
-    final int nextOffset = offsets.getAccessor().get(idx() + 1);
-    vector.getMutator().setNotNull(idx());
-    offsets.getMutator().setSafe(idx() + 1, nextOffset + 1);
-    writer.setPosition(nextOffset);
     return writer;
   }
 
   @Override
   public ListWriter list(String name) {
-    final int nextOffset = offsets.getAccessor().get(idx() + 1);
-    vector.getMutator().setNotNull(idx());
-    writer.setPosition(nextOffset);
     ListWriter listWriter = writer.list(name);
     return listWriter;
   }
@@ -145,30 +134,26 @@ public class UnionListWriter extends AbstractFieldWriter {
   @Override
   public void startList() {
     vector.getMutator().startNewValue(idx());
+    writer.setPosition(offsets.getAccessor().get(idx() + 1));
   }
 
   @Override
   public void endList() {
-
+    offsets.getMutator().set(idx() + 1, writer.idx());
+    setPosition(idx() + 1);
   }
 
   @Override
   public void start() {
 //    assert inMap;
-    final int nextOffset = offsets.getAccessor().get(idx() + 1);
-    vector.getMutator().setNotNull(idx());
-    offsets.getMutator().setSafe(idx() + 1, nextOffset);
-    writer.setPosition(nextOffset);
     writer.start();
   }
 
   @Override
   public void end() {
 //    if (inMap) {
-      writer.end();
-      inMap = false;
-      final int nextOffset = offsets.getAccessor().get(idx() + 1);
-      offsets.getMutator().setSafe(idx() + 1, nextOffset + 1);
+    writer.end();
+    inMap = false;
 //    }
   }
 
@@ -181,11 +166,8 @@ public class UnionListWriter extends AbstractFieldWriter {
   @Override
   public void write${name}(<#list fields as field>${field.type} 
${field.name}<#if field_has_next>, </#if></#list>) {
 //    assert !inMap;
-    final int nextOffset = offsets.getAccessor().get(idx() + 1);
-    vector.getMutator().setNotNull(idx());
-    writer.setPosition(nextOffset);
     writer.write${name}(<#list fields as field>${field.name}<#if 
field_has_next>, </#if></#list>);
-    offsets.getMutator().setSafe(idx() + 1, nextOffset + 1);
+    writer.setPosition(writer.idx()+1);
   }
 
   </#if>

http://git-wip-us.apache.org/repos/asf/arrow/blob/3d2e4df2/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java 
b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
index bb71033..1f0baae 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
@@ -19,18 +19,14 @@ package org.apache.arrow.vector;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.complex.ListVector;
-import org.apache.arrow.vector.complex.impl.ComplexCopier;
-import org.apache.arrow.vector.complex.impl.UnionListReader;
 import org.apache.arrow.vector.complex.impl.UnionListWriter;
 import org.apache.arrow.vector.complex.reader.FieldReader;
-import org.apache.arrow.vector.types.pojo.Field;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
 public class TestListVector {
-  private final static String EMPTY_SCHEMA_PATH = "";
 
   private BufferAllocator allocator;
 

http://git-wip-us.apache.org/repos/asf/arrow/blob/3d2e4df2/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
----------------------------------------------------------------------
diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
 
b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
index 9419f88..6e0e617 100644
--- 
a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
+++ 
b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
@@ -65,10 +65,10 @@ public class TestComplexWriter {
     IntWriter intWriter = rootWriter.integer("int");
     BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
     for (int i = 0; i < COUNT; i++) {
-      intWriter.setPosition(i);
+      rootWriter.start();
       intWriter.writeInt(i);
-      bigIntWriter.setPosition(i);
       bigIntWriter.writeBigInt(i);
+      rootWriter.end();
     }
     writer.setValueCount(COUNT);
     MapReader rootReader = new SingleMapReaderImpl(parent).reader("root");
@@ -83,23 +83,52 @@ public class TestComplexWriter {
 
   @Test
   public void nullableMap() {
-    MapVector parent = new MapVector("parent", allocator, null);
-    ComplexWriter writer = new ComplexWriterImpl("root", parent);
-    MapWriter rootWriter = writer.rootAsMap();
-    for (int i = 0; i < COUNT; i++) {
-      rootWriter.setPosition(i);
-      rootWriter.start();
-      if (i % 2 == 0) {
-        MapWriter mapWriter = rootWriter.map("map");
-        mapWriter.setPosition(i);
-        mapWriter.start();
-        mapWriter.bigInt("nested").writeBigInt(i);
-        mapWriter.end();
+    try (MapVector mapVector = new MapVector("parent", allocator, null)) {
+      ComplexWriter writer = new ComplexWriterImpl("root", mapVector);
+      MapWriter rootWriter = writer.rootAsMap();
+      for (int i = 0; i < COUNT; i++) {
+        rootWriter.start();
+        if (i % 2 == 0) {
+          MapWriter mapWriter = rootWriter.map("map");
+          mapWriter.setPosition(i);
+          mapWriter.start();
+          mapWriter.bigInt("nested").writeBigInt(i);
+          mapWriter.end();
+        }
+        rootWriter.end();
       }
-      rootWriter.end();
+      writer.setValueCount(COUNT);
+      checkNullableMap(mapVector);
     }
-    writer.setValueCount(COUNT);
-    MapReader rootReader = new SingleMapReaderImpl(parent).reader("root");
+  }
+
+  /**
+   * This test is similar to {@link #nullableMap()} ()} but we get the inner 
map writer once at the beginning
+   */
+  @Test
+  public void nullableMap2() {
+    try (MapVector mapVector = new MapVector("parent", allocator, null)) {
+      ComplexWriter writer = new ComplexWriterImpl("root", mapVector);
+      MapWriter rootWriter = writer.rootAsMap();
+      MapWriter mapWriter = rootWriter.map("map");
+
+      for (int i = 0; i < COUNT; i++) {
+        rootWriter.start();
+        if (i % 2 == 0) {
+          mapWriter.setPosition(i);
+          mapWriter.start();
+          mapWriter.bigInt("nested").writeBigInt(i);
+          mapWriter.end();
+        }
+        rootWriter.end();
+      }
+      writer.setValueCount(COUNT);
+      checkNullableMap(mapVector);
+    }
+  }
+
+  private void checkNullableMap(MapVector mapVector) {
+    MapReader rootReader = new SingleMapReaderImpl(mapVector).reader("root");
     for (int i = 0; i < COUNT; i++) {
       rootReader.setPosition(i);
       assertTrue("index is set: " + i, rootReader.isSet());
@@ -113,11 +142,10 @@ public class TestComplexWriter {
         assertNull("index is not set: " + i, map.readObject());
       }
     }
-    parent.close();
   }
 
   @Test
-  public void listOfLists() {
+  public void testList() {
     MapVector parent = new MapVector("parent", allocator, null);
     ComplexWriter writer = new ComplexWriterImpl("root", parent);
     MapWriter rootWriter = writer.rootAsMap();
@@ -129,7 +157,6 @@ public class TestComplexWriter {
     rootWriter.list("list").endList();
     rootWriter.end();
 
-    rootWriter.setPosition(1);
     rootWriter.start();
     rootWriter.bigInt("int").writeBigInt(1);
     rootWriter.end();
@@ -152,7 +179,6 @@ public class TestComplexWriter {
     listVector.allocateNew();
     UnionListWriter listWriter = new UnionListWriter(listVector);
     for (int i = 0; i < COUNT; i++) {
-      listWriter.setPosition(i);
       listWriter.startList();
       for (int j = 0; j < i % 7; j++) {
         listWriter.writeInt(j);
@@ -206,7 +232,6 @@ public class TestComplexWriter {
     UnionListWriter listWriter = new UnionListWriter(listVector);
     MapWriter mapWriter = listWriter.map();
     for (int i = 0; i < COUNT; i++) {
-      listWriter.setPosition(i);
       listWriter.startList();
       for (int j = 0; j < i % 7; j++) {
         mapWriter.start();
@@ -230,23 +255,53 @@ public class TestComplexWriter {
 
   @Test
   public void listListType() {
-    ListVector listVector = new ListVector("list", allocator, null);
-    listVector.allocateNew();
-    UnionListWriter listWriter = new UnionListWriter(listVector);
-    for (int i = 0; i < COUNT; i++) {
-      listWriter.setPosition(i);
-      listWriter.startList();
-      for (int j = 0; j < i % 7; j++) {
-        ListWriter innerListWriter = listWriter.list();
-        innerListWriter.startList();
-        for (int k = 0; k < i % 13; k++) {
-          innerListWriter.integer().writeInt(k);
+    try (ListVector listVector = new ListVector("list", allocator, null)) {
+      listVector.allocateNew();
+      UnionListWriter listWriter = new UnionListWriter(listVector);
+      for (int i = 0; i < COUNT; i++) {
+        listWriter.startList();
+        for (int j = 0; j < i % 7; j++) {
+          ListWriter innerListWriter = listWriter.list();
+          innerListWriter.startList();
+          for (int k = 0; k < i % 13; k++) {
+            innerListWriter.integer().writeInt(k);
+          }
+          innerListWriter.endList();
         }
-        innerListWriter.endList();
+        listWriter.endList();
       }
-      listWriter.endList();
+      listWriter.setValueCount(COUNT);
+      checkListOfLists(listVector);
     }
-    listWriter.setValueCount(COUNT);
+  }
+
+  /**
+   * This test is similar to {@link #listListType()} but we get the inner list 
writer once at the beginning
+   */
+  @Test
+  public void listListType2() {
+    try (ListVector listVector = new ListVector("list", allocator, null)) {
+      listVector.allocateNew();
+      UnionListWriter listWriter = new UnionListWriter(listVector);
+      ListWriter innerListWriter = listWriter.list();
+
+      for (int i = 0; i < COUNT; i++) {
+        listWriter.startList();
+        for (int j = 0; j < i % 7; j++) {
+          innerListWriter.startList();
+          for (int k = 0; k < i % 13; k++) {
+            innerListWriter.integer().writeInt(k);
+          }
+          innerListWriter.endList();
+        }
+        listWriter.endList();
+      }
+      listWriter.setValueCount(COUNT);
+      checkListOfLists(listVector);
+    }
+  }
+
+  private void checkListOfLists(final ListVector listVector) {
     UnionListReader listReader = new UnionListReader(listVector);
     for (int i = 0; i < COUNT; i++) {
       listReader.setPosition(i);
@@ -259,32 +314,65 @@ public class TestComplexWriter {
         }
       }
     }
-    listVector.clear();
   }
 
   @Test
   public void unionListListType() {
-    ListVector listVector = new ListVector("list", allocator, null);
-    listVector.allocateNew();
-    UnionListWriter listWriter = new UnionListWriter(listVector);
-    for (int i = 0; i < COUNT; i++) {
-      listWriter.setPosition(i);
-      listWriter.startList();
-      for (int j = 0; j < i % 7; j++) {
-        ListWriter innerListWriter = listWriter.list();
-        innerListWriter.startList();
-        for (int k = 0; k < i % 13; k++) {
-          if (k % 2 == 0) {
-            innerListWriter.integer().writeInt(k);
-          } else {
-            innerListWriter.bigInt().writeBigInt(k);
+    try (ListVector listVector = new ListVector("list", allocator, null)) {
+      listVector.allocateNew();
+      UnionListWriter listWriter = new UnionListWriter(listVector);
+      for (int i = 0; i < COUNT; i++) {
+        listWriter.startList();
+        for (int j = 0; j < i % 7; j++) {
+          ListWriter innerListWriter = listWriter.list();
+          innerListWriter.startList();
+          for (int k = 0; k < i % 13; k++) {
+            if (k % 2 == 0) {
+              innerListWriter.integer().writeInt(k);
+            } else {
+              innerListWriter.bigInt().writeBigInt(k);
+            }
           }
+          innerListWriter.endList();
         }
-        innerListWriter.endList();
+        listWriter.endList();
       }
-      listWriter.endList();
+      listWriter.setValueCount(COUNT);
+      checkUnionList(listVector);
     }
-    listWriter.setValueCount(COUNT);
+  }
+
+  /**
+   * This test is similar to {@link #unionListListType()} but we get the inner 
list writer once at the beginning
+   */
+  @Test
+  public void unionListListType2() {
+    try (ListVector listVector = new ListVector("list", allocator, null)) {
+      listVector.allocateNew();
+      UnionListWriter listWriter = new UnionListWriter(listVector);
+      ListWriter innerListWriter = listWriter.list();
+
+      for (int i = 0; i < COUNT; i++) {
+        listWriter.startList();
+        for (int j = 0; j < i % 7; j++) {
+          innerListWriter.startList();
+          for (int k = 0; k < i % 13; k++) {
+            if (k % 2 == 0) {
+              innerListWriter.integer().writeInt(k);
+            } else {
+              innerListWriter.bigInt().writeBigInt(k);
+            }
+          }
+          innerListWriter.endList();
+        }
+        listWriter.endList();
+      }
+      listWriter.setValueCount(COUNT);
+      checkUnionList(listVector);
+    }
+  }
+
+  private void checkUnionList(ListVector listVector) {
     UnionListReader listReader = new UnionListReader(listVector);
     for (int i = 0; i < COUNT; i++) {
       listReader.setPosition(i);
@@ -301,7 +389,6 @@ public class TestComplexWriter {
         }
       }
     }
-    listVector.clear();
   }
 
   @Test
@@ -384,8 +471,8 @@ public class TestComplexWriter {
     MapVector parent = new MapVector("parent", allocator, null);
     ComplexWriter writer = new ComplexWriterImpl("root", parent);
     MapWriter rootWriter = writer.rootAsMap();
-    BigIntWriter bigIntWriter = rootWriter.bigInt("a");
-    VarCharWriter varCharWriter = rootWriter.varChar("a");
+    rootWriter.bigInt("a");
+    rootWriter.varChar("a");
 
     Field field = parent.getField().getChildren().get(0).getChildren().get(0);
     Assert.assertEquals("a", field.getName());

Reply via email to