Repository: arrow Updated Branches: refs/heads/master 2f8449337 -> 3d2e4df21
ARROW-337: UnionListWriter.list() is doing more than it should, this ⦠â¦can cause data corruption The general idea is to use the "inner" writer's position to update the offset. This involves making sure various writers do indeed update their positions. UnionListWriter.startList() should explicitly set the inner writer position in case setPosition() was called to move the union list writer's position Author: adeneche <adene...@dremio.com> Closes #183 from adeneche/ARROW-337 and squashes the following commits: 1ae7e00 [adeneche] updated TestComplexWriter to ensure position is set properly by the various writers 7d5aefc [adeneche] ARROW-337: UnionListWriter.list() is doing more than it should, this can cause data corruption Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/3d2e4df2 Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/3d2e4df2 Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/3d2e4df2 Branch: refs/heads/master Commit: 3d2e4df219d6b06a3d78821bbca6ba17188908c2 Parents: 2f84493 Author: adeneche <adene...@dremio.com> Authored: Wed Oct 26 12:09:26 2016 -0700 Committer: Julien Le Dem <jul...@dremio.com> Committed: Wed Oct 26 12:09:26 2016 -0700 ---------------------------------------------------------------------- .../AbstractPromotableFieldWriter.java | 2 + .../src/main/codegen/templates/MapWriters.java | 1 + .../main/codegen/templates/UnionListWriter.java | 32 +-- .../org/apache/arrow/vector/TestListVector.java | 4 - .../complex/writer/TestComplexWriter.java | 201 +++++++++++++------ 5 files changed, 154 insertions(+), 86 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/3d2e4df2/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java index d21dcd0..60dd0c7 100644 --- a/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java +++ b/java/vector/src/main/codegen/templates/AbstractPromotableFieldWriter.java @@ -58,6 +58,7 @@ abstract class AbstractPromotableFieldWriter extends AbstractFieldWriter { @Override public void end() { getWriter(MinorType.MAP).end(); + setPosition(idx() + 1); } @Override @@ -68,6 +69,7 @@ abstract class AbstractPromotableFieldWriter extends AbstractFieldWriter { @Override public void endList() { getWriter(MinorType.LIST).endList(); + setPosition(idx() + 1); } <#list vv.types as type><#list type.minor as minor><#assign name = minor.class?cap_first /> http://git-wip-us.apache.org/repos/asf/arrow/blob/3d2e4df2/java/vector/src/main/codegen/templates/MapWriters.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/templates/MapWriters.java b/java/vector/src/main/codegen/templates/MapWriters.java index 51327b4..f41b600 100644 --- a/java/vector/src/main/codegen/templates/MapWriters.java +++ b/java/vector/src/main/codegen/templates/MapWriters.java @@ -185,6 +185,7 @@ public class ${mode}MapWriter extends AbstractFieldWriter { @Override public void end() { + setPosition(idx()+1); } <#list vv.types as type><#list type.minor as minor> http://git-wip-us.apache.org/repos/asf/arrow/blob/3d2e4df2/java/vector/src/main/codegen/templates/UnionListWriter.java ---------------------------------------------------------------------- diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 04531a7..bb39fe8 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -101,11 +101,7 @@ public class UnionListWriter extends AbstractFieldWriter { public ${name}Writer <#if uncappedName == "int">integer<#else>${uncappedName}</#if>(String name) { // assert inMap; mapName = name; - final int nextOffset = offsets.getAccessor().get(idx() + 1); - vector.getMutator().setNotNull(idx()); - writer.setPosition(nextOffset); - ${name}Writer ${uncappedName}Writer = writer.<#if uncappedName == "int">integer<#else>${uncappedName}</#if>(name); - return ${uncappedName}Writer; + return writer.<#if uncappedName == "int">integer<#else>${uncappedName}</#if>(name); } </#if> @@ -120,18 +116,11 @@ public class UnionListWriter extends AbstractFieldWriter { @Override public ListWriter list() { - final int nextOffset = offsets.getAccessor().get(idx() + 1); - vector.getMutator().setNotNull(idx()); - offsets.getMutator().setSafe(idx() + 1, nextOffset + 1); - writer.setPosition(nextOffset); return writer; } @Override public ListWriter list(String name) { - final int nextOffset = offsets.getAccessor().get(idx() + 1); - vector.getMutator().setNotNull(idx()); - writer.setPosition(nextOffset); ListWriter listWriter = writer.list(name); return listWriter; } @@ -145,30 +134,26 @@ public class UnionListWriter extends AbstractFieldWriter { @Override public void startList() { vector.getMutator().startNewValue(idx()); + writer.setPosition(offsets.getAccessor().get(idx() + 1)); } @Override public void endList() { - + offsets.getMutator().set(idx() + 1, writer.idx()); + setPosition(idx() + 1); } @Override public void start() { // assert inMap; - final int nextOffset = offsets.getAccessor().get(idx() + 1); - vector.getMutator().setNotNull(idx()); - offsets.getMutator().setSafe(idx() + 1, nextOffset); - writer.setPosition(nextOffset); writer.start(); } @Override public void end() { // if (inMap) { - writer.end(); - inMap = false; - final int nextOffset = offsets.getAccessor().get(idx() + 1); - offsets.getMutator().setSafe(idx() + 1, nextOffset + 1); + writer.end(); + inMap = false; // } } @@ -181,11 +166,8 @@ public class UnionListWriter extends AbstractFieldWriter { @Override public void write${name}(<#list fields as field>${field.type} ${field.name}<#if field_has_next>, </#if></#list>) { // assert !inMap; - final int nextOffset = offsets.getAccessor().get(idx() + 1); - vector.getMutator().setNotNull(idx()); - writer.setPosition(nextOffset); writer.write${name}(<#list fields as field>${field.name}<#if field_has_next>, </#if></#list>); - offsets.getMutator().setSafe(idx() + 1, nextOffset + 1); + writer.setPosition(writer.idx()+1); } </#if> http://git-wip-us.apache.org/repos/asf/arrow/blob/3d2e4df2/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java index bb71033..1f0baae 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java @@ -19,18 +19,14 @@ package org.apache.arrow.vector; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.ListVector; -import org.apache.arrow.vector.complex.impl.ComplexCopier; -import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.complex.impl.UnionListWriter; import org.apache.arrow.vector.complex.reader.FieldReader; -import org.apache.arrow.vector.types.pojo.Field; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; public class TestListVector { - private final static String EMPTY_SCHEMA_PATH = ""; private BufferAllocator allocator; http://git-wip-us.apache.org/repos/asf/arrow/blob/3d2e4df2/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java ---------------------------------------------------------------------- diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 9419f88..6e0e617 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -65,10 +65,10 @@ public class TestComplexWriter { IntWriter intWriter = rootWriter.integer("int"); BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt"); for (int i = 0; i < COUNT; i++) { - intWriter.setPosition(i); + rootWriter.start(); intWriter.writeInt(i); - bigIntWriter.setPosition(i); bigIntWriter.writeBigInt(i); + rootWriter.end(); } writer.setValueCount(COUNT); MapReader rootReader = new SingleMapReaderImpl(parent).reader("root"); @@ -83,23 +83,52 @@ public class TestComplexWriter { @Test public void nullableMap() { - MapVector parent = new MapVector("parent", allocator, null); - ComplexWriter writer = new ComplexWriterImpl("root", parent); - MapWriter rootWriter = writer.rootAsMap(); - for (int i = 0; i < COUNT; i++) { - rootWriter.setPosition(i); - rootWriter.start(); - if (i % 2 == 0) { - MapWriter mapWriter = rootWriter.map("map"); - mapWriter.setPosition(i); - mapWriter.start(); - mapWriter.bigInt("nested").writeBigInt(i); - mapWriter.end(); + try (MapVector mapVector = new MapVector("parent", allocator, null)) { + ComplexWriter writer = new ComplexWriterImpl("root", mapVector); + MapWriter rootWriter = writer.rootAsMap(); + for (int i = 0; i < COUNT; i++) { + rootWriter.start(); + if (i % 2 == 0) { + MapWriter mapWriter = rootWriter.map("map"); + mapWriter.setPosition(i); + mapWriter.start(); + mapWriter.bigInt("nested").writeBigInt(i); + mapWriter.end(); + } + rootWriter.end(); } - rootWriter.end(); + writer.setValueCount(COUNT); + checkNullableMap(mapVector); } - writer.setValueCount(COUNT); - MapReader rootReader = new SingleMapReaderImpl(parent).reader("root"); + } + + /** + * This test is similar to {@link #nullableMap()} ()} but we get the inner map writer once at the beginning + */ + @Test + public void nullableMap2() { + try (MapVector mapVector = new MapVector("parent", allocator, null)) { + ComplexWriter writer = new ComplexWriterImpl("root", mapVector); + MapWriter rootWriter = writer.rootAsMap(); + MapWriter mapWriter = rootWriter.map("map"); + + for (int i = 0; i < COUNT; i++) { + rootWriter.start(); + if (i % 2 == 0) { + mapWriter.setPosition(i); + mapWriter.start(); + mapWriter.bigInt("nested").writeBigInt(i); + mapWriter.end(); + } + rootWriter.end(); + } + writer.setValueCount(COUNT); + checkNullableMap(mapVector); + } + } + + private void checkNullableMap(MapVector mapVector) { + MapReader rootReader = new SingleMapReaderImpl(mapVector).reader("root"); for (int i = 0; i < COUNT; i++) { rootReader.setPosition(i); assertTrue("index is set: " + i, rootReader.isSet()); @@ -113,11 +142,10 @@ public class TestComplexWriter { assertNull("index is not set: " + i, map.readObject()); } } - parent.close(); } @Test - public void listOfLists() { + public void testList() { MapVector parent = new MapVector("parent", allocator, null); ComplexWriter writer = new ComplexWriterImpl("root", parent); MapWriter rootWriter = writer.rootAsMap(); @@ -129,7 +157,6 @@ public class TestComplexWriter { rootWriter.list("list").endList(); rootWriter.end(); - rootWriter.setPosition(1); rootWriter.start(); rootWriter.bigInt("int").writeBigInt(1); rootWriter.end(); @@ -152,7 +179,6 @@ public class TestComplexWriter { listVector.allocateNew(); UnionListWriter listWriter = new UnionListWriter(listVector); for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); listWriter.startList(); for (int j = 0; j < i % 7; j++) { listWriter.writeInt(j); @@ -206,7 +232,6 @@ public class TestComplexWriter { UnionListWriter listWriter = new UnionListWriter(listVector); MapWriter mapWriter = listWriter.map(); for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); listWriter.startList(); for (int j = 0; j < i % 7; j++) { mapWriter.start(); @@ -230,23 +255,53 @@ public class TestComplexWriter { @Test public void listListType() { - ListVector listVector = new ListVector("list", allocator, null); - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - ListWriter innerListWriter = listWriter.list(); - innerListWriter.startList(); - for (int k = 0; k < i % 13; k++) { - innerListWriter.integer().writeInt(k); + try (ListVector listVector = new ListVector("list", allocator, null)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + for (int i = 0; i < COUNT; i++) { + listWriter.startList(); + for (int j = 0; j < i % 7; j++) { + ListWriter innerListWriter = listWriter.list(); + innerListWriter.startList(); + for (int k = 0; k < i % 13; k++) { + innerListWriter.integer().writeInt(k); + } + innerListWriter.endList(); } - innerListWriter.endList(); + listWriter.endList(); } - listWriter.endList(); + listWriter.setValueCount(COUNT); + checkListOfLists(listVector); } - listWriter.setValueCount(COUNT); + } + + /** + * This test is similar to {@link #listListType()} but we get the inner list writer once at the beginning + */ + @Test + public void listListType2() { + try (ListVector listVector = new ListVector("list", allocator, null)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + ListWriter innerListWriter = listWriter.list(); + + for (int i = 0; i < COUNT; i++) { + listWriter.startList(); + for (int j = 0; j < i % 7; j++) { + innerListWriter.startList(); + for (int k = 0; k < i % 13; k++) { + innerListWriter.integer().writeInt(k); + } + innerListWriter.endList(); + } + listWriter.endList(); + } + listWriter.setValueCount(COUNT); + checkListOfLists(listVector); + } + } + + private void checkListOfLists(final ListVector listVector) { UnionListReader listReader = new UnionListReader(listVector); for (int i = 0; i < COUNT; i++) { listReader.setPosition(i); @@ -259,32 +314,65 @@ public class TestComplexWriter { } } } - listVector.clear(); } @Test public void unionListListType() { - ListVector listVector = new ListVector("list", allocator, null); - listVector.allocateNew(); - UnionListWriter listWriter = new UnionListWriter(listVector); - for (int i = 0; i < COUNT; i++) { - listWriter.setPosition(i); - listWriter.startList(); - for (int j = 0; j < i % 7; j++) { - ListWriter innerListWriter = listWriter.list(); - innerListWriter.startList(); - for (int k = 0; k < i % 13; k++) { - if (k % 2 == 0) { - innerListWriter.integer().writeInt(k); - } else { - innerListWriter.bigInt().writeBigInt(k); + try (ListVector listVector = new ListVector("list", allocator, null)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + for (int i = 0; i < COUNT; i++) { + listWriter.startList(); + for (int j = 0; j < i % 7; j++) { + ListWriter innerListWriter = listWriter.list(); + innerListWriter.startList(); + for (int k = 0; k < i % 13; k++) { + if (k % 2 == 0) { + innerListWriter.integer().writeInt(k); + } else { + innerListWriter.bigInt().writeBigInt(k); + } } + innerListWriter.endList(); } - innerListWriter.endList(); + listWriter.endList(); } - listWriter.endList(); + listWriter.setValueCount(COUNT); + checkUnionList(listVector); } - listWriter.setValueCount(COUNT); + } + + /** + * This test is similar to {@link #unionListListType()} but we get the inner list writer once at the beginning + */ + @Test + public void unionListListType2() { + try (ListVector listVector = new ListVector("list", allocator, null)) { + listVector.allocateNew(); + UnionListWriter listWriter = new UnionListWriter(listVector); + ListWriter innerListWriter = listWriter.list(); + + for (int i = 0; i < COUNT; i++) { + listWriter.startList(); + for (int j = 0; j < i % 7; j++) { + innerListWriter.startList(); + for (int k = 0; k < i % 13; k++) { + if (k % 2 == 0) { + innerListWriter.integer().writeInt(k); + } else { + innerListWriter.bigInt().writeBigInt(k); + } + } + innerListWriter.endList(); + } + listWriter.endList(); + } + listWriter.setValueCount(COUNT); + checkUnionList(listVector); + } + } + + private void checkUnionList(ListVector listVector) { UnionListReader listReader = new UnionListReader(listVector); for (int i = 0; i < COUNT; i++) { listReader.setPosition(i); @@ -301,7 +389,6 @@ public class TestComplexWriter { } } } - listVector.clear(); } @Test @@ -384,8 +471,8 @@ public class TestComplexWriter { MapVector parent = new MapVector("parent", allocator, null); ComplexWriter writer = new ComplexWriterImpl("root", parent); MapWriter rootWriter = writer.rootAsMap(); - BigIntWriter bigIntWriter = rootWriter.bigInt("a"); - VarCharWriter varCharWriter = rootWriter.varChar("a"); + rootWriter.bigInt("a"); + rootWriter.varChar("a"); Field field = parent.getField().getChildren().get(0).getChildren().get(0); Assert.assertEquals("a", field.getName());