This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new ea5ce0305d GH-36375: [Java] Added creating MapWriter in ComplexWriter.
(#36351)
ea5ce0305d is described below
commit ea5ce0305d0792517183408e2eed25bdac267e54
Author: Ivan Chesnov <[email protected]>
AuthorDate: Wed Jul 5 18:30:56 2023 +0300
GH-36375: [Java] Added creating MapWriter in ComplexWriter. (#36351)
Added a new method rootAsMap() to ComplexWriter and implemented it in
ComplexWriterImpl to support the map type.
Previously, on the Dremio side,
I tried to return a map as ComplexWriter output
with this code:
org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter mapWriter =
out.rootAsList().map(false);
mapWriter.startMap();
for (java.util.Map.Entry<java.lang.Integer, java.lang.Integer> element :
map.entrySet()) {
mapWriter.startEntry();
mapWriter.key().integer().writeInt((Integer) element.getKey());
mapWriter.value().integer().writeInt((Integer) element.getValue());
mapWriter.endEntry();
}
mapWriter.endMap();
It used UnionMapWriter and generated a schema like:
EXPR$0: Map(false)<$data$: Union(Sparse, [1, 39])<struct: Struct<key:
Int(32, true) not null, value: Int(32, true) not null> not null, map:
Map(false)<entries: Struct<key: Int(32, true) not null, value: Int(32, true)>
not null>>>
But in the OutputDerivation implementation class, where I have to create the
output CompleteType,
List<Field> children = Arrays.asList( CompleteType.INT.toField("key",
false), CompleteType.INT.toField("value", false));
return new CompleteType(CompleteType.MAP.getType(),
CompleteType.struct(children).toField(MapVector.DATA_VECTOR_NAME, false));
(This is the only valid case, because of
MapVector.initializeChildrenFromFields())
it returns
EXPR$0::map<key::int32, value::int32>. I found the place where it starts using
a union: PromotableWriter.promoteToUnion.
And in the end I get
SCHEMA_CHANGE ERROR: Schema changed during projection. Schema was
schema(EXPR$0::map<key::int32, value::int32>)
but then changed to
schema(EXPR$0::map<struct::struct<key::int32, value::int32>,
map::map<key::int32, value::int32>>)
* Closes: #36375
Authored-by: Ivan Chesnov <[email protected]>
Signed-off-by: David Li <[email protected]>
---
.../src/main/codegen/templates/BaseWriter.java | 1 +
.../vector/complex/impl/ComplexWriterImpl.java | 37 +++++++-
.../vector/complex/writer/TestComplexWriter.java | 104 +++++++++++++++++++++
3 files changed, 141 insertions(+), 1 deletion(-)
diff --git a/java/vector/src/main/codegen/templates/BaseWriter.java
b/java/vector/src/main/codegen/templates/BaseWriter.java
index 4d63fb73e9..3b35d22692 100644
--- a/java/vector/src/main/codegen/templates/BaseWriter.java
+++ b/java/vector/src/main/codegen/templates/BaseWriter.java
@@ -106,6 +106,7 @@ public interface BaseWriter extends AutoCloseable,
Positionable {
void copyReader(FieldReader reader);
StructWriter rootAsStruct();
ListWriter rootAsList();
+ MapWriter rootAsMap(boolean keysSorted);
void setPosition(int index);
void setValueCount(int count);
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java
b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java
index 13b26bb67d..8d2694b6df 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/ComplexWriterImpl.java
@@ -19,6 +19,7 @@ package org.apache.arrow.vector.complex.impl;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.StateTool;
import org.apache.arrow.vector.complex.StructVector;
@@ -32,6 +33,7 @@ public class ComplexWriterImpl extends AbstractFieldWriter
implements ComplexWri
private NullableStructWriter structRoot;
private UnionListWriter listRoot;
+ private UnionMapWriter mapRoot;
private final NonNullableStructVector container;
Mode mode = Mode.INIT;
@@ -39,7 +41,7 @@ public class ComplexWriterImpl extends AbstractFieldWriter
implements ComplexWri
private final boolean unionEnabled;
private final NullableStructWriterFactory nullableStructWriterFactory;
- private enum Mode { INIT, STRUCT, LIST }
+ private enum Mode { INIT, STRUCT, LIST, MAP }
/**
* Constructs a new instance.
@@ -107,6 +109,9 @@ public class ComplexWriterImpl extends AbstractFieldWriter
implements ComplexWri
case LIST:
listRoot.clear();
break;
+ case MAP:
+ mapRoot.clear();
+ break;
default:
break;
}
@@ -121,6 +126,9 @@ public class ComplexWriterImpl extends AbstractFieldWriter
implements ComplexWri
case LIST:
listRoot.setValueCount(count);
break;
+ case MAP:
+ mapRoot.setValueCount(count);
+ break;
default:
break;
}
@@ -136,6 +144,9 @@ public class ComplexWriterImpl extends AbstractFieldWriter
implements ComplexWri
case LIST:
listRoot.setPosition(index);
break;
+ case MAP:
+ mapRoot.setPosition(index);
+ break;
default:
break;
}
@@ -223,5 +234,29 @@ public class ComplexWriterImpl extends AbstractFieldWriter
implements ComplexWri
return listRoot;
}
+ @Override
+ public MapWriter rootAsMap(boolean keysSorted) {
+ switch (mode) {
+ case INIT:
+ int vectorCount = container.size();
+ // TODO allow dictionaries in complex types
+ MapVector mapVector = container.addOrGetMap(name, keysSorted);
+ if (container.size() > vectorCount) {
+ mapVector.allocateNew();
+ }
+ mapRoot = new UnionMapWriter(mapVector);
+ mapRoot.setPosition(idx());
+ mode = Mode.MAP;
+ break;
+
+ case MAP:
+ break;
+
+ default:
+ check(Mode.INIT, Mode.STRUCT);
+ }
+
+ return mapRoot;
+ }
}
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
index 9f7f66083a..6f7f5abd30 100644
---
a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
+++
b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
@@ -1563,4 +1563,108 @@ public class TestComplexWriter {
Assert.assertEquals(0, size);
}
}
+
+ @Test
+ public void testMap() {
+ try (NonNullableStructVector parent =
NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ MapWriter mapWriter = writer.rootAsMap(false);
+ for (int i = 0; i < COUNT; i++) {
+ mapWriter.startMap();
+ for (int j = 0; j < i % 7; j++) {
+ mapWriter.startEntry();
+ if (j % 2 == 0) {
+ mapWriter.key().integer().writeInt(j);
+ mapWriter.value().integer().writeInt(j + 1);
+ } else {
+ IntHolder keyHolder = new IntHolder();
+ keyHolder.value = j;
+ IntHolder valueHolder = new IntHolder();
+ valueHolder.value = j + 1;
+ mapWriter.key().integer().write(keyHolder);
+ mapWriter.value().integer().write(valueHolder);
+ }
+ mapWriter.endEntry();
+ }
+ mapWriter.endMap();
+ }
+ writer.setValueCount(COUNT);
+ UnionMapReader mapReader = (UnionMapReader) new
SingleStructReaderImpl(parent).reader("root");
+ for (int i = 0; i < COUNT; i++) {
+ mapReader.setPosition(i);
+ for (int j = 0; j < i % 7; j++) {
+ mapReader.next();
+ assertEquals(j, mapReader.key().readInteger().intValue());
+ assertEquals(j + 1, mapReader.value().readInteger().intValue());
+ }
+ }
+ }
+ }
+
+ @Test
+ public void testMapWithNulls() {
+ try (NonNullableStructVector parent =
NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ MapWriter mapWriter = writer.rootAsMap(false);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().integer().writeNull();
+ mapWriter.value().integer().writeInt(1);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+ writer.setValueCount(1);
+ UnionMapReader mapReader = (UnionMapReader) new
SingleStructReaderImpl(parent).reader("root");
+ Assert.assertNull(mapReader.key().readInteger());
+ assertEquals(1, mapReader.value().readInteger().intValue());
+ }
+ }
+
+ @Test
+ public void testMapWithListKey() {
+ try (NonNullableStructVector parent =
NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ MapWriter mapWriter = writer.rootAsMap(false);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().list().startList();
+ for (int i = 0; i < 3; i++) {
+ mapWriter.key().list().integer().writeInt(i);
+ }
+ mapWriter.key().list().endList();
+ mapWriter.value().integer().writeInt(1);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+ writer.setValueCount(1);
+ UnionMapReader mapReader = (UnionMapReader) new
SingleStructReaderImpl(parent).reader("root");
+ mapReader.key().next();
+ assertEquals(0, mapReader.key().reader().readInteger().intValue());
+ mapReader.key().next();
+ assertEquals(1, mapReader.key().reader().readInteger().intValue());
+ mapReader.key().next();
+ assertEquals(2, mapReader.key().reader().readInteger().intValue());
+ assertEquals(1, mapReader.value().readInteger().intValue());
+ }
+ }
+
+ @Test
+ public void testMapWithStructKey() {
+ try (NonNullableStructVector parent =
NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ MapWriter mapWriter = writer.rootAsMap(false);
+ mapWriter.startMap();
+ mapWriter.startEntry();
+ mapWriter.key().struct().start();
+ mapWriter.key().struct().integer("value1").writeInt(1);
+ mapWriter.key().struct().integer("value2").writeInt(2);
+ mapWriter.key().struct().end();
+ mapWriter.value().integer().writeInt(1);
+ mapWriter.endEntry();
+ mapWriter.endMap();
+ writer.setValueCount(1);
+ UnionMapReader mapReader = (UnionMapReader) new
SingleStructReaderImpl(parent).reader("root");
+ assertEquals(1,
mapReader.key().reader("value1").readInteger().intValue());
+ assertEquals(2,
mapReader.key().reader("value2").readInteger().intValue());
+ assertEquals(1, mapReader.value().readInteger().intValue());
+ }
+ }
}