This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 3711657d8d GH-40942: [Java] Implement C Data Interface for StringView
(#41967)
3711657d8d is described below
commit 3711657d8d9e9c583ed8829418b3833b79f05037
Author: Vibhatha Lakmal Abeykoon <[email protected]>
AuthorDate: Fri Jun 21 05:58:37 2024 +0530
GH-40942: [Java] Implement C Data Interface for StringView (#41967)
### Rationale for this change
The recent addition of `Utf8View` and `BinaryView` support to Java also
requires C Data Interface support so that these types can be exchanged with other systems.
### What changes are included in this PR?
- [X] Adding core functionality for C Data interface for `Utf8View` and
`BinaryView`
- [X] Adding tests to `RoundtripTest`
- [X] Adding tests to `StreamTest`
### Are these changes tested?
Yes, with new tests.
### Are there any user-facing changes?
No
* GitHub Issue: #40942
Authored-by: Vibhatha Abeykoon <[email protected]>
Signed-off-by: David Li <[email protected]>
---
dev/archery/archery/integration/datagen.py | 1 -
.../java/org/apache/arrow/c/ArrayExporter.java | 10 +-
.../apache/arrow/c/BufferImportTypeVisitor.java | 38 ++++-
.../c/src/main/java/org/apache/arrow/c/Format.java | 8 +
.../org/apache/arrow/c/StructVectorUnloader.java | 2 +-
.../java/org/apache/arrow/c/DictionaryTest.java | 56 +++++++
.../java/org/apache/arrow/c/RoundtripTest.java | 75 +++++++++
.../test/java/org/apache/arrow/c/StreamTest.java | 86 +++++++++++
java/c/src/test/python/integration_tests.py | 56 +++++++
.../apache/arrow/tools/ArrowFileTestFixtures.java | 88 +++++++++++
.../org/apache/arrow/tools/TestIntegration.java | 56 +++++++
.../arrow/vector/BaseVariableWidthViewVector.java | 49 +++++-
.../java/org/apache/arrow/vector/BufferLayout.java | 9 +-
.../java/org/apache/arrow/vector/FieldVector.java | 9 ++
.../java/org/apache/arrow/vector/TypeLayout.java | 36 ++++-
.../org/apache/arrow/vector/VectorUnloader.java | 2 +-
.../apache/arrow/vector/ipc/JsonFileReader.java | 168 ++++++++++++++++++---
.../apache/arrow/vector/ipc/JsonFileWriter.java | 142 ++++++++++++++++-
.../vector/ipc/message/MessageSerializer.java | 13 +-
.../apache/arrow/vector/TestVarCharViewVector.java | 60 ++++++++
.../vector/complex/writer/TestComplexWriter.java | 21 +++
21 files changed, 931 insertions(+), 54 deletions(-)
diff --git a/dev/archery/archery/integration/datagen.py
b/dev/archery/archery/integration/datagen.py
index f6302165cd..b51f3d876f 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1932,7 +1932,6 @@ def get_generated_json_files(tempdir=None):
.skip_tester('Rust'),
generate_binary_view_case()
- .skip_tester('Java')
.skip_tester('JS')
.skip_tester('nanoarrow')
.skip_tester('Rust'),
diff --git a/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java
b/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java
index 2d2086eda5..820a152274 100644
--- a/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java
+++ b/java/c/src/main/java/org/apache/arrow/c/ArrayExporter.java
@@ -72,7 +72,6 @@ final class ArrayExporter {
void export(ArrowArray array, FieldVector vector, DictionaryProvider
dictionaryProvider) {
List<FieldVector> children = vector.getChildrenFromFields();
- List<ArrowBuf> buffers = vector.getFieldBuffers();
int valueCount = vector.getValueCount();
int nullCount = vector.getNullCount();
DictionaryEncoding dictionaryEncoding = vector.getField().getDictionary();
@@ -89,11 +88,10 @@ final class ArrayExporter {
}
}
- if (buffers != null) {
- data.buffers = new ArrayList<>(buffers.size());
- data.buffers_ptrs = allocator.buffer((long) buffers.size() *
Long.BYTES);
- vector.exportCDataBuffers(data.buffers, data.buffers_ptrs, NULL);
- }
+ data.buffers = new ArrayList<>(vector.getExportedCDataBufferCount());
+ data.buffers_ptrs =
+ allocator.buffer((long) (vector.getExportedCDataBufferCount()) *
Long.BYTES);
+ vector.exportCDataBuffers(data.buffers, data.buffers_ptrs, NULL);
if (dictionaryEncoding != null) {
Dictionary dictionary =
dictionaryProvider.lookup(dictionaryEncoding.getId());
diff --git
a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
index 9c4dd1ba2c..5f262d3dc3 100644
--- a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
+++ b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java
@@ -27,6 +27,7 @@ import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.AutoCloseables;
import org.apache.arrow.util.VisibleForTesting;
+import org.apache.arrow.vector.BaseVariableWidthViewVector;
import org.apache.arrow.vector.DateDayVector;
import org.apache.arrow.vector.DateMilliVector;
import org.apache.arrow.vector.DurationVector;
@@ -51,7 +52,6 @@ import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.UnionVector;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
import org.apache.arrow.vector.types.pojo.ArrowType;
-import org.apache.arrow.vector.types.pojo.ArrowType.ListView;
import org.apache.arrow.vector.util.DataSizeRoundingUtil;
/** Import buffers from a C Data Interface struct. */
@@ -227,10 +227,37 @@ class BufferImportTypeVisitor implements
ArrowType.ArrowTypeVisitor<List<ArrowBu
}
}
+ private List<ArrowBuf> visitVariableWidthView(ArrowType type) {
+ final int viewBufferIndex = 1;
+ final int variadicSizeBufferIndex = this.buffers.length - 1;
+ final long numOfVariadicBuffers = this.buffers.length - 3;
+ final long variadicSizeBufferCapacity = numOfVariadicBuffers * Long.BYTES;
+ List<ArrowBuf> buffers = new ArrayList<>();
+
+ ArrowBuf variadicSizeBuffer =
+ importBuffer(type, variadicSizeBufferIndex,
variadicSizeBufferCapacity);
+
+ ArrowBuf view =
+ importFixedBytes(type, viewBufferIndex,
BaseVariableWidthViewVector.ELEMENT_SIZE);
+ buffers.add(maybeImportBitmap(type));
+ buffers.add(view);
+
+ // 0th buffer is validity buffer
+ // 1st buffer is view buffer
+ // 2nd buffer onwards are variadic buffer
+ // N-1 (this.buffers.length - 1) buffer is variadic size buffer
+ final int variadicBufferReadOffset = 2;
+ for (int i = 0; i < numOfVariadicBuffers; i++) {
+ long size = variadicSizeBuffer.getLong((long) i * Long.BYTES);
+ buffers.add(importBuffer(type, i + variadicBufferReadOffset, size));
+ }
+
+ return buffers;
+ }
+
@Override
public List<ArrowBuf> visit(ArrowType.Utf8View type) {
- throw new UnsupportedOperationException(
- "Importing buffers for view type: " + type + " not supported");
+ return visitVariableWidthView(type);
}
@Override
@@ -270,8 +297,7 @@ class BufferImportTypeVisitor implements
ArrowType.ArrowTypeVisitor<List<ArrowBu
@Override
public List<ArrowBuf> visit(ArrowType.BinaryView type) {
- throw new UnsupportedOperationException(
- "Importing buffers for view type: " + type + " not supported");
+ return visitVariableWidthView(type);
}
@Override
@@ -373,7 +399,7 @@ class BufferImportTypeVisitor implements
ArrowType.ArrowTypeVisitor<List<ArrowBu
}
@Override
- public List<ArrowBuf> visit(ListView type) {
+ public List<ArrowBuf> visit(ArrowType.ListView type) {
throw new UnsupportedOperationException(
"Importing buffers for view type: " + type + " not supported");
}
diff --git a/java/c/src/main/java/org/apache/arrow/c/Format.java
b/java/c/src/main/java/org/apache/arrow/c/Format.java
index e1308bf8c3..aff51e7b73 100644
--- a/java/c/src/main/java/org/apache/arrow/c/Format.java
+++ b/java/c/src/main/java/org/apache/arrow/c/Format.java
@@ -225,6 +225,10 @@ final class Format {
}
case Utf8:
return "u";
+ case Utf8View:
+ return "vu";
+ case BinaryView:
+ return "vz";
case NONE:
throw new IllegalArgumentException("Arrow type ID is NONE");
default:
@@ -305,6 +309,10 @@ final class Format {
case "+m":
boolean keysSorted = (flags & Flags.ARROW_FLAG_MAP_KEYS_SORTED) != 0;
return new ArrowType.Map(keysSorted);
+ case "vu":
+ return new ArrowType.Utf8View();
+ case "vz":
+ return new ArrowType.BinaryView();
default:
String[] parts = format.split(":", 2);
if (parts.length == 2) {
diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java
b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java
index 568c505978..6f094b92c7 100644
--- a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java
+++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java
@@ -108,7 +108,7 @@ public class StructVectorUnloader {
int expectedBufferCount =
(int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) +
variadicBufferCount);
// only update variadicBufferCounts for vectors that have variadic buffers
- if (variadicBufferCount > 0) {
+ if (vector instanceof BaseVariableWidthViewVector) {
variadicBufferCounts.add(variadicBufferCount);
}
if (fieldBuffers.size() != expectedBufferCount) {
diff --git a/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java
b/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java
index d00309744d..ce0e82586b 100644
--- a/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java
+++ b/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java
@@ -260,6 +260,28 @@ public class DictionaryTest {
vector.setValueCount(2);
}
+ private void createStructVectorInline(StructVector vector) {
+ final ViewVarCharVector child1 =
+ vector.addOrGet(
+ "f0", FieldType.nullable(MinorType.VIEWVARCHAR.getType()),
ViewVarCharVector.class);
+ final IntVector child2 =
+ vector.addOrGet("f1", FieldType.nullable(MinorType.INT.getType()),
IntVector.class);
+
+ // Write the values to child 1
+ child1.allocateNew();
+ child1.set(0, "012345678".getBytes());
+ child1.set(1, "01234".getBytes());
+ vector.setIndexDefined(0);
+
+ // Write the values to child 2
+ child2.allocateNew();
+ child2.set(0, 10);
+ child2.set(1, 11);
+ vector.setIndexDefined(1);
+
+ vector.setValueCount(2);
+ }
+
@Test
public void testVectorLoadUnloadOnStructVector() {
try (final StructVector structVector1 = StructVector.empty("struct",
allocator)) {
@@ -293,4 +315,38 @@ public class DictionaryTest {
}
}
}
+
+ @Test
+ public void testVectorLoadUnloadOnStructVectorWithInline() {
+ try (final StructVector structVector1 = StructVector.empty("struct",
allocator)) {
+ createStructVectorInline(structVector1);
+ Field field1 = structVector1.getField();
+ Schema schema = new Schema(field1.getChildren());
+ StructVectorUnloader vectorUnloader = new
StructVectorUnloader(structVector1);
+
+ try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ BufferAllocator finalVectorsAllocator =
+ allocator.newChildAllocator("struct", 0, Long.MAX_VALUE); ) {
+ // validating recordBatch contains an output for variadicBufferCounts
+ assertFalse(recordBatch.getVariadicBufferCounts().isEmpty());
+ assertEquals(1, recordBatch.getVariadicBufferCounts().size());
+ assertEquals(0, recordBatch.getVariadicBufferCounts().get(0));
+
+ StructVectorLoader vectorLoader = new StructVectorLoader(schema);
+ try (StructVector structVector2 =
vectorLoader.load(finalVectorsAllocator, recordBatch)) {
+ // Improve this after fixing
https://github.com/apache/arrow/issues/41933
+ // assertTrue(VectorEqualsVisitor.vectorEquals(structVector1,
structVector2), "vectors are
+ // not equivalent");
+ assertTrue(
+ VectorEqualsVisitor.vectorEquals(
+ structVector1.getChild("f0"), structVector2.getChild("f0")),
+ "vectors are not equivalent");
+ assertTrue(
+ VectorEqualsVisitor.vectorEquals(
+ structVector1.getChild("f1"), structVector2.getChild("f1")),
+ "vectors are not equivalent");
+ }
+ }
+ }
+ }
}
diff --git a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
index a0dd77f003..6591d1f730 100644
--- a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
+++ b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java
@@ -78,6 +78,8 @@ import org.apache.arrow.vector.ValueVector;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ViewVarBinaryVector;
+import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.ZeroVector;
import org.apache.arrow.vector.compare.VectorEqualsVisitor;
import org.apache.arrow.vector.complex.FixedSizeListVector;
@@ -524,6 +526,79 @@ public class RoundtripTest {
}
}
+ private String generateString(String str, int repetition) {
+ StringBuilder aRepeated = new StringBuilder();
+ for (int i = 0; i < repetition; i++) {
+ aRepeated.append(str);
+ }
+ return aRepeated.toString();
+ }
+
+ @Test
+ public void testViewVector() {
+ // ViewVarCharVector with short strings
+ try (final ViewVarCharVector vector = new ViewVarCharVector("v1",
allocator)) {
+ setVector(
+ vector,
+ "abc".getBytes(StandardCharsets.UTF_8),
+ "def".getBytes(StandardCharsets.UTF_8),
+ null);
+ assertTrue(roundtrip(vector, ViewVarCharVector.class));
+ }
+
+ // ViewVarCharVector with long strings
+ try (final ViewVarCharVector vector = new ViewVarCharVector("v2",
allocator)) {
+ setVector(
+ vector,
+ "01234567890123".getBytes(StandardCharsets.UTF_8),
+ "01234567890123567".getBytes(StandardCharsets.UTF_8),
+ null);
+ assertTrue(roundtrip(vector, ViewVarCharVector.class));
+ }
+
+ // ViewVarBinaryVector with short values
+ try (final ViewVarBinaryVector vector = new ViewVarBinaryVector("v3",
allocator)) {
+ setVector(
+ vector,
+ "abc".getBytes(StandardCharsets.UTF_8),
+ "def".getBytes(StandardCharsets.UTF_8),
+ null);
+ assertTrue(roundtrip(vector, ViewVarBinaryVector.class));
+ }
+
+ // ViewVarBinaryVector with long values
+ try (final ViewVarBinaryVector vector = new ViewVarBinaryVector("v4",
allocator)) {
+ setVector(
+ vector,
+ "01234567890123".getBytes(StandardCharsets.UTF_8),
+ "01234567890123567".getBytes(StandardCharsets.UTF_8),
+ null);
+ assertTrue(roundtrip(vector, ViewVarBinaryVector.class));
+ }
+
+ List<byte[]> byteArrayList = new ArrayList<>();
+ for (int i = 1; i <= 500; i++) {
+ StringBuilder sb = new StringBuilder(i);
+ for (int j = 0; j < i; j++) {
+ sb.append(j); // or any other character
+ }
+ byte[] bytes = sb.toString().getBytes(StandardCharsets.UTF_8);
+ byteArrayList.add(bytes);
+ }
+
+ // ViewVarCharVector with short long strings with multiple data buffers
+ try (final ViewVarCharVector vector = new ViewVarCharVector("v5",
allocator)) {
+ setVector(vector, byteArrayList.toArray(new byte[0][]));
+ assertTrue(roundtrip(vector, ViewVarCharVector.class));
+ }
+
+ // ViewVarBinaryVector with short long strings with multiple data buffers
+ try (final ViewVarBinaryVector vector = new ViewVarBinaryVector("v6",
allocator)) {
+ setVector(vector, byteArrayList.toArray(new byte[0][]));
+ assertTrue(roundtrip(vector, ViewVarBinaryVector.class));
+ }
+ }
+
@Test
public void testVarCharVector() {
try (final VarCharVector vector = new VarCharVector("v", allocator)) {
diff --git a/java/c/src/test/java/org/apache/arrow/c/StreamTest.java
b/java/c/src/test/java/org/apache/arrow/c/StreamTest.java
index 059e30a439..95363fcc32 100644
--- a/java/c/src/test/java/org/apache/arrow/c/StreamTest.java
+++ b/java/c/src/test/java/org/apache/arrow/c/StreamTest.java
@@ -38,6 +38,8 @@ import org.apache.arrow.vector.VarCharVector;
import org.apache.arrow.vector.VectorLoader;
import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.ViewVarBinaryVector;
+import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.compare.Range;
import org.apache.arrow.vector.compare.RangeEqualsVisitor;
import org.apache.arrow.vector.dictionary.Dictionary;
@@ -134,6 +136,90 @@ final class StreamTest {
}
}
+ @Test
+ public void roundtripStringViews() throws Exception {
+ final Schema schema =
+ new Schema(
+ Arrays.asList(
+ Field.nullable("ints", new ArrowType.Int(32, true)),
+ Field.nullable("string_views", new ArrowType.Utf8View())));
+ final List<ArrowRecordBatch> batches = new ArrayList<>();
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema,
allocator)) {
+ final IntVector ints = (IntVector) root.getVector(0);
+ final ViewVarCharVector strs = (ViewVarCharVector) root.getVector(1);
+ VectorUnloader unloader = new VectorUnloader(root);
+
+ root.allocateNew();
+ ints.setSafe(0, 1);
+ ints.setSafe(1, 2);
+ ints.setSafe(2, 4);
+ ints.setSafe(3, 8);
+ strs.setSafe(0, "".getBytes(StandardCharsets.UTF_8));
+ strs.setSafe(1, "a".getBytes(StandardCharsets.UTF_8));
+ strs.setSafe(2, "bc1234567890bc".getBytes(StandardCharsets.UTF_8));
+ strs.setSafe(3, "defg1234567890defg".getBytes(StandardCharsets.UTF_8));
+ root.setRowCount(4);
+ batches.add(unloader.getRecordBatch());
+
+ root.allocateNew();
+ ints.setSafe(0, 1);
+ ints.setNull(1);
+ ints.setSafe(2, 4);
+ ints.setNull(3);
+ strs.setSafe(0, "".getBytes(StandardCharsets.UTF_8));
+ strs.setNull(1);
+ strs.setSafe(2, "bc1234567890bc".getBytes(StandardCharsets.UTF_8));
+ strs.setNull(3);
+ root.setRowCount(4);
+ batches.add(unloader.getRecordBatch());
+ roundtrip(schema, batches);
+ }
+ }
+
+ @Test
+ public void roundtripBinaryViews() throws Exception {
+ final Schema schema =
+ new Schema(
+ Arrays.asList(
+ Field.nullable("ints", new ArrowType.Int(32, true)),
+ Field.nullable("binary_views", new ArrowType.BinaryView())));
+ final List<ArrowRecordBatch> batches = new ArrayList<>();
+ try (final VectorSchemaRoot root = VectorSchemaRoot.create(schema,
allocator)) {
+ final IntVector ints = (IntVector) root.getVector(0);
+ final ViewVarBinaryVector strs = (ViewVarBinaryVector) root.getVector(1);
+ VectorUnloader unloader = new VectorUnloader(root);
+
+ root.allocateNew();
+ ints.setSafe(0, 1);
+ ints.setSafe(1, 2);
+ ints.setSafe(2, 4);
+ ints.setSafe(3, 8);
+ strs.setSafe(0, new byte[0]);
+ strs.setSafe(1, new byte[] {97});
+ strs.setSafe(2, new byte[] {98, 99, 49, 50, 51, 52, 53, 54, 55, 56, 57,
48, 98, 99});
+ strs.setSafe(
+ 3,
+ new byte[] {
+ 100, 101, 102, 103, 49, 50, 51, 52, 53, 54, 55, 56, 57, 48, 100,
101, 102, 103
+ });
+ root.setRowCount(4);
+ batches.add(unloader.getRecordBatch());
+
+ root.allocateNew();
+ ints.setSafe(0, 1);
+ ints.setNull(1);
+ ints.setSafe(2, 4);
+ ints.setNull(3);
+ strs.setSafe(0, new byte[0]);
+ strs.setNull(1);
+ strs.setSafe(2, new byte[] {98, 99, 49, 50, 51, 52, 53, 54, 55, 56, 57,
48, 98, 99});
+ strs.setNull(3);
+ root.setRowCount(4);
+ batches.add(unloader.getRecordBatch());
+ roundtrip(schema, batches);
+ }
+ }
+
@Test
public void roundtripDictionary() throws Exception {
final ArrowType.Int indexType = new ArrowType.Int(32, true);
diff --git a/java/c/src/test/python/integration_tests.py
b/java/c/src/test/python/integration_tests.py
index eb8bb8c4b2..ab2ee1742f 100644
--- a/java/c/src/test/python/integration_tests.py
+++ b/java/c/src/test/python/integration_tests.py
@@ -51,6 +51,13 @@ def setup_jvm():
kwargs = {}
# This will be the default behaviour in jpype 0.8+
kwargs['convertStrings'] = False
+
+ # For debugging purpose please uncomment the following, and include
*jvm_args, before **kwargs
+ # in startJVM function call
+ # jvm_args = [
+ #
"-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005"
+ # ]
+
jpype.startJVM(jpype.getDefaultJVMPath(), "-Djava.class.path=" + jar_path,
**kwargs)
@@ -183,6 +190,55 @@ class TestPythonIntegration(unittest.TestCase):
def test_string_array(self):
self.round_trip_array(lambda: pa.array([None, "a", "bb", "ccc"]))
+ def test_stringview_array(self):
+ # with nulls short strings
+ self.round_trip_array(lambda: pa.array([None, "a", "bb", "c"],
type=pa.string_view()))
+ # with nulls long and strings
+ self.round_trip_array(lambda: pa.array([None, "a", "bb"*10, "c"*13],
type=pa.string_view()))
+ # without nulls short strings
+ self.round_trip_array(lambda: pa.array(["a", "bb", "c"],
type=pa.string_view()))
+ # without nulls long and strings
+ self.round_trip_array(lambda: pa.array(["a", "bb"*10, "c"*13],
type=pa.string_view()))
+ # with multiple data buffers
+ arr1 = pa.array(["a", "bb", "c"], type=pa.string_view())
+ arr2 = pa.array(["b", "ee" * 10, "f" * 20], type=pa.string_view())
+ arr3 = pa.array(["c", "abc" * 20, "efg" * 30], type=pa.string_view())
+ arr4 = pa.array(["d", "abcd" * 100, "efgh" * 200],
type=pa.string_view())
+ self.round_trip_array(lambda: pa.concat_arrays([arr1, arr2, arr3,
arr4]))
+ # empty strings
+ self.round_trip_array(lambda: pa.array(["", "bb" * 10, "c", "", "d",
""], type=pa.string_view()))
+ # null value variations
+ self.round_trip_array(lambda: pa.array(["bb" * 10, None, "", "d",
None], type=pa.string_view()))
+ # empty array
+ self.round_trip_array(lambda: pa.array([], type=pa.string_view()))
+ # all null array
+ self.round_trip_array(lambda: pa.array([None, None, None],
type=pa.string_view()))
+
+ def test_binaryview_array(self):
+ # with nulls short binary values
+ self.round_trip_array(lambda: pa.array([None, bytes([97]), bytes([98,
98]), bytes([99])], type=pa.binary_view()))
+ # with nulls long binary values
+ self.round_trip_array(lambda: pa.array([None, bytes([97]), bytes([98,
98] * 10), bytes([99] * 13)], type=pa.binary_view()))
+ # without nulls short binary values
+ self.round_trip_array(lambda: pa.array([bytes([97]), bytes([98, 98]),
bytes([99])], type=pa.binary_view()))
+ # without nulls long binary values
+ self.round_trip_array(lambda: pa.array([bytes([97]), bytes([98, 98] *
10), bytes([99] * 13)], type=pa.binary_view()))
+ # with multiple data buffers
+ arr1 = pa.array([bytes([97]), bytes([98, 98]), bytes([99])],
type=pa.binary_view())
+ arr2 = pa.array([bytes([98]), bytes([98, 98] * 10), bytes([99] * 13)],
type=pa.binary_view())
+ arr3 = pa.array([bytes([99]), bytes([98, 100] * 100), bytes([99, 100])
* 30], type=pa.binary_view())
+ arr4 = pa.array([bytes([100]), bytes([98, 100, 101] * 200), bytes([98,
99]) * 300], type=pa.binary_view())
+ self.round_trip_array(lambda: pa.concat_arrays([arr1, arr2, arr3,
arr4]))
+ # empty binary values
+ self.round_trip_array(lambda: pa.array([bytes([]), bytes([97, 97]) *
10, bytes([98]), bytes([]), bytes([97]), bytes([])],
+ type=pa.binary_view()))
+ # null value variations
+ self.round_trip_array(lambda: pa.array([bytes([97, 97]) * 10, None,
bytes([]), bytes([99]), None], type=pa.binary_view()))
+ # empty array
+ self.round_trip_array(lambda: pa.array([], type=pa.binary_view()))
+ # all null array
+ self.round_trip_array(lambda: pa.array([None, None, None],
type=pa.binary_view()))
+
def test_decimal_array(self):
data = [
round(decimal.Decimal(722.82), 2),
diff --git
a/java/tools/src/test/java/org/apache/arrow/tools/ArrowFileTestFixtures.java
b/java/tools/src/test/java/org/apache/arrow/tools/ArrowFileTestFixtures.java
index 03c1af3022..d53db2bc54 100644
--- a/java/tools/src/test/java/org/apache/arrow/tools/ArrowFileTestFixtures.java
+++ b/java/tools/src/test/java/org/apache/arrow/tools/ArrowFileTestFixtures.java
@@ -16,6 +16,7 @@
*/
package org.apache.arrow.tools;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.File;
@@ -23,17 +24,24 @@ import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.ViewVarBinaryVector;
+import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
import org.apache.arrow.vector.complex.impl.ComplexWriterImpl;
import org.apache.arrow.vector.complex.writer.BaseWriter.ComplexWriter;
import org.apache.arrow.vector.complex.writer.BaseWriter.StructWriter;
import org.apache.arrow.vector.complex.writer.BigIntWriter;
import org.apache.arrow.vector.complex.writer.IntWriter;
+import org.apache.arrow.vector.complex.writer.ViewVarBinaryWriter;
+import org.apache.arrow.vector.complex.writer.ViewVarCharWriter;
import org.apache.arrow.vector.ipc.ArrowFileReader;
import org.apache.arrow.vector.ipc.ArrowFileWriter;
import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.util.Text;
public class ArrowFileTestFixtures {
static final int COUNT = 10;
@@ -52,6 +60,44 @@ public class ArrowFileTestFixtures {
writer.setValueCount(count);
}
+ private static String generateString(int length) {
+ StringBuilder stringBuilder = new StringBuilder(length);
+
+ for (int i = 0; i < length; i++) {
+ stringBuilder.append(i);
+ }
+
+ return stringBuilder.toString();
+ }
+
+ private static byte[] generateBytes(int length) {
+ byte[] bytes = new byte[length];
+ for (int i = 0; i < length; i++) {
+ bytes[i] = (byte) i;
+ }
+ return bytes;
+ }
+
+ static void writeVariableWidthViewData(int count, NonNullableStructVector
parent) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent);
+ StructWriter rootWriter = writer.rootAsStruct();
+ ViewVarCharWriter viewVarCharWriter =
rootWriter.viewVarChar("viewVarChar");
+ ViewVarBinaryWriter viewVarBinaryWriter =
rootWriter.viewVarBinary("viewVarBinary");
+ IntWriter intWriter = rootWriter.integer("int");
+ BigIntWriter bigIntWriter = rootWriter.bigInt("bigInt");
+ for (int i = 0; i < count; i++) {
+ viewVarCharWriter.setPosition(i);
+ viewVarCharWriter.writeViewVarChar(generateString(i));
+ viewVarBinaryWriter.setPosition(i);
+ viewVarBinaryWriter.writeViewVarBinary(generateBytes(i));
+ intWriter.setPosition(i);
+ intWriter.writeInt(i);
+ bigIntWriter.setPosition(i);
+ bigIntWriter.writeBigInt(i);
+ }
+ writer.setValueCount(count);
+ }
+
static void validateOutput(File testOutFile, BufferAllocator allocator)
throws Exception {
// read
try (BufferAllocator readerAllocator =
@@ -69,6 +115,24 @@ public class ArrowFileTestFixtures {
}
}
+ static void validateVariadicOutput(File testOutFile, BufferAllocator
allocator, int count)
+ throws Exception {
+ // read
+ try (BufferAllocator readerAllocator =
+ allocator.newChildAllocator("reader", 0, Integer.MAX_VALUE);
+ FileInputStream fileInputStream = new FileInputStream(testOutFile);
+ ArrowFileReader arrowReader =
+ new ArrowFileReader(fileInputStream.getChannel(),
readerAllocator)) {
+ VectorSchemaRoot root = arrowReader.getVectorSchemaRoot();
+ for (ArrowBlock rbBlock : arrowReader.getRecordBlocks()) {
+ if (!arrowReader.loadRecordBatch(rbBlock)) {
+ throw new IOException("Expected to read record batch");
+ }
+ validateVariadicContent(count, root);
+ }
+ }
+ }
+
static void validateContent(int count, VectorSchemaRoot root) {
assertEquals(count, root.getRowCount());
for (int i = 0; i < count; i++) {
@@ -77,6 +141,20 @@ public class ArrowFileTestFixtures {
}
}
+ static void validateVariadicContent(int count, VectorSchemaRoot root) {
+ assertEquals(count, root.getRowCount());
+ ViewVarCharVector viewVarCharVector = (ViewVarCharVector)
root.getVector("viewVarChar");
+ ViewVarBinaryVector viewVarBinaryVector = (ViewVarBinaryVector)
root.getVector("viewVarBinary");
+ IntVector intVector = (IntVector) root.getVector("int");
+ BigIntVector bigIntVector = (BigIntVector) root.getVector("bigInt");
+ for (int i = 0; i < count; i++) {
+ assertEquals(new Text(generateString(i)),
viewVarCharVector.getObject(i));
+ assertArrayEquals(generateBytes(i), viewVarBinaryVector.get(i));
+ assertEquals(i, intVector.getObject(i));
+ assertEquals(Long.valueOf(i), bigIntVector.getObject(i));
+ }
+ }
+
static void write(FieldVector parent, File file) throws IOException {
VectorSchemaRoot root = new VectorSchemaRoot(parent);
try (FileOutputStream fileOutputStream = new FileOutputStream(file);
@@ -94,4 +172,14 @@ public class ArrowFileTestFixtures {
write(parent.getChild("root"), testInFile);
}
}
+
+ static void writeVariableWidthViewInput(File testInFile, BufferAllocator
allocator, int count)
+ throws IOException {
+ try (BufferAllocator vectorAllocator =
+ allocator.newChildAllocator("original view vectors", 0,
Integer.MAX_VALUE);
+ NonNullableStructVector parent =
NonNullableStructVector.empty("parent", vectorAllocator)) {
+ writeVariableWidthViewData(count, parent);
+ write(parent.getChild("root"), testInFile);
+ }
+ }
}
diff --git
a/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java
b/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java
index e7f59b628b..28f9a9010f 100644
--- a/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java
+++ b/java/tools/src/test/java/org/apache/arrow/tools/TestIntegration.java
@@ -17,9 +17,11 @@
package org.apache.arrow.tools;
import static org.apache.arrow.tools.ArrowFileTestFixtures.validateOutput;
+import static
org.apache.arrow.tools.ArrowFileTestFixtures.validateVariadicOutput;
import static org.apache.arrow.tools.ArrowFileTestFixtures.write;
import static org.apache.arrow.tools.ArrowFileTestFixtures.writeData;
import static org.apache.arrow.tools.ArrowFileTestFixtures.writeInput;
+import static
org.apache.arrow.tools.ArrowFileTestFixtures.writeVariableWidthViewInput;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -345,4 +347,58 @@ public class TestIntegration {
assertTrue(e.getMessage().contains("Different values in column"),
e.getMessage());
assertTrue(e.getMessage().contains("999"), e.getMessage());
}
+
+ @Test
+ public void testValidateVariableWidthView() throws Exception {
+ final int valueCount = 256;
+ final int multiplier = 6;
+
+ for (int i = 1; i < multiplier; i++) {
+ File testInFile = new File(testFolder, "testIn.arrow");
+ File testJSONFile = new File(testFolder, "testOut.json");
+ testJSONFile.delete();
+ File testOutFile = new File(testFolder, "testOut.arrow");
+ testOutFile.delete();
+
+ writeVariableWidthViewInput(testInFile, allocator, multiplier *
valueCount);
+
+ Integration integration = new Integration();
+
+ // convert it to json
+ String[] args1 = {
+ "-arrow",
+ testInFile.getAbsolutePath(),
+ "-json",
+ testJSONFile.getAbsolutePath(),
+ "-command",
+ Command.ARROW_TO_JSON.name()
+ };
+ integration.run(args1);
+
+ // convert back to arrow
+ String[] args2 = {
+ "-arrow",
+ testOutFile.getAbsolutePath(),
+ "-json",
+ testJSONFile.getAbsolutePath(),
+ "-command",
+ Command.JSON_TO_ARROW.name()
+ };
+ integration.run(args2);
+
+ // check it is the same
+ validateVariadicOutput(testOutFile, allocator, multiplier * valueCount);
+
+ // validate arrow against json
+ String[] args3 = {
+ "-arrow",
+ testInFile.getAbsolutePath(),
+ "-json",
+ testJSONFile.getAbsolutePath(),
+ "-command",
+ Command.VALIDATE.name()
+ };
+ integration.run(args3);
+ }
+ }
}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java
index 64630c3ef3..f0c84bd410 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java
@@ -1373,7 +1373,7 @@ public abstract class BaseVariableWidthViewVector extends
BaseValueVector
// this is helpful in case of overwriting the value
viewBuffer.setZero(writePosition, ELEMENT_SIZE);
- if (value.length <= INLINE_SIZE) {
+ if (length <= INLINE_SIZE) {
// allocate inline buffer
// set length
viewBuffer.setInt(writePosition, length);
@@ -1668,4 +1668,51 @@ public abstract class BaseVariableWidthViewVector
extends BaseValueVector
public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
return visitor.visit(this, value);
}
+
+ /**
+ * Retrieves the export buffer count for the C Data Interface.
+ *
+ * <p>For Variadic types, an additional buffer is kept to store the size of
each variadic buffer
+ * since that information cannot be retrieved in the C Data import.
+ *
+ * <p>In the C Data Interface, the binary view import expects at least three
buffers. The variadic
+ * size buffer is merely allocated to determine the number of elements in
each variadic buffer,
+ * and it is not part of the imported data.
+ *
+ * <p>The count is set to 3 + dataBuffers.size(). Three is formed by
validity, view, and variadic
+ * size buffer.
+ *
+ * @return the number of buffers to be exported
+ */
+ @Override
+ public int getExportedCDataBufferCount() {
+ return 3 + dataBuffers.size();
+ }
+
+ /**
+ * Export the buffers of the vector. Note that an additional buffer is
appended to store the size
+ * of each variadic buffer.
+ *
+ * @param buffers list of buffers to be exported
+ * @param buffersPtr buffer to store the pointers to the exported buffers
+ * @param nullValue null value
+ */
+ @Override
+ public void exportCDataBuffers(List<ArrowBuf> buffers, ArrowBuf buffersPtr,
long nullValue) {
+ exportBuffer(validityBuffer, buffers, buffersPtr, nullValue, true);
+ exportBuffer(viewBuffer, buffers, buffersPtr, nullValue, true);
+
+ // allocating additional space to keep the size of each variadic buffer
+ ArrowBuf variadicSizeBuffer = allocator.buffer((long) Long.BYTES *
dataBuffers.size());
+ // variadicSizeBuffer.setZero(0, variadicSizeBuffer.capacity());
+ // export data buffers
+ for (int i = 0; i < dataBuffers.size(); i++) {
+ ArrowBuf dataBuf = dataBuffers.get(i);
+ // calculate sizes for variadic size buffer
+ variadicSizeBuffer.setLong((long) i * Long.BYTES, dataBuf.capacity());
+ exportBuffer(dataBuf, buffers, buffersPtr, nullValue, true);
+ }
+ // export variadic size buffer
+ exportBuffer(variadicSizeBuffer, buffers, buffersPtr, nullValue, false);
+ }
}
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java
b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java
index 6ba0392776..d4248c4ef9 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java
@@ -35,7 +35,9 @@ public class BufferLayout {
OFFSET("OFFSET"),
VALIDITY("VALIDITY"),
TYPE("TYPE_ID"),
- SIZE("SIZE");
+ SIZE("SIZE"),
+ VIEWS("VIEWS"),
+ VARIADIC_DATA_BUFFERS("VARIADIC_DATA_BUFFERS");
private final String name;
@@ -60,6 +62,7 @@ public class BufferLayout {
private static final BufferLayout VALUES_16 = new
BufferLayout(BufferType.DATA, 16);
private static final BufferLayout VALUES_8 = new
BufferLayout(BufferType.DATA, 8);
private static final BufferLayout SIZE_BUFFER = new
BufferLayout(BufferType.SIZE, 32);
+ private static final BufferLayout VIEW_BUFFER = new
BufferLayout(BufferType.VIEWS, 16);
public static BufferLayout typeBuffer() {
return TYPE_BUFFER;
@@ -112,6 +115,10 @@ public class BufferLayout {
return dataBuffer(8);
}
+ public static BufferLayout viewVector() {
+ return VIEW_BUFFER;
+ }
+
private final short typeBitWidth;
private final BufferType type;
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java
b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java
index 9163ece46d..e58f7bba84 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/FieldVector.java
@@ -56,6 +56,15 @@ public interface FieldVector extends ValueVector {
*/
List<ArrowBuf> getFieldBuffers();
+ /**
+ * Retrieves the export buffer count for the C Data Interface.
+ *
+ * @return the number of buffers to be exported
+ */
+ default int getExportedCDataBufferCount() {
+ return getFieldBuffers().size();
+ }
+
/**
* Export a given buffer and its memory address into a list of buffers and a
pointer to the list
* of buffers.
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
index 9bb25f7aef..b8535532ea 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
@@ -210,8 +210,8 @@ public class TypeLayout {
}
private TypeLayout newVariableWidthViewTypeLayout() {
- return newPrimitiveTypeLayout(
- BufferLayout.validityVector(), BufferLayout.byteVector());
+ return new TypeLayout(
+ false, BufferLayout.validityVector(),
BufferLayout.viewVector());
}
private TypeLayout newLargeVariableWidthTypeLayout() {
@@ -232,7 +232,7 @@ public class TypeLayout {
@Override
public TypeLayout visit(Null type) {
- return new TypeLayout(Collections.<BufferLayout>emptyList());
+ return new TypeLayout(Collections.emptyList());
}
@Override
@@ -433,13 +433,30 @@ public class TypeLayout {
private final List<BufferLayout> bufferLayouts;
- public TypeLayout(List<BufferLayout> bufferLayouts) {
+ private final boolean isFixedBufferCount;
+
+ /**
+ * Constructs a new {@link TypeLayout}.
+ *
+ * @param bufferLayouts the individual {@linkplain BufferLayout}s for the
given type
+ * @param isFixedBufferCount whether the number of buffers is fixed
+ */
+ public TypeLayout(List<BufferLayout> bufferLayouts, boolean
isFixedBufferCount) {
super();
this.bufferLayouts = Preconditions.checkNotNull(bufferLayouts);
+ this.isFixedBufferCount = isFixedBufferCount;
+ }
+
+ public TypeLayout(List<BufferLayout> bufferLayouts) {
+ this(bufferLayouts, true);
}
public TypeLayout(BufferLayout... bufferLayouts) {
- this(asList(bufferLayouts));
+ this(asList(bufferLayouts), true);
+ }
+
+ public TypeLayout(boolean isFixedBufferCount, BufferLayout... bufferLayouts)
{
+ this(asList(bufferLayouts), isFixedBufferCount);
}
/** Returns the individual {@linkplain BufferLayout}s for the given type. */
@@ -459,6 +476,15 @@ public class TypeLayout {
return types;
}
+ /**
+ * Determines whether the buffer count is fixed for the given type.
+ *
+ * @return true if the buffer count is fixed, false otherwise
+ */
+ public boolean isFixedBufferCount() {
+ return isFixedBufferCount;
+ }
+
@Override
public String toString() {
return bufferLayouts.toString();
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java
b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java
index 05d4f2e212..342f210b82 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java
@@ -111,7 +111,7 @@ public class VectorUnloader {
int expectedBufferCount =
(int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) +
variadicBufferCount);
// only update variadicBufferCounts for vectors that have variadic buffers
- if (variadicBufferCount > 0) {
+ if (vector instanceof BaseVariableWidthViewVector) {
variadicBufferCounts.add(variadicBufferCount);
}
if (fieldBuffers.size() != expectedBufferCount) {
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
index 72ec517e50..604f18b56b 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java
@@ -24,9 +24,12 @@ import static
org.apache.arrow.vector.BufferLayout.BufferType.DATA;
import static org.apache.arrow.vector.BufferLayout.BufferType.OFFSET;
import static org.apache.arrow.vector.BufferLayout.BufferType.TYPE;
import static org.apache.arrow.vector.BufferLayout.BufferType.VALIDITY;
+import static
org.apache.arrow.vector.BufferLayout.BufferType.VARIADIC_DATA_BUFFERS;
+import static org.apache.arrow.vector.BufferLayout.BufferType.VIEWS;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonParser.Feature;
import com.fasterxml.jackson.core.JsonToken;
import com.fasterxml.jackson.databind.MapperFeature;
import com.fasterxml.jackson.databind.MappingJsonFactory;
@@ -35,9 +38,10 @@ import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.math.BigInteger;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
@@ -47,6 +51,7 @@ import java.util.Set;
import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.util.Preconditions;
+import org.apache.arrow.vector.BaseVariableWidthViewVector;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVectorHelper;
import org.apache.arrow.vector.BufferLayout.BufferType;
@@ -65,8 +70,9 @@ import org.apache.arrow.vector.VectorSchemaRoot;
import org.apache.arrow.vector.dictionary.Dictionary;
import org.apache.arrow.vector.dictionary.DictionaryProvider;
import org.apache.arrow.vector.ipc.message.ArrowFieldNode;
-import org.apache.arrow.vector.types.Types;
+import org.apache.arrow.vector.types.Types.MinorType;
import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.ArrowType.Union;
import org.apache.arrow.vector.types.pojo.Field;
import org.apache.arrow.vector.types.pojo.Schema;
import org.apache.arrow.vector.util.DecimalUtility;
@@ -105,7 +111,7 @@ public class JsonFileReader implements AutoCloseable,
DictionaryProvider {
.configure(MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS, true));
this.parser = jsonFactory.createParser(inputFile);
// Allow reading NaN for floating point values
- this.parser.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
+ this.parser.configure(Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
}
@Override
@@ -268,7 +274,106 @@ public class JsonFileReader implements AutoCloseable,
DictionaryProvider {
}
}
+ /**
+ * Read all the variadic data buffers from the parser.
+ *
+ * @param allocator BufferAllocator
+ * @param variadicBuffersCount Number of variadic buffers
+ * @return List of ArrowBuf
+ * @throws IOException if reading from the parser fails
+ */
+ List<ArrowBuf> readVariadicBuffers(BufferAllocator allocator, int
variadicBuffersCount)
+ throws IOException {
+ readToken(START_ARRAY);
+ ArrayList<ArrowBuf> dataBuffers = new ArrayList<>(variadicBuffersCount);
+ for (int i = 0; i < variadicBuffersCount; i++) {
+ parser.nextToken();
+ final byte[] value;
+
+ String variadicStr = parser.readValueAs(String.class);
+ if (variadicStr == null) {
+ value = new byte[0];
+ } else {
+ value = decodeHexSafe(variadicStr);
+ }
+
+ ArrowBuf buf = allocator.buffer(value.length);
+ buf.writeBytes(value);
+ dataBuffers.add(buf);
+ }
+ readToken(END_ARRAY);
+ return dataBuffers;
+ }
+
+ private ArrowBuf readViewBuffers(
+ BufferAllocator allocator, int count, List<Integer>
variadicBufferIndices, MinorType type)
+ throws IOException {
+ readToken(START_ARRAY);
+ ArrayList<byte[]> values = new ArrayList<>(count);
+ long bufferSize = 0L;
+ for (int i = 0; i < count; i++) {
+ readToken(START_OBJECT);
+ final int length = readNextField("SIZE", Integer.class);
+ byte[] value;
+ if (length > BaseVariableWidthViewVector.INLINE_SIZE) {
+ // PREFIX_HEX
+ final byte[] prefix = decodeHexSafe(readNextField("PREFIX_HEX",
String.class));
+ // BUFFER_INDEX
+ final int bufferIndex = readNextField("BUFFER_INDEX", Integer.class);
+ if (variadicBufferIndices.isEmpty()) {
+ variadicBufferIndices.add(bufferIndex);
+ } else {
+ int lastBufferIndex =
variadicBufferIndices.get(variadicBufferIndices.size() - 1);
+ if (lastBufferIndex != bufferIndex) {
+ variadicBufferIndices.add(bufferIndex);
+ }
+ }
+
+ // OFFSET
+ final int offset = readNextField("OFFSET", Integer.class);
+ ByteBuffer buffer =
+ ByteBuffer.allocate(BaseVariableWidthViewVector.ELEMENT_SIZE)
+ .order(ByteOrder.LITTLE_ENDIAN); // Allocate a ByteBuffer of
size 16 bytes
+ buffer.putInt(length); // Write 'length' to bytes 0-3
+ buffer.put(prefix); // Write 'prefix' to bytes 4-7
+ buffer.putInt(bufferIndex); // Write 'bufferIndex' to bytes 8-11
+ buffer.putInt(offset); // Write 'offset' to bytes 12-15
+ value = buffer.array(); // Convert the ByteBuffer to a byte array
+ } else {
+ // in-line
+ ByteBuffer buffer =
+ ByteBuffer.allocate(BaseVariableWidthViewVector.ELEMENT_SIZE)
+ .order(ByteOrder.LITTLE_ENDIAN); // Allocate a ByteBuffer of
size 16 bytes
+ buffer.putInt(length); // Write 'length' to bytes 0-3
+ // INLINE
+ if (type == MinorType.VIEWVARCHAR) {
+ buffer.put(readNextField("INLINED",
String.class).getBytes(StandardCharsets.UTF_8));
+ } else {
+ String inlined = readNextField("INLINED", String.class);
+ if (inlined == null) {
+ buffer.put(new byte[length]);
+ } else {
+ buffer.put(decodeHexSafe(inlined));
+ }
+ }
+ value = buffer.array(); // Convert the ByteBuffer to a byte array
+ }
+ values.add(value);
+ bufferSize += value.length;
+ readToken(END_OBJECT);
+ }
+
+ ArrowBuf buf = allocator.buffer(bufferSize);
+
+ for (byte[] value : values) {
+ buf.writeBytes(value);
+ }
+ readToken(END_ARRAY);
+ return buf;
+ }
+
private class BufferHelper {
+
BufferReader BIT =
new BufferReader() {
@Override
@@ -605,21 +710,24 @@ public class JsonFileReader implements AutoCloseable,
DictionaryProvider {
};
}
- private ArrowBuf readIntoBuffer(
- BufferAllocator allocator, BufferType bufferType, Types.MinorType type,
int count)
+ private List<ArrowBuf> readIntoBuffer(
+ BufferAllocator allocator,
+ BufferType bufferType,
+ MinorType type,
+ int count,
+ List<Integer> variadicBufferIndices)
throws IOException {
ArrowBuf buf;
BufferHelper helper = new BufferHelper();
-
- BufferReader reader = null;
+ BufferReader reader;
if (bufferType.equals(VALIDITY)) {
reader = helper.BIT;
} else if (bufferType.equals(OFFSET)) {
- if (type == Types.MinorType.LARGELIST
- || type == Types.MinorType.LARGEVARCHAR
- || type == Types.MinorType.LARGEVARBINARY) {
+ if (type == MinorType.LARGELIST
+ || type == MinorType.LARGEVARCHAR
+ || type == MinorType.LARGEVARBINARY) {
reader = helper.INT8;
} else {
reader = helper.INT4;
@@ -721,25 +829,32 @@ public class JsonFileReader implements AutoCloseable,
DictionaryProvider {
default:
throw new UnsupportedOperationException("Cannot read array of type "
+ type);
}
+ } else if (bufferType.equals(VIEWS)) {
+ return Collections.singletonList(
+ readViewBuffers(allocator, count, variadicBufferIndices, type));
+ } else if (bufferType.equals(VARIADIC_DATA_BUFFERS)) {
+ return readVariadicBuffers(allocator, variadicBufferIndices.size());
} else {
throw new InvalidArrowFileException("Unrecognized buffer type " +
bufferType);
}
buf = reader.readBuffer(allocator, count);
-
Preconditions.checkNotNull(buf);
- return buf;
+ return Collections.singletonList(buf);
}
- private void readFromJsonIntoVector(Field field, FieldVector vector)
- throws JsonParseException, IOException {
+ private void readFromJsonIntoVector(Field field, FieldVector vector) throws
IOException {
ArrowType type = field.getType();
- // TODO: https://github.com/apache/arrow/issues/41733
TypeLayout typeLayout = TypeLayout.getTypeLayout(type);
List<BufferType> vectorTypes = typeLayout.getBufferTypes();
- ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()];
+ List<ArrowBuf> vectorBuffers = new ArrayList<>(vectorTypes.size());
+ List<Integer> variadicBufferIndices = new ArrayList<>();
+
+ if (!typeLayout.isFixedBufferCount()) {
+ vectorTypes.add(VARIADIC_DATA_BUFFERS);
+ }
/*
- * The order of inner buffers is :
+ * The order of inner buffers is:
* Fixed width vector:
* -- validity buffer
* -- data buffer
@@ -766,29 +881,35 @@ public class JsonFileReader implements AutoCloseable,
DictionaryProvider {
* be doing loadFieldBuffers.
*/
int valueCount = readNextField("count", Integer.class);
+
vector.setInitialCapacity(valueCount);
for (int v = 0; v < vectorTypes.size(); v++) {
BufferType bufferType = vectorTypes.get(v);
nextFieldIs(bufferType.getName());
int innerBufferValueCount = valueCount;
- if (bufferType.equals(OFFSET) && !(type instanceof ArrowType.Union)) {
+ if (bufferType.equals(OFFSET) && !(type instanceof Union)) {
/* offset buffer has 1 additional value capacity except for dense
unions */
innerBufferValueCount = valueCount + 1;
}
- vectorBuffers[v] =
- readIntoBuffer(allocator, bufferType, vector.getMinorType(),
innerBufferValueCount);
+ vectorBuffers.addAll(
+ readIntoBuffer(
+ allocator,
+ bufferType,
+ vector.getMinorType(),
+ innerBufferValueCount,
+ variadicBufferIndices));
}
int nullCount = 0;
if (type instanceof ArrowType.Null) {
nullCount = valueCount;
- } else if (!(type instanceof ArrowType.Union)) {
- nullCount = BitVectorHelper.getNullCount(vectorBuffers[0], valueCount);
+ } else if (!(type instanceof Union)) {
+ nullCount = BitVectorHelper.getNullCount(vectorBuffers.get(0),
valueCount);
}
final ArrowFieldNode fieldNode = new ArrowFieldNode(valueCount,
nullCount);
- vector.loadFieldBuffers(fieldNode, Arrays.asList(vectorBuffers));
+ vector.loadFieldBuffers(fieldNode, vectorBuffers);
/* read child vectors (if any) */
List<Field> fields = field.getChildren();
@@ -811,6 +932,7 @@ public class JsonFileReader implements AutoCloseable,
DictionaryProvider {
readToken(END_ARRAY);
}
}
+
readToken(END_OBJECT);
for (ArrowBuf buffer : vectorBuffers) {
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java
b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java
index 8d6ac759d7..d1ee890f5c 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java
@@ -27,6 +27,7 @@ import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
@@ -35,6 +36,7 @@ import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.util.Preconditions;
import org.apache.arrow.vector.BaseLargeVariableWidthVector;
import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthViewVector;
import org.apache.arrow.vector.BigIntVector;
import org.apache.arrow.vector.BitVectorHelper;
import org.apache.arrow.vector.BufferLayout.BufferType;
@@ -198,16 +200,22 @@ public class JsonFileWriter implements AutoCloseable {
}
private void writeFromVectorIntoJson(Field field, FieldVector vector) throws
IOException {
- // TODO: https://github.com/apache/arrow/issues/41733
- List<BufferType> vectorTypes =
TypeLayout.getTypeLayout(field.getType()).getBufferTypes();
+ TypeLayout typeLayout = TypeLayout.getTypeLayout(field.getType());
+ List<BufferType> vectorTypes = typeLayout.getBufferTypes();
List<ArrowBuf> vectorBuffers = vector.getFieldBuffers();
- if (vectorTypes.size() != vectorBuffers.size()) {
- throw new IllegalArgumentException(
- "vector types and inner vector buffers are not the same size: "
- + vectorTypes.size()
- + " != "
- + vectorBuffers.size());
+
+ if (typeLayout.isFixedBufferCount()) {
+ if (vectorTypes.size() != vectorBuffers.size()) {
+ throw new IllegalArgumentException(
+ "vector types and inner vector buffers are not the same size: "
+ + vectorTypes.size()
+ + " != "
+ + vectorBuffers.size());
+ }
+ } else {
+ vectorTypes.add(VARIADIC_DATA_BUFFERS);
}
+
generator.writeStartObject();
{
generator.writeObjectField("name", field.getName());
@@ -217,6 +225,8 @@ public class JsonFileWriter implements AutoCloseable {
for (int v = 0; v < vectorTypes.size(); v++) {
BufferType bufferType = vectorTypes.get(v);
ArrowBuf vectorBuffer = vectorBuffers.get(v);
+ // Note that in JSON format we cannot have VARIADIC_DATA_BUFFERS
repeated,
+ // thus the values are only written to a single entity.
generator.writeArrayFieldStart(bufferType.getName());
final int bufferValueCount =
(bufferType.equals(OFFSET) && vector.getMinorType() !=
MinorType.DENSEUNION)
@@ -227,6 +237,25 @@ public class JsonFileWriter implements AutoCloseable {
&& (vector.getMinorType() == MinorType.VARCHAR
|| vector.getMinorType() == MinorType.VARBINARY)) {
writeValueToGenerator(bufferType, vectorBuffer,
vectorBuffers.get(v - 1), vector, i);
+ } else if (bufferType.equals(VIEWS)
+ && (vector.getMinorType() == MinorType.VIEWVARCHAR
+ || vector.getMinorType() == MinorType.VIEWVARBINARY)) {
+ // writing views
+ ArrowBuf viewBuffer = vectorBuffers.get(1);
+ List<ArrowBuf> dataBuffers = vectorBuffers.subList(v + 1,
vectorBuffers.size());
+ writeValueToViewGenerator(bufferType, viewBuffer, dataBuffers,
vector, i);
+ } else if (bufferType.equals(VARIADIC_DATA_BUFFERS)
+ && (vector.getMinorType() == MinorType.VIEWVARCHAR
+ || vector.getMinorType() == MinorType.VIEWVARBINARY)) {
+ ArrowBuf viewBuffer = vectorBuffers.get(1); // check if this is v-1
+ List<ArrowBuf> dataBuffers = vectorBuffers.subList(v,
vectorBuffers.size());
+ if (!dataBuffers.isEmpty()) {
+ writeValueToDataBufferGenerator(bufferType, viewBuffer,
dataBuffers, vector);
+ // The variadic buffers are written at once and don't require
iterating for
+ // each index.
+ // So, break the loop.
+ break;
+ }
} else if (bufferType.equals(OFFSET)
&& vector.getValueCount() == 0
&& (vector.getMinorType() == MinorType.LIST
@@ -254,6 +283,7 @@ public class JsonFileWriter implements AutoCloseable {
}
generator.writeEndArray();
}
+
List<Field> fields = field.getChildren();
List<FieldVector> children = vector.getChildrenFromFields();
if (fields.size() != children.size()) {
@@ -276,6 +306,102 @@ public class JsonFileWriter implements AutoCloseable {
generator.writeEndObject();
}
+ /**
+ * Get data of a view by index.
+ *
+ * @param viewBuffer view buffer
+ * @param dataBuffers data buffers
+ * @param index index of the view
+ * @return byte array of the view
+ */
+ private byte[] getView(final ArrowBuf viewBuffer, final List<ArrowBuf>
dataBuffers, int index) {
+ final int dataLength =
+ viewBuffer.getInt((long) index *
BaseVariableWidthViewVector.ELEMENT_SIZE);
+ byte[] result = new byte[dataLength];
+
+ final int inlineSize = BaseVariableWidthViewVector.INLINE_SIZE;
+ final int elementSize = BaseVariableWidthViewVector.ELEMENT_SIZE;
+ final int lengthWidth = BaseVariableWidthViewVector.LENGTH_WIDTH;
+ final int prefixWidth = BaseVariableWidthViewVector.PREFIX_WIDTH;
+ final int bufIndexWidth = BaseVariableWidthViewVector.BUF_INDEX_WIDTH;
+
+ if (dataLength > inlineSize) {
+ // data is in the data buffer
+ // get buffer index
+ final int bufferIndex =
+ viewBuffer.getInt(((long) index * elementSize) + lengthWidth +
prefixWidth);
+ // get data offset
+ final int dataOffset =
+ viewBuffer.getInt(
+ ((long) index * elementSize) + lengthWidth + prefixWidth +
bufIndexWidth);
+ dataBuffers.get(bufferIndex).getBytes(dataOffset, result, 0, dataLength);
+ } else {
+ // data is in the view buffer
+ viewBuffer.getBytes((long) index * elementSize + lengthWidth, result, 0,
dataLength);
+ }
+ return result;
+ }
+
+ private void writeValueToViewGenerator(
+ BufferType bufferType,
+ ArrowBuf viewBuffer,
+ List<ArrowBuf> dataBuffers,
+ FieldVector vector,
+ final int index)
+ throws IOException {
+ Preconditions.checkNotNull(viewBuffer);
+ byte[] b = getView(viewBuffer, dataBuffers, index);
+ final int elementSize = BaseVariableWidthViewVector.ELEMENT_SIZE;
+ final int lengthWidth = BaseVariableWidthViewVector.LENGTH_WIDTH;
+ final int prefixWidth = BaseVariableWidthViewVector.PREFIX_WIDTH;
+ final int bufIndexWidth = BaseVariableWidthViewVector.BUF_INDEX_WIDTH;
+ final int length = viewBuffer.getInt((long) index * elementSize);
+ generator.writeStartObject();
+ generator.writeFieldName("SIZE");
+ generator.writeObject(length);
+ if (length > 12) {
+ byte[] prefix = Arrays.copyOfRange(b, 0, prefixWidth);
+ final int bufferIndex =
+ viewBuffer.getInt(((long) index * elementSize) + lengthWidth +
prefixWidth);
+ // get data offset
+ final int dataOffset =
+ viewBuffer.getInt(
+ ((long) index * elementSize) + lengthWidth + prefixWidth +
bufIndexWidth);
+ generator.writeFieldName("PREFIX_HEX");
+ generator.writeString(Hex.encodeHexString(prefix));
+ generator.writeFieldName("BUFFER_INDEX");
+ generator.writeObject(bufferIndex);
+ generator.writeFieldName("OFFSET");
+ generator.writeObject(dataOffset);
+ } else {
+ generator.writeFieldName("INLINED");
+ if (vector.getMinorType() == MinorType.VIEWVARCHAR) {
+ generator.writeString(new String(b, "UTF-8"));
+ } else {
+ generator.writeString(Hex.encodeHexString(b));
+ }
+ }
+ generator.writeEndObject();
+ }
+
+ private void writeValueToDataBufferGenerator(
+ BufferType bufferType, ArrowBuf viewBuffer, List<ArrowBuf> dataBuffers,
FieldVector vector)
+ throws IOException {
+ if (bufferType.equals(VARIADIC_DATA_BUFFERS)) {
+ Preconditions.checkNotNull(viewBuffer);
+ Preconditions.checkArgument(!dataBuffers.isEmpty());
+
+ for (int i = 0; i < dataBuffers.size(); i++) {
+ ArrowBuf dataBuf = dataBuffers.get(i);
+ byte[] result = new byte[(int) dataBuf.writerIndex()];
+ dataBuf.getBytes(0, result);
+ if (result != null) {
+ generator.writeString(Hex.encodeHexString(result));
+ }
+ }
+ }
+ }
+
private void writeValueToGenerator(
BufferType bufferType,
ArrowBuf buffer,
diff --git
a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java
b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java
index 406d0a5502..36f6ea449b 100644
---
a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java
+++
b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java
@@ -304,6 +304,7 @@ public class MessageSerializer {
+ layout.getSize());
}
}
+
out.align();
return out.getCurrentPosition() - bufferStart;
}
@@ -435,13 +436,23 @@ public class MessageSerializer {
: new ArrowBodyCompression(
recordBatchFB.compression().codec(),
recordBatchFB.compression().method());
+ List<Long> variadicBufferCounts = new ArrayList<>();
+ for (int i = 0; i < recordBatchFB.variadicBufferCountsLength(); i++) {
+ variadicBufferCounts.add(recordBatchFB.variadicBufferCounts(i));
+ }
+
if ((int) recordBatchFB.length() != recordBatchFB.length()) {
throw new IOException(
"Cannot currently deserialize record batches with more than INT_MAX
records.");
}
ArrowRecordBatch arrowRecordBatch =
new ArrowRecordBatch(
- checkedCastToInt(recordBatchFB.length()), nodes, buffers,
bodyCompression);
+ checkedCastToInt(recordBatchFB.length()),
+ nodes,
+ buffers,
+ bodyCompression,
+ variadicBufferCounts,
+ /*alignBuffers*/ true);
body.getReferenceManager().release();
return arrowRecordBatch;
}
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java
b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java
index 43d49fc7a3..308431fdeb 100644
---
a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java
+++
b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java
@@ -1542,6 +1542,66 @@ public class TestVarCharViewVector {
}
}
+ @Test
+ public void testVectorLoadUnloadInLine() {
+
+ try (final ViewVarCharVector vector1 = new ViewVarCharVector("myvector",
allocator)) {
+
+ setVector(vector1, STR0, STR1, STR4, STR5, STR6);
+
+ assertEquals(4, vector1.getLastSet());
+ vector1.setValueCount(15);
+ assertEquals(14, vector1.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR0, vector1.get(0));
+ assertArrayEquals(STR1, vector1.get(1));
+ assertArrayEquals(STR4, vector1.get(2));
+ assertArrayEquals(STR5, vector1.get(3));
+ assertArrayEquals(STR6, vector1.get(4));
+
+ Field field = vector1.getField();
+ String fieldName = field.getName();
+
+ List<Field> fields = new ArrayList<>();
+ List<FieldVector> fieldVectors = new ArrayList<>();
+
+ fields.add(field);
+ fieldVectors.add(vector1);
+
+ Schema schema = new Schema(fields);
+
+ VectorSchemaRoot schemaRoot1 =
+ new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount());
+ VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1);
+
+ try (ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch();
+ BufferAllocator finalVectorsAllocator =
+ allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE);
+ VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema,
finalVectorsAllocator); ) {
+
+ VectorLoader vectorLoader = new VectorLoader(schemaRoot2);
+ vectorLoader.load(recordBatch);
+
+ ViewVarCharVector vector2 = (ViewVarCharVector)
schemaRoot2.getVector(fieldName);
+ /*
+ * lastSet would have internally been set by VectorLoader.load() when
it invokes
+ * loadFieldBuffers.
+ */
+ assertEquals(14, vector2.getLastSet());
+ vector2.setValueCount(25);
+ assertEquals(24, vector2.getLastSet());
+
+ /* Check the vector output */
+ assertArrayEquals(STR0, vector2.get(0));
+ assertArrayEquals(STR1, vector2.get(1));
+ assertArrayEquals(STR4, vector2.get(2));
+ assertArrayEquals(STR5, vector2.get(3));
+ assertArrayEquals(STR6, vector2.get(4));
+ }
+ }
+ }
+
@Test
public void testVectorLoadUnload() {
diff --git
a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
index 21589f4ff4..654940908b 100644
---
a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
+++
b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java
@@ -45,6 +45,7 @@ import org.apache.arrow.vector.LargeVarCharVector;
import org.apache.arrow.vector.SchemaChangeCallBack;
import org.apache.arrow.vector.VarBinaryVector;
import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ViewVarCharVector;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.MapVector;
import org.apache.arrow.vector.complex.NonNullableStructVector;
@@ -1705,6 +1706,26 @@ public class TestComplexWriter {
}
}
+ @Test
+ public void structWriterVarCharViewHelpers() {
+ try (NonNullableStructVector parent =
NonNullableStructVector.empty("parent", allocator)) {
+ ComplexWriter writer = new ComplexWriterImpl("root", parent, false,
true);
+ StructWriter rootWriter = writer.rootAsStruct();
+ rootWriter.start();
+ rootWriter.setPosition(0);
+ rootWriter.viewVarChar("c").writeViewVarChar(new Text("row1"));
+ rootWriter.setPosition(1);
+ rootWriter.viewVarChar("c").writeViewVarChar("row2");
+ rootWriter.end();
+
+ ViewVarCharVector vector =
+ parent.getChild("root", StructVector.class).getChild("c",
ViewVarCharVector.class);
+
+ assertEquals("row1", vector.getObject(0).toString());
+ assertEquals("row2", vector.getObject(1).toString());
+ }
+ }
+
@Test
public void structWriterLargeVarCharHelpers() {
try (NonNullableStructVector parent =
NonNullableStructVector.empty("parent", allocator)) {