alamb commented on code in PR #9006:
URL: https://github.com/apache/arrow-rs/pull/9006#discussion_r2628556793
##########
arrow-ipc/src/writer.rs:
##########
@@ -3212,6 +3290,109 @@ mod tests {
roundtrip_ensure_sliced_smaller(in_batch, 1000);
}
+ fn generate_list_view_data<O: OffsetSizeTrait>() ->
GenericListViewArray<O> {
+ let mut builder = GenericListViewBuilder::<O,
_>::new(UInt32Builder::new());
+
+ for i in 0..100_000 {
+ for value in [i, i, i] {
+ builder.values().append_value(value);
+ }
+ builder.append(true);
Review Comment:
Can you please also add at least one null to the list to cover the null
encoding?
##########
arrow-ipc/src/writer.rs:
##########
@@ -3212,6 +3290,109 @@ mod tests {
roundtrip_ensure_sliced_smaller(in_batch, 1000);
}
+ fn generate_list_view_data<O: OffsetSizeTrait>() ->
GenericListViewArray<O> {
+ let mut builder = GenericListViewBuilder::<O,
_>::new(UInt32Builder::new());
+
+ for i in 0..100_000 {
+ for value in [i, i, i] {
+ builder.values().append_value(value);
+ }
+ builder.append(true);
+ }
+
+ builder.finish()
+ }
+
+ #[test]
+ fn encode_list_view_arrays() {
+ let val_inner = Field::new_list_field(DataType::UInt32, true);
+ let val_field = Field::new("val",
DataType::ListView(Arc::new(val_inner)), false);
+ let schema = Arc::new(Schema::new(vec![val_field]));
+
+ let values = Arc::new(generate_list_view_data::<i32>());
+
+ let in_batch = RecordBatch::try_new(schema, vec![values]).unwrap();
+ let out_batch = deserialize_file(serialize_file(&in_batch));
+ assert_eq!(in_batch, out_batch);
+ }
+
+ #[test]
+ fn encode_large_list_view_arrays() {
+ let val_inner = Field::new_list_field(DataType::UInt32, true);
+ let val_field = Field::new("val",
DataType::LargeListView(Arc::new(val_inner)), false);
+ let schema = Arc::new(Schema::new(vec![val_field]));
+
+ let values = Arc::new(generate_list_view_data::<i64>());
+
+ let in_batch = RecordBatch::try_new(schema, vec![values]).unwrap();
+ let out_batch = deserialize_file(serialize_file(&in_batch));
+ assert_eq!(in_batch, out_batch);
+ }
+
+ #[test]
+ fn check_sliced_list_view_array() {
+ let inner = Field::new_list_field(DataType::UInt32, true);
+ let field = Field::new("val", DataType::ListView(Arc::new(inner)),
true);
+ let schema = Arc::new(Schema::new(vec![field]));
+ let values = Arc::new(generate_list_view_data::<i32>());
+
+ for (offset, len) in [(999, 1), (0, 13), (47, 12), (values.len() - 13,
13)] {
+ let in_batch = RecordBatch::try_new(schema.clone(),
vec![values.clone()])
+ .unwrap()
+ .slice(offset, len);
+ let out_batch = deserialize_file(serialize_file(&in_batch));
+ assert_eq!(in_batch, out_batch);
+ }
+ }
+
+ #[test]
+ fn check_sliced_large_list_view_array() {
+ let inner = Field::new_list_field(DataType::UInt32, true);
+ let field = Field::new("val",
DataType::LargeListView(Arc::new(inner)), true);
+ let schema = Arc::new(Schema::new(vec![field]));
+ let values = Arc::new(generate_list_view_data::<i64>());
+
+ for (offset, len) in [(999, 1), (0, 13), (47, 12), (values.len() - 13,
13)] {
+ let in_batch = RecordBatch::try_new(schema.clone(),
vec![values.clone()])
+ .unwrap()
+ .slice(offset, len);
+ let out_batch = deserialize_file(serialize_file(&in_batch));
+ assert_eq!(in_batch, out_batch);
+ }
+ }
+
+ fn generate_nested_list_view_data<O: OffsetSizeTrait>() ->
GenericListViewArray<O> {
Review Comment:
nice!
##########
arrow-ipc/src/reader.rs:
##########
@@ -322,6 +332,32 @@ impl RecordBatchDecoder<'_> {
self.create_array_from_builder(builder)
}
+ fn create_list_view_array(
+ &self,
+ field_node: &FieldNode,
+ data_type: &DataType,
+ buffers: &[Buffer],
+ child_array: ArrayRef,
+ ) -> Result<ArrayRef, ArrowError> {
+ let null_buffer = (field_node.null_count() >
0).then_some(buffers[0].clone());
+ let length = field_node.length() as usize;
+ let child_data = child_array.into_data();
+
+ let mut builder = match data_type {
+ ListView(_) | LargeListView(_) =>
ArrayData::builder(data_type.clone())
+ .len(length)
+ .add_buffer(buffers[1].clone()) // offsets
+ .add_buffer(buffers[2].clone()) // sizes
+ .add_child_data(child_data)
+ .null_bit_buffer(null_buffer),
+ _ => unreachable!("Cannot create listview array from {:?}",
data_type),
Review Comment:
We could potentially return an `"internal error"` or something similar --
however, since this code can only be called with a `ListView` /
`LargeListView`, it is probably fine.
Looking more at this code, it seems like this is more of an assert --
perhaps the intent would be clearer if it explicitly did something like
```rust
assert!(matches!(data_type, ListView(_) | LargeListView(_)));
let mut builder = ArrayData::builder(data_type.clone())
.len(length)
.add_buffer(buffers[1].clone()) // offsets
.add_buffer(buffers[2].clone()) // sizes
..
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]