This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 1b3d1a9f581 feat: implemented with_field() for FixedSizeListBuilder
(#5541)
1b3d1a9f581 is described below
commit 1b3d1a9f581fa650f440a7926f1efc5f2562d98a
Author: Istvan Fodor <[email protected]>
AuthorDate: Tue Apr 9 04:00:38 2024 -0500
feat: implemented with_field() for FixedSizeListBuilder (#5541)
* feat: implemented with_field() for FixedSizeListBuilder
* Switched back to build_unchecked, assertions on array_data
* decresed code repetition
* Fixed null logic
* Added unit tests for empty with field
* Fixed clippy issue
---
arrow-array/src/builder/fixed_size_list_builder.rs | 282 ++++++++++++++++++---
1 file changed, 249 insertions(+), 33 deletions(-)
diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs
b/arrow-array/src/builder/fixed_size_list_builder.rs
index 0fe779d5c1a..3d79a763e9d 100644
--- a/arrow-array/src/builder/fixed_size_list_builder.rs
+++ b/arrow-array/src/builder/fixed_size_list_builder.rs
@@ -19,7 +19,7 @@ use crate::builder::ArrayBuilder;
use crate::{ArrayRef, FixedSizeListArray};
use arrow_buffer::NullBufferBuilder;
use arrow_data::ArrayData;
-use arrow_schema::{DataType, Field};
+use arrow_schema::{DataType, Field, FieldRef};
use std::any::Any;
use std::sync::Arc;
@@ -67,6 +67,7 @@ pub struct FixedSizeListBuilder<T: ArrayBuilder> {
null_buffer_builder: NullBufferBuilder,
values_builder: T,
list_len: i32,
+ field: Option<FieldRef>,
}
impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
@@ -89,6 +90,20 @@ impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
null_buffer_builder: NullBufferBuilder::new(capacity),
values_builder,
list_len: value_length,
+ field: None,
+ }
+ }
+
+ /// Override the field passed to [`ArrayData::builder`]
+ ///
+ /// By default a nullable field is created with the name `item`
+ ///
+ /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
+ /// field's data type does not match that of `T`
+ pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
+ Self {
+ field: Some(field.into()),
+ ..self
}
}
}
@@ -166,13 +181,40 @@ where
);
let nulls = self.null_buffer_builder.finish();
- let array_data = ArrayData::builder(DataType::FixedSizeList(
- Arc::new(Field::new("item", values_data.data_type().clone(),
true)),
- self.list_len,
- ))
- .len(len)
- .add_child_data(values_data)
- .nulls(nulls);
+
+ let field = match &self.field {
+ Some(f) => {
+ let size = self.value_length();
+ assert_eq!(
+ f.data_type(),
+ values_data.data_type(),
+ "DataType of field ({}) should be the same as the
values_builder DataType ({})",
+ f.data_type(),
+ values_data.data_type()
+ );
+
+ if let Some(a) = values_arr.logical_nulls() {
+ let nulls_valid = f.is_nullable()
+ || nulls
+ .as_ref()
+ .map(|n| n.expand(size as _).contains(&a))
+ .unwrap_or_default();
+
+ assert!(
+ nulls_valid,
+ "Found unmasked nulls for non-nullable
FixedSizeListBuilder field {:?}",
+ f.name()
+ );
+ }
+ f.clone()
+ }
+ None => Arc::new(Field::new("item",
values_data.data_type().clone(), true)),
+ };
+
+ let array_data = ArrayData::builder(DataType::FixedSizeList(field,
self.list_len))
+ .len(len)
+ .add_child_data(values_data)
+ .nulls(nulls);
let array_data = unsafe { array_data.build_unchecked() };
@@ -194,13 +236,39 @@ where
);
let nulls = self.null_buffer_builder.finish_cloned();
- let array_data = ArrayData::builder(DataType::FixedSizeList(
- Arc::new(Field::new("item", values_data.data_type().clone(),
true)),
- self.list_len,
- ))
- .len(len)
- .add_child_data(values_data)
- .nulls(nulls);
+
+ let field = match &self.field {
+ Some(f) => {
+ let size = self.value_length();
+ assert_eq!(
+ f.data_type(),
+ values_data.data_type(),
+ "DataType of field ({}) should be the same as the
values_builder DataType ({})",
+ f.data_type(),
+ values_data.data_type()
+ );
+ if let Some(a) = values_arr.logical_nulls() {
+ let nulls_valid = f.is_nullable()
+ || nulls
+ .as_ref()
+ .map(|n| n.expand(size as _).contains(&a))
+ .unwrap_or_default();
+
+ assert!(
+ nulls_valid,
+ "Found unmasked nulls for non-nullable
FixedSizeListBuilder field {:?}",
+ f.name()
+ );
+ }
+ f.clone()
+ }
+ None => Arc::new(Field::new("item",
values_data.data_type().clone(), true)),
+ };
+
+ let array_data = ArrayData::builder(DataType::FixedSizeList(field,
self.list_len))
+ .len(len)
+ .add_child_data(values_data)
+ .nulls(nulls);
let array_data = unsafe { array_data.build_unchecked() };
@@ -216,28 +284,54 @@ mod tests {
use crate::Array;
use crate::Int32Array;
- #[test]
- fn test_fixed_size_list_array_builder() {
+ fn make_list_builder(
+ include_null_element: bool,
+ include_null_in_values: bool,
+ ) ->
FixedSizeListBuilder<crate::builder::PrimitiveBuilder<crate::types::Int32Type>>
{
let values_builder = Int32Builder::new();
let mut builder = FixedSizeListBuilder::new(values_builder, 3);
- // [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
builder.values().append_value(0);
builder.values().append_value(1);
builder.values().append_value(2);
builder.append(true);
- builder.values().append_null();
- builder.values().append_null();
- builder.values().append_null();
- builder.append(false);
+
+ builder.values().append_value(2);
builder.values().append_value(3);
- builder.values().append_null();
- builder.values().append_value(5);
- builder.append(true);
- builder.values().append_value(6);
- builder.values().append_value(7);
- builder.values().append_null();
+ builder.values().append_value(4);
builder.append(true);
+
+ if include_null_element {
+ builder.values().append_null();
+ builder.values().append_null();
+ builder.values().append_null();
+ builder.append(false);
+ } else {
+ builder.values().append_value(2);
+ builder.values().append_value(3);
+ builder.values().append_value(4);
+ builder.append(true);
+ }
+
+ if include_null_in_values {
+ builder.values().append_value(3);
+ builder.values().append_null();
+ builder.values().append_value(5);
+ builder.append(true);
+ } else {
+ builder.values().append_value(3);
+ builder.values().append_value(4);
+ builder.values().append_value(5);
+ builder.append(true);
+ }
+
+ builder
+ }
+
+ #[test]
+ fn test_fixed_size_list_array_builder() {
+ let mut builder = make_list_builder(true, true);
+
let list_array = builder.finish();
assert_eq!(DataType::Int32, list_array.value_type());
@@ -248,9 +342,48 @@ mod tests {
}
#[test]
- fn test_fixed_size_list_array_builder_finish_cloned() {
+ fn test_fixed_size_list_array_builder_with_field() {
+ let builder = make_list_builder(false, false);
+ let mut builder = builder.with_field(Field::new("list_element",
DataType::Int32, false));
+ let list_array = builder.finish();
+
+ assert_eq!(DataType::Int32, list_array.value_type());
+ assert_eq!(4, list_array.len());
+ assert_eq!(0, list_array.null_count());
+ assert_eq!(6, list_array.value_offset(2));
+ assert_eq!(3, list_array.value_length());
+ }
+
+ #[test]
+ fn test_fixed_size_list_array_builder_with_field_and_null() {
+ let builder = make_list_builder(true, false);
+ let mut builder = builder.with_field(Field::new("list_element",
DataType::Int32, false));
+ let list_array = builder.finish();
+
+ assert_eq!(DataType::Int32, list_array.value_type());
+ assert_eq!(4, list_array.len());
+ assert_eq!(1, list_array.null_count());
+ assert_eq!(6, list_array.value_offset(2));
+ assert_eq!(3, list_array.value_length());
+ }
+
+ #[test]
+ #[should_panic(expected = "Found unmasked nulls for non-nullable
FixedSizeListBuilder field")]
+ fn test_fixed_size_list_array_builder_with_field_null_panic() {
+ let builder = make_list_builder(true, true);
+ let mut builder = builder.with_field(Field::new("list_item",
DataType::Int32, false));
+
+ builder.finish();
+ }
+
+ #[test]
+ #[should_panic(
+ expected = "DataType of field (Int64) should be the same as the
values_builder DataType (Int32)"
+ )]
+ fn test_fixed_size_list_array_builder_with_field_type_panic() {
let values_builder = Int32Builder::new();
- let mut builder = FixedSizeListBuilder::new(values_builder, 3);
+ let builder = FixedSizeListBuilder::new(values_builder, 3);
+ let mut builder = builder.with_field(Field::new("list_item",
DataType::Int64, true));
// [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
builder.values().append_value(0);
@@ -262,13 +395,68 @@ mod tests {
builder.values().append_null();
builder.append(false);
builder.values().append_value(3);
- builder.values().append_null();
+ builder.values().append_value(4);
builder.values().append_value(5);
builder.append(true);
+
+ builder.finish();
+ }
+
+ #[test]
+ fn test_fixed_size_list_array_builder_cloned_with_field() {
+ let builder = make_list_builder(true, true);
+ let builder = builder.with_field(Field::new("list_element",
DataType::Int32, true));
+
+ let list_array = builder.finish_cloned();
+
+ assert_eq!(DataType::Int32, list_array.value_type());
+ assert_eq!(4, list_array.len());
+ assert_eq!(1, list_array.null_count());
+ assert_eq!(6, list_array.value_offset(2));
+ assert_eq!(3, list_array.value_length());
+ }
+
+ #[test]
+ #[should_panic(expected = "Found unmasked nulls for non-nullable
FixedSizeListBuilder field")]
+ fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() {
+ let builder = make_list_builder(true, true);
+ let builder = builder.with_field(Field::new("list_item",
DataType::Int32, false));
+
+ builder.finish_cloned();
+ }
+
+ #[test]
+ fn test_fixed_size_list_array_builder_cloned_with_field_and_null() {
+ let builder = make_list_builder(true, false);
+ let mut builder = builder.with_field(Field::new("list_element",
DataType::Int32, false));
+ let list_array = builder.finish();
+
+ assert_eq!(DataType::Int32, list_array.value_type());
+ assert_eq!(4, list_array.len());
+ assert_eq!(1, list_array.null_count());
+ assert_eq!(6, list_array.value_offset(2));
+ assert_eq!(3, list_array.value_length());
+ }
+
+ #[test]
+ #[should_panic(
+ expected = "DataType of field (Int64) should be the same as the
values_builder DataType (Int32)"
+ )]
+ fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() {
+ let builder = make_list_builder(false, false);
+ let builder = builder.with_field(Field::new("list_item",
DataType::Int64, true));
+
+ builder.finish_cloned();
+ }
+
+ #[test]
+ fn test_fixed_size_list_array_builder_finish_cloned() {
+ let mut builder = make_list_builder(true, true);
+
let mut list_array = builder.finish_cloned();
assert_eq!(DataType::Int32, list_array.value_type());
- assert_eq!(3, list_array.len());
+ assert_eq!(4, list_array.len());
assert_eq!(1, list_array.null_count());
assert_eq!(3, list_array.value_length());
@@ -283,12 +471,40 @@ mod tests {
list_array = builder.finish();
assert_eq!(DataType::Int32, list_array.value_type());
- assert_eq!(5, list_array.len());
+ assert_eq!(6, list_array.len());
assert_eq!(2, list_array.null_count());
assert_eq!(6, list_array.value_offset(2));
assert_eq!(3, list_array.value_length());
}
+ #[test]
+ fn test_fixed_size_list_array_builder_with_field_empty() {
+ let values_builder = Int32Array::builder(0);
+ let mut builder = FixedSizeListBuilder::new(values_builder,
3).with_field(Field::new(
+ "list_item",
+ DataType::Int32,
+ false,
+ ));
+ assert!(builder.is_empty());
+ let arr = builder.finish();
+ assert_eq!(0, arr.len());
+ assert_eq!(0, builder.len());
+ }
+
+ #[test]
+ fn test_fixed_size_list_array_builder_cloned_with_field_empty() {
+ let values_builder = Int32Array::builder(0);
+ let builder = FixedSizeListBuilder::new(values_builder,
3).with_field(Field::new(
+ "list_item",
+ DataType::Int32,
+ false,
+ ));
+ assert!(builder.is_empty());
+ let arr = builder.finish_cloned();
+ assert_eq!(0, arr.len());
+ assert_eq!(0, builder.len());
+ }
+
#[test]
fn test_fixed_size_list_array_builder_empty() {
let values_builder = Int32Array::builder(5);