This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 1b3d1a9f581 feat: implemented with_field() for FixedSizeListBuilder 
(#5541)
1b3d1a9f581 is described below

commit 1b3d1a9f581fa650f440a7926f1efc5f2562d98a
Author: Istvan Fodor <[email protected]>
AuthorDate: Tue Apr 9 04:00:38 2024 -0500

    feat: implemented with_field() for FixedSizeListBuilder (#5541)
    
    * feat: implemented with_field() for FixedSizeListBuilder
    
    * Switched back to build_unchecked, assertions on array_data
    
    * decresed code repetition
    
    * Fixed null logic
    
    * Added unit tests for empty with field
    
    * Fixed clippy issue
---
 arrow-array/src/builder/fixed_size_list_builder.rs | 282 ++++++++++++++++++---
 1 file changed, 249 insertions(+), 33 deletions(-)

diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs 
b/arrow-array/src/builder/fixed_size_list_builder.rs
index 0fe779d5c1a..3d79a763e9d 100644
--- a/arrow-array/src/builder/fixed_size_list_builder.rs
+++ b/arrow-array/src/builder/fixed_size_list_builder.rs
@@ -19,7 +19,7 @@ use crate::builder::ArrayBuilder;
 use crate::{ArrayRef, FixedSizeListArray};
 use arrow_buffer::NullBufferBuilder;
 use arrow_data::ArrayData;
-use arrow_schema::{DataType, Field};
+use arrow_schema::{DataType, Field, FieldRef};
 use std::any::Any;
 use std::sync::Arc;
 
@@ -67,6 +67,7 @@ pub struct FixedSizeListBuilder<T: ArrayBuilder> {
     null_buffer_builder: NullBufferBuilder,
     values_builder: T,
     list_len: i32,
+    field: Option<FieldRef>,
 }
 
 impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
@@ -89,6 +90,20 @@ impl<T: ArrayBuilder> FixedSizeListBuilder<T> {
             null_buffer_builder: NullBufferBuilder::new(capacity),
             values_builder,
             list_len: value_length,
+            field: None,
+        }
+    }
+
+    /// Override the field passed to [`ArrayData::builder`]
+    ///
+    /// By default a nullable field is created with the name `item`
+    ///
+    /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the
+    /// field's data type does not match that of `T`
+    pub fn with_field(self, field: impl Into<FieldRef>) -> Self {
+        Self {
+            field: Some(field.into()),
+            ..self
         }
     }
 }
@@ -166,13 +181,40 @@ where
         );
 
         let nulls = self.null_buffer_builder.finish();
-        let array_data = ArrayData::builder(DataType::FixedSizeList(
-            Arc::new(Field::new("item", values_data.data_type().clone(), 
true)),
-            self.list_len,
-        ))
-        .len(len)
-        .add_child_data(values_data)
-        .nulls(nulls);
+
+        let field = match &self.field {
+            Some(f) => {
+                let size = self.value_length();
+                assert_eq!(
+                    f.data_type(),
+                    values_data.data_type(),
+                    "DataType of field ({}) should be the same as the 
values_builder DataType ({})",
+                    f.data_type(),
+                    values_data.data_type()
+                );
+
+                if let Some(a) = values_arr.logical_nulls() {
+                    let nulls_valid = f.is_nullable()
+                        || nulls
+                            .as_ref()
+                            .map(|n| n.expand(size as _).contains(&a))
+                            .unwrap_or_default();
+
+                    assert!(
+                        nulls_valid,
+                        "Found unmasked nulls for non-nullable 
FixedSizeListBuilder field {:?}",
+                        f.name()
+                    );
+                }
+                f.clone()
+            }
+            None => Arc::new(Field::new("item", 
values_data.data_type().clone(), true)),
+        };
+
+        let array_data = ArrayData::builder(DataType::FixedSizeList(field, 
self.list_len))
+            .len(len)
+            .add_child_data(values_data)
+            .nulls(nulls);
 
         let array_data = unsafe { array_data.build_unchecked() };
 
@@ -194,13 +236,39 @@ where
         );
 
         let nulls = self.null_buffer_builder.finish_cloned();
-        let array_data = ArrayData::builder(DataType::FixedSizeList(
-            Arc::new(Field::new("item", values_data.data_type().clone(), 
true)),
-            self.list_len,
-        ))
-        .len(len)
-        .add_child_data(values_data)
-        .nulls(nulls);
+
+        let field = match &self.field {
+            Some(f) => {
+                let size = self.value_length();
+                assert_eq!(
+                    f.data_type(),
+                    values_data.data_type(),
+                    "DataType of field ({}) should be the same as the 
values_builder DataType ({})",
+                    f.data_type(),
+                    values_data.data_type()
+                );
+                if let Some(a) = values_arr.logical_nulls() {
+                    let nulls_valid = f.is_nullable()
+                        || nulls
+                            .as_ref()
+                            .map(|n| n.expand(size as _).contains(&a))
+                            .unwrap_or_default();
+
+                    assert!(
+                        nulls_valid,
+                        "Found unmasked nulls for non-nullable 
FixedSizeListBuilder field {:?}",
+                        f.name()
+                    );
+                }
+                f.clone()
+            }
+            None => Arc::new(Field::new("item", 
values_data.data_type().clone(), true)),
+        };
+
+        let array_data = ArrayData::builder(DataType::FixedSizeList(field, 
self.list_len))
+            .len(len)
+            .add_child_data(values_data)
+            .nulls(nulls);
 
         let array_data = unsafe { array_data.build_unchecked() };
 
@@ -216,28 +284,54 @@ mod tests {
     use crate::Array;
     use crate::Int32Array;
 
-    #[test]
-    fn test_fixed_size_list_array_builder() {
+    fn make_list_builder(
+        include_null_element: bool,
+        include_null_in_values: bool,
+    ) -> 
FixedSizeListBuilder<crate::builder::PrimitiveBuilder<crate::types::Int32Type>> 
{
         let values_builder = Int32Builder::new();
         let mut builder = FixedSizeListBuilder::new(values_builder, 3);
 
-        //  [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
         builder.values().append_value(0);
         builder.values().append_value(1);
         builder.values().append_value(2);
         builder.append(true);
-        builder.values().append_null();
-        builder.values().append_null();
-        builder.values().append_null();
-        builder.append(false);
+
+        builder.values().append_value(2);
         builder.values().append_value(3);
-        builder.values().append_null();
-        builder.values().append_value(5);
-        builder.append(true);
-        builder.values().append_value(6);
-        builder.values().append_value(7);
-        builder.values().append_null();
+        builder.values().append_value(4);
         builder.append(true);
+
+        if include_null_element {
+            builder.values().append_null();
+            builder.values().append_null();
+            builder.values().append_null();
+            builder.append(false);
+        } else {
+            builder.values().append_value(2);
+            builder.values().append_value(3);
+            builder.values().append_value(4);
+            builder.append(true);
+        }
+
+        if include_null_in_values {
+            builder.values().append_value(3);
+            builder.values().append_null();
+            builder.values().append_value(5);
+            builder.append(true);
+        } else {
+            builder.values().append_value(3);
+            builder.values().append_value(4);
+            builder.values().append_value(5);
+            builder.append(true);
+        }
+
+        builder
+    }
+
+    #[test]
+    fn test_fixed_size_list_array_builder() {
+        let mut builder = make_list_builder(true, true);
+
         let list_array = builder.finish();
 
         assert_eq!(DataType::Int32, list_array.value_type());
@@ -248,9 +342,48 @@ mod tests {
     }
 
     #[test]
-    fn test_fixed_size_list_array_builder_finish_cloned() {
+    fn test_fixed_size_list_array_builder_with_field() {
+        let builder = make_list_builder(false, false);
+        let mut builder = builder.with_field(Field::new("list_element", 
DataType::Int32, false));
+        let list_array = builder.finish();
+
+        assert_eq!(DataType::Int32, list_array.value_type());
+        assert_eq!(4, list_array.len());
+        assert_eq!(0, list_array.null_count());
+        assert_eq!(6, list_array.value_offset(2));
+        assert_eq!(3, list_array.value_length());
+    }
+
+    #[test]
+    fn test_fixed_size_list_array_builder_with_field_and_null() {
+        let builder = make_list_builder(true, false);
+        let mut builder = builder.with_field(Field::new("list_element", 
DataType::Int32, false));
+        let list_array = builder.finish();
+
+        assert_eq!(DataType::Int32, list_array.value_type());
+        assert_eq!(4, list_array.len());
+        assert_eq!(1, list_array.null_count());
+        assert_eq!(6, list_array.value_offset(2));
+        assert_eq!(3, list_array.value_length());
+    }
+
+    #[test]
+    #[should_panic(expected = "Found unmasked nulls for non-nullable 
FixedSizeListBuilder field")]
+    fn test_fixed_size_list_array_builder_with_field_null_panic() {
+        let builder = make_list_builder(true, true);
+        let mut builder = builder.with_field(Field::new("list_item", 
DataType::Int32, false));
+
+        builder.finish();
+    }
+
+    #[test]
+    #[should_panic(
+        expected = "DataType of field (Int64) should be the same as the 
values_builder DataType (Int32)"
+    )]
+    fn test_fixed_size_list_array_builder_with_field_type_panic() {
         let values_builder = Int32Builder::new();
-        let mut builder = FixedSizeListBuilder::new(values_builder, 3);
+        let builder = FixedSizeListBuilder::new(values_builder, 3);
+        let mut builder = builder.with_field(Field::new("list_item", 
DataType::Int64, true));
 
         //  [[0, 1, 2], null, [3, null, 5], [6, 7, null]]
         builder.values().append_value(0);
@@ -262,13 +395,68 @@ mod tests {
         builder.values().append_null();
         builder.append(false);
         builder.values().append_value(3);
-        builder.values().append_null();
+        builder.values().append_value(4);
         builder.values().append_value(5);
         builder.append(true);
+
+        builder.finish();
+    }
+
+    #[test]
+    fn test_fixed_size_list_array_builder_cloned_with_field() {
+        let builder = make_list_builder(true, true);
+        let builder = builder.with_field(Field::new("list_element", 
DataType::Int32, true));
+
+        let list_array = builder.finish_cloned();
+
+        assert_eq!(DataType::Int32, list_array.value_type());
+        assert_eq!(4, list_array.len());
+        assert_eq!(1, list_array.null_count());
+        assert_eq!(6, list_array.value_offset(2));
+        assert_eq!(3, list_array.value_length());
+    }
+
+    #[test]
+    #[should_panic(expected = "Found unmasked nulls for non-nullable 
FixedSizeListBuilder field")]
+    fn test_fixed_size_list_array_builder_cloned_with_field_null_panic() {
+        let builder = make_list_builder(true, true);
+        let builder = builder.with_field(Field::new("list_item", 
DataType::Int32, false));
+
+        builder.finish_cloned();
+    }
+
+    #[test]
+    fn test_fixed_size_list_array_builder_cloned_with_field_and_null() {
+        let builder = make_list_builder(true, false);
+        let mut builder = builder.with_field(Field::new("list_element", 
DataType::Int32, false));
+        let list_array = builder.finish();
+
+        assert_eq!(DataType::Int32, list_array.value_type());
+        assert_eq!(4, list_array.len());
+        assert_eq!(1, list_array.null_count());
+        assert_eq!(6, list_array.value_offset(2));
+        assert_eq!(3, list_array.value_length());
+    }
+
+    #[test]
+    #[should_panic(
+        expected = "DataType of field (Int64) should be the same as the 
values_builder DataType (Int32)"
+    )]
+    fn test_fixed_size_list_array_builder_cloned_with_field_type_panic() {
+        let builder = make_list_builder(false, false);
+        let builder = builder.with_field(Field::new("list_item", 
DataType::Int64, true));
+
+        builder.finish_cloned();
+    }
+
+    #[test]
+    fn test_fixed_size_list_array_builder_finish_cloned() {
+        let mut builder = make_list_builder(true, true);
+
         let mut list_array = builder.finish_cloned();
 
         assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(3, list_array.len());
+        assert_eq!(4, list_array.len());
         assert_eq!(1, list_array.null_count());
         assert_eq!(3, list_array.value_length());
 
@@ -283,12 +471,40 @@ mod tests {
         list_array = builder.finish();
 
         assert_eq!(DataType::Int32, list_array.value_type());
-        assert_eq!(5, list_array.len());
+        assert_eq!(6, list_array.len());
         assert_eq!(2, list_array.null_count());
         assert_eq!(6, list_array.value_offset(2));
         assert_eq!(3, list_array.value_length());
     }
 
+    #[test]
+    fn test_fixed_size_list_array_builder_with_field_empty() {
+        let values_builder = Int32Array::builder(0);
+        let mut builder = FixedSizeListBuilder::new(values_builder, 
3).with_field(Field::new(
+            "list_item",
+            DataType::Int32,
+            false,
+        ));
+        assert!(builder.is_empty());
+        let arr = builder.finish();
+        assert_eq!(0, arr.len());
+        assert_eq!(0, builder.len());
+    }
+
+    #[test]
+    fn test_fixed_size_list_array_builder_cloned_with_field_empty() {
+        let values_builder = Int32Array::builder(0);
+        let builder = FixedSizeListBuilder::new(values_builder, 
3).with_field(Field::new(
+            "list_item",
+            DataType::Int32,
+            false,
+        ));
+        assert!(builder.is_empty());
+        let arr = builder.finish_cloned();
+        assert_eq!(0, arr.len());
+        assert_eq!(0, builder.len());
+    }
+
     #[test]
     fn test_fixed_size_list_array_builder_empty() {
         let values_builder = Int32Array::builder(5);

Reply via email to