alamb commented on code in PR #3244:
URL: https://github.com/apache/arrow-rs/pull/3244#discussion_r1038991721


##########
arrow-array/src/array/struct_array.rs:
##########
@@ -568,7 +562,9 @@ mod tests {
     }
 
     #[test]
-    #[should_panic(expected = "non-nullable field cannot have null values")]
+    #[should_panic(
+        expected = "non-nullable child of type Int32 contains nulls not 
present in parent Struct"

Review Comment:
   👍 



##########
arrow-array/src/array/mod.rs:
##########
@@ -916,7 +916,7 @@ mod tests {
     #[test]
     fn test_null_struct() {
         let struct_type =
-            DataType::Struct(vec![Field::new("data", DataType::Int64, true)]);
+            DataType::Struct(vec![Field::new("data", DataType::Int64, false)]);

Review Comment:
   ```suggestion
               // It is possible to create a null struct containing a 
non-nullable child 🤯 
               // see https://github.com/apache/arrow-rs/pull/3244 for details
               DataType::Struct(vec![Field::new("data", DataType::Int64, 
false)]);
   ```
   
   It might be a good idea to leave some rationale for this very non obvious 
corner case



##########
arrow-array/src/array/string_array.rs:
##########
@@ -609,7 +609,7 @@ mod tests {
 
         let offsets = [0, 5, 10].map(|n| O::from_usize(n).unwrap());
         let data_type = GenericListArray::<O>::DATA_TYPE_CONSTRUCTOR(Box::new(
-            Field::new("item", DataType::UInt8, false),
+            Field::new("item", DataType::UInt8, true),

Review Comment:
   ```suggestion
               // It is possible to create a null struct containing a 
non-nullable child 🤯 
               // see https://github.com/apache/arrow-rs/pull/3244 for details
               Field::new("item", DataType::UInt8, true),
   ```



##########
arrow-data/src/data.rs:
##########
@@ -1012,9 +1020,100 @@ impl ArrayData {
                 self.null_count, actual_null_count
             )));
         }
+
+        // In general non-nullable children should not contain nulls, however, 
for certain
+        // types, such as StructArray and FixedSizeList, nulls in the parent 
take up
+        // space in the child. As such we permit nulls in the children in the 
corresponding
+        // positions for such types
+        match &self.data_type {
+            DataType::List(f) | DataType::LargeList(f) | DataType::Map(f, _) 
=> {
+                if !f.is_nullable() {
+                    self.validate_non_nullable(None, 0, &self.child_data[0])?
+                }
+            }
+            DataType::FixedSizeList(field, len) => {
+                let child = &self.child_data[0];
+                if !field.is_nullable() {
+                    match nulls {
+                        Some(nulls) => {
+                            let element_len = *len as usize;
+                            let mut buffer =
+                                MutableBuffer::new_null(element_len * 
self.len);
+
+                            for i in 0..self.len {
+                                if !bit_util::get_bit(nulls.as_ref(), 
self.offset + i) {
+                                    continue;
+                                }
+                                for j in 0..element_len {
+                                    bit_util::set_bit(
+                                        buffer.as_mut(),
+                                        i * element_len + j,
+                                    )
+                                }
+                            }
+                            let mask = buffer.into();
+                            self.validate_non_nullable(Some(&mask), 0, child)?;

Review Comment:
   I think some comments about this mask (what it is and why it is built) might 
be helpful



##########
arrow-data/src/data.rs:
##########
@@ -1012,9 +1020,100 @@ impl ArrayData {
                 self.null_count, actual_null_count
             )));
         }
+
+        // In general non-nullable children should not contain nulls, however, 
for certain
+        // types, such as StructArray and FixedSizeList, nulls in the parent 
take up
+        // space in the child. As such we permit nulls in the children in the 
corresponding
+        // positions for such types
+        match &self.data_type {
+            DataType::List(f) | DataType::LargeList(f) | DataType::Map(f, _) 
=> {
+                if !f.is_nullable() {
+                    self.validate_non_nullable(None, 0, &self.child_data[0])?
+                }
+            }
+            DataType::FixedSizeList(field, len) => {
+                let child = &self.child_data[0];
+                if !field.is_nullable() {
+                    match nulls {
+                        Some(nulls) => {
+                            let element_len = *len as usize;
+                            let mut buffer =
+                                MutableBuffer::new_null(element_len * 
self.len);
+
+                            for i in 0..self.len {
+                                if !bit_util::get_bit(nulls.as_ref(), 
self.offset + i) {
+                                    continue;
+                                }
+                                for j in 0..element_len {
+                                    bit_util::set_bit(
+                                        buffer.as_mut(),
+                                        i * element_len + j,
+                                    )
+                                }
+                            }
+                            let mask = buffer.into();
+                            self.validate_non_nullable(Some(&mask), 0, child)?;
+                        }
+                        None => self.validate_non_nullable(None, 0, child)?,
+                    }
+                }
+            }
+            DataType::Struct(fields) => {
+                for (field, child) in fields.iter().zip(&self.child_data) {
+                    if !field.is_nullable() {
+                        self.validate_non_nullable(nulls, self.offset, child)?
+                    }
+                }
+            }
+            _ => {}
+        }
+
         Ok(())
     }
 
+    /// Verifies that `child` contains no nulls not present in `mask`
+    fn validate_non_nullable(
+        &self,
+        mask: Option<&Buffer>,
+        offset: usize,
+        data: &ArrayData,
+    ) -> Result<(), ArrowError> {
+        let mask = match mask {
+            Some(mask) => mask.as_ref(),
+            None => return match data.null_count {
+                0 => Ok(()),
+                _ => Err(ArrowError::InvalidArgumentError(format!(
+                    "non-nullable child of type {} contains nulls not present 
in parent {}",

Review Comment:
   I suggest making these two messages different so it is easier to find what 
error is being hit by `grep` if someone hits the error



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to