alamb commented on code in PR #9954:
URL: https://github.com/apache/arrow-rs/pull/9954#discussion_r3229337064


##########
parquet/src/arrow/arrow_writer/levels.rs:
##########
@@ -2442,4 +2491,103 @@ mod tests {
         assert_eq!(sliced.non_null_indices, Vec::<usize>::new());
         assert_eq!(sliced.array.len(), 0);
     }
+
+    #[test]
+    fn test_all_null_struct() {
+        // Struct<Int32> where every struct slot is null.
+        // Schema: a (struct, nullable) -> c (int32, nullable)
+        // Data: [null, null, null, null]
+        //
+        // Expected: max_def=2, def_levels all 0 (struct is null → child never 
reached),
+        // leaf values are empty.
+        let c = Int32Array::from(vec![None::<i32>; 4]);
+        let leaf = Arc::new(c) as ArrayRef;
+        let c_field = Arc::new(Field::new("c", DataType::Int32, true));
+        let a = StructArray::from((vec![(c_field, leaf.clone())], 
Buffer::from([0b00000000])));
+        let a_field = Field::new("a", a.data_type().clone(), true);
+        let a_array = Arc::new(a) as ArrayRef;
+
+        let levels = calculate_array_levels(&a_array, &a_field).unwrap();
+        assert_eq!(levels.len(), 1);
+
+        let expected = ArrayLevels {
+            def_levels: LevelData::Uniform { value: 0, count: 4 },
+            rep_levels: LevelData::Absent,
+            non_null_indices: vec![],
+            max_def_level: 2,
+            max_rep_level: 0,
+            array: leaf,
+            logical_nulls: Some(NullBuffer::new_null(4)),
+        };
+        assert_eq!(&levels[0], &expected);
+    }
+
+    #[test]
+    fn test_all_null_nested_struct() {
+        // Struct<Struct<Int32>> where the outer struct is entirely null.
+        // Schema: a (struct, nullable) -> b (struct, nullable) -> c (int32, 
nullable)
+        // Data: [null, null, null]
+        //
+        // Expected: max_def=3, def_levels all 0.
+        let c = Int32Array::from(vec![None::<i32>; 3]);
+        let leaf = Arc::new(c) as ArrayRef;
+        let c_field = Arc::new(Field::new("c", DataType::Int32, true));
+        let b = StructArray::from((vec![(c_field, leaf.clone())], 
Buffer::from([0b00000000])));
+        let b_field = Arc::new(Field::new("b", b.data_type().clone(), true));
+        let a = StructArray::from((
+            vec![(b_field, Arc::new(b) as ArrayRef)],
+            Buffer::from([0b00000000]),
+        ));
+        let a_field = Field::new("a", a.data_type().clone(), true);
+        let a_array = Arc::new(a) as ArrayRef;
+
+        let levels = calculate_array_levels(&a_array, &a_field).unwrap();
+        assert_eq!(levels.len(), 1);
+
+        let expected = ArrayLevels {
+            def_levels: LevelData::Uniform { value: 0, count: 3 },
+            rep_levels: LevelData::Absent,
+            non_null_indices: vec![],
+            max_def_level: 3,
+            max_rep_level: 0,
+            array: leaf,
+            logical_nulls: Some(NullBuffer::new_null(3)),
+        };
+        assert_eq!(&levels[0], &expected);
+    }
+
+    #[test]
+    fn test_all_null_struct_multiple_children() {

Review Comment:
   these tests cover structs, but you also added fast paths for `write_list` 
and `write_fixed_size_list`. Can we add tests for them too?



##########
parquet/src/arrow/arrow_writer/levels.rs:
##########
@@ -501,6 +512,19 @@ impl LevelInfoBuilder {
         nulls: Option<&NullBuffer>,
         range: Range<usize>,
     ) {
+        // Fast path: entire struct array is null; emit bulk null def/rep 
levels
+        if let Some(nulls) = nulls {
+            if nulls.null_count() == nulls.len() {
+                let len = range.end - range.start;
+                for child in children.iter_mut() {
+                    child.visit_leaves(|info| {
+                        info.extend_uniform_levels(ctx.def_level - 1, 
ctx.rep_level, len);
+                    });
+                }
+                return;
+            }
+        }
+
         let write_null = |children: &mut [LevelInfoBuilder], range: 
Range<usize>| {

Review Comment:
   I wonder if this closure could be used above to avoid duplication (why call 
extend_uniform_levels above and `extend_uniform_levels` in the closure?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to