This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new c3e0684179 bench(parquet): add nested list writer benchmarks (#10084)
c3e0684179 is described below

commit c3e0684179d2e3059a3bd99ea13cc7ccb0411f46
Author: mwish <[email protected]>
AuthorDate: Wed Jun 10 19:30:45 2026 +0800

    bench(parquet): add nested list writer benchmarks (#10084)
    
    # Which issue does this PR close?
    
    - Closes #10083.
    
    # Rationale for this change
    
    Add benchmarks for list types with nested repetition levels:
    - `list_nested`: List<List<Int32>>
    - `list_struct_with_list`: List<Struct<a:Int32, b:Float32,
    c:List<Int32>>>
    
    These exercise the per-slot (non-batched) write path where
    child_has_no_nested_rep() returns false, providing a baseline for future
    optimizations.
    
    # What changes are included in this PR?
    
    Add two benchmark cases, `list_nested` and `list_struct_with_list`, to
    `parquet/benches/arrow_writer.rs`.
    
    # Are these changes tested?
    
    This change only adds benchmarks, so no separate tests are added.
    
    # Are there any user-facing changes?
    
    No
    
    Co-authored-by: Claude Opus 4 <[email protected]>
---
 parquet/benches/arrow_writer.rs | 54 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs
index 80d3e7144b..6b09fd4a2f 100644
--- a/parquet/benches/arrow_writer.rs
+++ b/parquet/benches/arrow_writer.rs
@@ -332,6 +332,54 @@ fn create_struct_bench_batch(size: usize, null_density: f32) -> Result<RecordBat
     )?)
 }
 
+fn create_nested_list_bench_batch(size: usize, null_density: f32) -> Result<RecordBatch> {
+    // List<List<Int32>> — exercises the nested repetition (non-batched) path
+    let fields = vec![Field::new(
+        "_1",
+        DataType::List(Arc::new(Field::new_list_field(
+            DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
+            true,
+        ))),
+        true,
+    )];
+    let schema = Schema::new(fields);
+    Ok(create_random_batch(
+        Arc::new(schema),
+        size,
+        null_density,
+        0.75,
+    )?)
+}
+
+fn create_list_struct_with_list_batch(size: usize, null_density: f32) -> Result<RecordBatch> {
+    // List<Struct<a:Int32, b:Float32, c:List<Int32>>>
+    // The struct child contains a nested list, so child_has_no_nested_rep() = false.
+    // This exercises the per-slot (non-batched) write path in level computation.
+    let fields = vec![Field::new(
+        "_1",
+        DataType::List(Arc::new(Field::new_list_field(
+            DataType::Struct(Fields::from(vec![
+                Field::new("a", DataType::Int32, true),
+                Field::new("b", DataType::Float32, true),
+                Field::new(
+                    "c",
+                    DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
+                    true,
+                ),
+            ])),
+            true,
+        ))),
+        true,
+    )];
+    let schema = Schema::new(fields);
+    Ok(create_random_batch(
+        Arc::new(schema),
+        size,
+        null_density,
+        0.75,
+    )?)
+}
+
 fn _create_nested_bench_batch(
     size: usize,
     null_density: f32,
@@ -491,6 +539,12 @@ fn create_batches() -> Vec<(&'static str, RecordBatch)> {
     let batch = create_struct_bench_batch(BATCH_SIZE, 1.0).unwrap();
     batches.push(("struct_all_null", batch));
 
+    let batch = create_nested_list_bench_batch(BATCH_SIZE, 0.25).unwrap();
+    batches.push(("list_nested", batch));
+
+    let batch = create_list_struct_with_list_batch(BATCH_SIZE, 0.25).unwrap();
+    batches.push(("list_struct_with_list", batch));
+
     batches
 }
 

Reply via email to