This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new c3e0684179 bench(parquet): add nested list writer benchmarks (#10084)
c3e0684179 is described below
commit c3e0684179d2e3059a3bd99ea13cc7ccb0411f46
Author: mwish <[email protected]>
AuthorDate: Wed Jun 10 19:30:45 2026 +0800
bench(parquet): add nested list writer benchmarks (#10084)
# Which issue does this PR close?
- Closes #10083.
# Rationale for this change
Add benchmarks for list types with nested repetition levels:
- `list_nested`: List<List<Int32>>
- `list_struct_with_list`: List<Struct<a:Int32, b:Float32, c:List<Int32>>>
These exercise the per-slot (non-batched) write path where
child_has_no_nested_rep() returns false, providing a baseline for future
optimizations.
# What changes are included in this PR?
Add two benchmarks, `list_nested` and `list_struct_with_list`, to `parquet/benches/arrow_writer.rs`.
# Are these changes tested?
The changes are themselves benchmarks, so no separate tests are added.
# Are there any user-facing changes?
No
Co-authored-by: Claude Opus 4 <[email protected]>
---
parquet/benches/arrow_writer.rs | 54 +++++++++++++++++++++++++++++++++++++++++
1 file changed, 54 insertions(+)
diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs
index 80d3e7144b..6b09fd4a2f 100644
--- a/parquet/benches/arrow_writer.rs
+++ b/parquet/benches/arrow_writer.rs
@@ -332,6 +332,54 @@ fn create_struct_bench_batch(size: usize, null_density:
f32) -> Result<RecordBat
)?)
}
+fn create_nested_list_bench_batch(size: usize, null_density: f32) ->
Result<RecordBatch> {
+ // List<List<Int32>> — exercises the nested repetition (non-batched) path
+ let fields = vec![Field::new(
+ "_1",
+ DataType::List(Arc::new(Field::new_list_field(
+ DataType::List(Arc::new(Field::new_list_field(DataType::Int32,
true))),
+ true,
+ ))),
+ true,
+ )];
+ let schema = Schema::new(fields);
+ Ok(create_random_batch(
+ Arc::new(schema),
+ size,
+ null_density,
+ 0.75,
+ )?)
+}
+
+fn create_list_struct_with_list_batch(size: usize, null_density: f32) ->
Result<RecordBatch> {
+ // List<Struct<a:Int32, b:Float32, c:List<Int32>>>
+ // The struct child contains a nested list, so child_has_no_nested_rep() =
false.
+ // This exercises the per-slot (non-batched) write path in level
computation.
+ let fields = vec![Field::new(
+ "_1",
+ DataType::List(Arc::new(Field::new_list_field(
+ DataType::Struct(Fields::from(vec![
+ Field::new("a", DataType::Int32, true),
+ Field::new("b", DataType::Float32, true),
+ Field::new(
+ "c",
+
DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
+ true,
+ ),
+ ])),
+ true,
+ ))),
+ true,
+ )];
+ let schema = Schema::new(fields);
+ Ok(create_random_batch(
+ Arc::new(schema),
+ size,
+ null_density,
+ 0.75,
+ )?)
+}
+
fn _create_nested_bench_batch(
size: usize,
null_density: f32,
@@ -491,6 +539,12 @@ fn create_batches() -> Vec<(&'static str, RecordBatch)> {
let batch = create_struct_bench_batch(BATCH_SIZE, 1.0).unwrap();
batches.push(("struct_all_null", batch));
+ let batch = create_nested_list_bench_batch(BATCH_SIZE, 0.25).unwrap();
+ batches.push(("list_nested", batch));
+
+ let batch = create_list_struct_with_list_batch(BATCH_SIZE, 0.25).unwrap();
+ batches.push(("list_struct_with_list", batch));
+
batches
}