This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new adf9308158 feat(parquet): add struct-column writer benchmarks (#9679)
adf9308158 is described below
commit adf930815885d25ec405f4735428685a01984b3d
Author: Hippolyte Barraud <[email protected]>
AuthorDate: Thu Apr 9 08:25:22 2026 -0400
feat(parquet): add struct-column writer benchmarks (#9679)
# Which issue does this PR close?
- None, but relates to #9653
# Rationale for this change
#9653 introduces optimizations related to non-null uniform workloads.
This adds benchmarks so we can quantify them.
# What changes are included in this PR?
Add three new benchmark cases to the arrow_writer benchmark suite for
evaluating write performance on struct columns at varying null
densities:
* `struct_non_null`: a nullable struct with 0% null rows and
non-nullable primitive children;
* `struct_sparse_99pct_null`: a nullable struct with 99% null rows,
exercising null batching through one level of struct nesting;
* `struct_all_null`: a nullable struct with 100% null rows, exercising
the uniform-null path through struct nesting.
Baseline results (Apple M1 Max):
```
struct_non_null/default 29.9 ms
struct_non_null/parquet_2 38.2 ms
struct_non_null/zstd_parquet_2 50.9 ms
struct_sparse_99pct_null/default 7.2 ms
struct_sparse_99pct_null/parquet_2 7.3 ms
struct_sparse_99pct_null/zstd_parquet_2 8.1 ms
struct_all_null/default 83.3 µs
struct_all_null/parquet_2 82.5 µs
struct_all_null/zstd_parquet_2 106.6 µs
```
# Are these changes tested?
N/A
# Are there any user-facing changes?
None
Signed-off-by: Hippolyte Barraud <[email protected]>
---
parquet/benches/arrow_writer.rs | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs
index 909d419825..6b48afbf3d 100644
--- a/parquet/benches/arrow_writer.rs
+++ b/parquet/benches/arrow_writer.rs
@@ -266,6 +266,25 @@ fn create_list_primitive_bench_batch_non_null(
)?)
}
+fn create_struct_bench_batch(size: usize, null_density: f32) -> Result<RecordBatch> {
+ let fields = vec![Field::new(
+ "_1",
+ DataType::Struct(Fields::from(vec![
+ Field::new("_1", DataType::Int32, false),
+ Field::new("_2", DataType::Int64, false),
+ Field::new("_3", DataType::Float32, false),
+ ])),
+ true,
+ )];
+ let schema = Schema::new(fields);
+ Ok(create_random_batch(
+ Arc::new(schema),
+ size,
+ null_density,
+ 0.75,
+ )?)
+}
+
fn _create_nested_bench_batch(
size: usize,
null_density: f32,
@@ -400,6 +419,15 @@ fn create_batches() -> Vec<(&'static str, RecordBatch)> {
let batch = create_primitive_bench_batch(BATCH_SIZE, 1.0, 0.75).unwrap();
batches.push(("primitive_all_null", batch));
+ let batch = create_struct_bench_batch(BATCH_SIZE, 0.0).unwrap();
+ batches.push(("struct_non_null", batch));
+
+ let batch = create_struct_bench_batch(BATCH_SIZE, 0.99).unwrap();
+ batches.push(("struct_sparse_99pct_null", batch));
+
+ let batch = create_struct_bench_batch(BATCH_SIZE, 1.0).unwrap();
+ batches.push(("struct_all_null", batch));
+
batches
}