This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new adf9308158 feat(parquet): add struct-column writer benchmarks (#9679)
adf9308158 is described below

commit adf930815885d25ec405f4735428685a01984b3d
Author: Hippolyte Barraud <[email protected]>
AuthorDate: Thu Apr 9 08:25:22 2026 -0400

    feat(parquet): add struct-column writer benchmarks (#9679)
    
    # Which issue does this PR close?
    
    - None, but relates to #9653
    
    # Rationale for this change
    
    #9653 introduces optimizations related to non-null uniform workloads.
    This adds benchmarks so we can quantify them.
    
    # What changes are included in this PR?
    
    Add three new benchmark cases to the arrow_writer benchmark suite for
    evaluating write performance on struct columns at varying null
    densities:
    
    * `struct_non_null`: a nullable struct with 0% null rows and
    non-nullable primitive children;
    * `struct_sparse_99pct_null`: a nullable struct with 99% null rows,
    exercising null batching through one level of struct nesting;
    * `struct_all_null`: a nullable struct with 100% null rows, exercising
    the uniform-null path through struct nesting.
    
    Baseline results (Apple M1 Max):
    ```
      struct_non_null/default              29.9 ms
      struct_non_null/parquet_2            38.2 ms
      struct_non_null/zstd_parquet_2       50.9 ms
      struct_sparse_99pct_null/default      7.2 ms
      struct_sparse_99pct_null/parquet_2    7.3 ms
      struct_sparse_99pct_null/zstd_p2      8.1 ms
      struct_all_null/default              83.3 µs
      struct_all_null/parquet_2            82.5 µs
      struct_all_null/zstd_parquet_2      106.6 µs
    ```
    
    # Are these changes tested?
    
    N/A
    
    # Are there any user-facing changes?
    
    None
    
    Signed-off-by: Hippolyte Barraud <[email protected]>
---
 parquet/benches/arrow_writer.rs | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs
index 909d419825..6b48afbf3d 100644
--- a/parquet/benches/arrow_writer.rs
+++ b/parquet/benches/arrow_writer.rs
@@ -266,6 +266,25 @@ fn create_list_primitive_bench_batch_non_null(
     )?)
 }
 
+fn create_struct_bench_batch(size: usize, null_density: f32) -> Result<RecordBatch> {
+    let fields = vec![Field::new(
+        "_1",
+        DataType::Struct(Fields::from(vec![
+            Field::new("_1", DataType::Int32, false),
+            Field::new("_2", DataType::Int64, false),
+            Field::new("_3", DataType::Float32, false),
+        ])),
+        true,
+    )];
+    let schema = Schema::new(fields);
+    Ok(create_random_batch(
+        Arc::new(schema),
+        size,
+        null_density,
+        0.75,
+    )?)
+}
+
 fn _create_nested_bench_batch(
     size: usize,
     null_density: f32,
@@ -400,6 +419,15 @@ fn create_batches() -> Vec<(&'static str, RecordBatch)> {
     let batch = create_primitive_bench_batch(BATCH_SIZE, 1.0, 0.75).unwrap();
     batches.push(("primitive_all_null", batch));
 
+    let batch = create_struct_bench_batch(BATCH_SIZE, 0.0).unwrap();
+    batches.push(("struct_non_null", batch));
+
+    let batch = create_struct_bench_batch(BATCH_SIZE, 0.99).unwrap();
+    batches.push(("struct_sparse_99pct_null", batch));
+
+    let batch = create_struct_bench_batch(BATCH_SIZE, 1.0).unwrap();
+    batches.push(("struct_all_null", batch));
+
     batches
 }
 

Reply via email to