(arrow-rs) branch main updated: Add benchmarks for FromIter (PrimitiveArray and BooleanArray) (#8525)

alamb Thu, 02 Oct 2025 13:59:41 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git



The following commit(s) were added to refs/heads/main by this push:
     new f88921cb04 Add benchmarks for FromIter (PrimitiveArray and BooleanArray) (#8525)
f88921cb04 is described below

commit f88921cb04d8dfdf3ed3f0c7d05b5065d124fc6b
Author: Tobias Schwarzinger <[email protected]>
AuthorDate: Thu Oct 2 22:59:16 2025 +0200

    Add benchmarks for FromIter (PrimitiveArray and BooleanArray) (#8525)
    
    # Which issue does this PR close?
    
    - Relates to https://github.com/apache/arrow-rs/issues/8505.
    
    I want to catch any performance regressions in `BooleanArray::from_iter`.
    
    # Rationale for this change
    
    Add microbenchmarks for observing the performance of
    `XYZArray::from_iter`.
    
    On my machine, running the benchmarks back to back yields
    deviations within 1%.
    
    ```
    Int64Array::from_iter   time:   [14.292 µs 14.297 µs 14.303 µs]
                            change: [-0.0049% +0.1290% +0.2631%] (p = 0.06 > 0.05)
                            No change in performance detected.
    Found 26 outliers among 100 measurements (26.00%)
      1 (1.00%) low severe
      3 (3.00%) low mild
      9 (9.00%) high mild
      13 (13.00%) high severe
    
    Int64Array::from_trusted_len_iter
                            time:   [6.7355 µs 6.7472 µs 6.7628 µs]
                            change: [+0.0215% +0.1868% +0.3739%] (p = 0.03 < 0.05)
                            Change within noise threshold.
    Found 11 outliers among 100 measurements (11.00%)
      4 (4.00%) high mild
      7 (7.00%) high severe
    
    BooleanArray::from_iter time:   [7.3389 µs 7.3596 µs 7.3861 µs]
                            change: [-1.3820% -0.8065% -0.2803%] (p = 0.00 < 0.05)
                            Change within noise threshold.
    Found 16 outliers among 100 measurements (16.00%)
      9 (9.00%) high mild
      7 (7.00%) high severe
    ```
    
    # What changes are included in this PR?
    
    Only benchmarks
    
    # Are these changes tested?
    
    Functionality is tested in the implementation file.
    
    # Are there any user-facing changes?
    
    None
---
 arrow/Cargo.toml                                   |  2 +-
 arrow/benches/{array_from_vec.rs => array_from.rs} | 43 ++++++++++++++++++++--
 2 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 0be22561a5..c77e85861d 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -120,7 +120,7 @@ harness = false
 required-features = ["test_utils"]
 
 [[bench]]
-name = "array_from_vec"
+name = "array_from"
 harness = false
 
 [[bench]]
diff --git a/arrow/benches/array_from_vec.rs b/arrow/benches/array_from.rs
similarity index 82%
rename from arrow/benches/array_from_vec.rs
rename to arrow/benches/array_from.rs
index dc1b2d7b74..3af605ef4a 100644
--- a/arrow/benches/array_from_vec.rs
+++ b/arrow/benches/array_from.rs
@@ -25,6 +25,7 @@ extern crate arrow;
 use arrow::array::*;
 use arrow_buffer::i256;
 use rand::Rng;
+use std::iter::repeat_n;
 use std::{hint, sync::Arc};
 
 fn array_from_vec(n: usize) {
@@ -117,7 +118,7 @@ fn decimal256_array_from_vec(array: &[Option<i256>]) {
     );
 }
 
-fn decimal_benchmark(c: &mut Criterion) {
+fn array_from_vec_decimal_benchmark(c: &mut Criterion) {
     // bench decimal32 array
     // create option<i32> array
     let size: usize = 1 << 15;
@@ -170,7 +171,7 @@ fn decimal_benchmark(c: &mut Criterion) {
     });
 }
 
-fn criterion_benchmark(c: &mut Criterion) {
+fn array_from_vec_benchmark(c: &mut Criterion) {
     c.bench_function("array_from_vec 128", |b| b.iter(|| array_from_vec(128)));
     c.bench_function("array_from_vec 256", |b| b.iter(|| array_from_vec(256)));
     c.bench_function("array_from_vec 512", |b| b.iter(|| array_from_vec(512)));
@@ -206,5 +207,41 @@ fn criterion_benchmark(c: &mut Criterion) {
     });
 }
 
-criterion_group!(benches, criterion_benchmark, decimal_benchmark);
+fn gen_option_vector<TItem: Copy>(item: TItem, len: usize) -> Vec<Option<TItem>> {
+    hint::black_box(
+        repeat_n(item, len)
+            .enumerate()
+            .map(|(idx, item)| if idx % 3 == 0 { None } else { Some(item) })
+            .collect(),
+    )
+}
+
+fn from_iter_benchmark(c: &mut Criterion) {
+    const ITER_LEN: usize = 16_384;
+
+    // All ArrowPrimitiveType use the same implementation
+    c.bench_function("Int64Array::from_iter", |b| {
+        let values = gen_option_vector(1, ITER_LEN);
+        b.iter(|| hint::black_box(Int64Array::from_iter(values.iter())));
+    });
+    c.bench_function("Int64Array::from_trusted_len_iter", |b| {
+        let values = gen_option_vector(1, ITER_LEN);
+        b.iter(|| unsafe {
+            // SAFETY: values.iter() is a TrustedLenIterator
+            hint::black_box(Int64Array::from_trusted_len_iter(values.iter()))
+        });
+    });
+
+    c.bench_function("BooleanArray::from_iter", |b| {
+        let values = gen_option_vector(true, ITER_LEN);
+        b.iter(|| hint::black_box(BooleanArray::from_iter(values.iter())));
+    });
+}
+
+criterion_group!(
+    benches,
+    array_from_vec_benchmark,
+    array_from_vec_decimal_benchmark,
+    from_iter_benchmark
+);
 criterion_main!(benches);

(arrow-rs) branch main updated: Add benchmarks for FromIter (PrimitiveArray and BooleanArray) (#8525)

Reply via email to