This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new f88921cb04 Add benchmarks for FromIter (PrimitiveArray and
BooleanArray) (#8525)
f88921cb04 is described below
commit f88921cb04d8dfdf3ed3f0c7d05b5065d124fc6b
Author: Tobias Schwarzinger <[email protected]>
AuthorDate: Thu Oct 2 22:59:16 2025 +0200
Add benchmarks for FromIter (PrimitiveArray and BooleanArray) (#8525)
# Which issue does this PR close?
- Relates to https://github.com/apache/arrow-rs/issues/8505.
I want to be able to detect any performance regressions in `BooleanArray::from_iter`.
# Rationale for this change
Add microbenchmarks for observing the performance of
`XYZArray::from_iter`.
On my machine, running the benchmarks back to back yields
deviations within 1%.
```
Int64Array::from_iter time: [14.292 µs 14.297 µs 14.303 µs]
change: [-0.0049% +0.1290% +0.2631%] (p = 0.06 > 0.05)
No change in performance detected.
Found 26 outliers among 100 measurements (26.00%)
1 (1.00%) low severe
3 (3.00%) low mild
9 (9.00%) high mild
13 (13.00%) high severe
Int64Array::from_trusted_len_iter
time: [6.7355 µs 6.7472 µs 6.7628 µs]
change: [+0.0215% +0.1868% +0.3739%] (p = 0.03 < 0.05)
Change within noise threshold.
Found 11 outliers among 100 measurements (11.00%)
4 (4.00%) high mild
7 (7.00%) high severe
BooleanArray::from_iter time: [7.3389 µs 7.3596 µs 7.3861 µs]
change: [-1.3820% -0.8065% -0.2803%] (p = 0.00 < 0.05)
Change within noise threshold.
Found 16 outliers among 100 measurements (16.00%)
9 (9.00%) high mild
7 (7.00%) high severe
```
# What changes are included in this PR?
Only benchmarks
# Are these changes tested?
Functionality is tested in the implementation file.
# Are there any user-facing changes?
None
---
arrow/Cargo.toml | 2 +-
arrow/benches/{array_from_vec.rs => array_from.rs} | 43 ++++++++++++++++++++--
2 files changed, 41 insertions(+), 4 deletions(-)
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml
index 0be22561a5..c77e85861d 100644
--- a/arrow/Cargo.toml
+++ b/arrow/Cargo.toml
@@ -120,7 +120,7 @@ harness = false
required-features = ["test_utils"]
[[bench]]
-name = "array_from_vec"
+name = "array_from"
harness = false
[[bench]]
diff --git a/arrow/benches/array_from_vec.rs b/arrow/benches/array_from.rs
similarity index 82%
rename from arrow/benches/array_from_vec.rs
rename to arrow/benches/array_from.rs
index dc1b2d7b74..3af605ef4a 100644
--- a/arrow/benches/array_from_vec.rs
+++ b/arrow/benches/array_from.rs
@@ -25,6 +25,7 @@ extern crate arrow;
use arrow::array::*;
use arrow_buffer::i256;
use rand::Rng;
+use std::iter::repeat_n;
use std::{hint, sync::Arc};
fn array_from_vec(n: usize) {
@@ -117,7 +118,7 @@ fn decimal256_array_from_vec(array: &[Option<i256>]) {
);
}
-fn decimal_benchmark(c: &mut Criterion) {
+fn array_from_vec_decimal_benchmark(c: &mut Criterion) {
// bench decimal32 array
// create option<i32> array
let size: usize = 1 << 15;
@@ -170,7 +171,7 @@ fn decimal_benchmark(c: &mut Criterion) {
});
}
-fn criterion_benchmark(c: &mut Criterion) {
+fn array_from_vec_benchmark(c: &mut Criterion) {
c.bench_function("array_from_vec 128", |b| b.iter(|| array_from_vec(128)));
c.bench_function("array_from_vec 256", |b| b.iter(|| array_from_vec(256)));
c.bench_function("array_from_vec 512", |b| b.iter(|| array_from_vec(512)));
@@ -206,5 +207,41 @@ fn criterion_benchmark(c: &mut Criterion) {
});
}
-criterion_group!(benches, criterion_benchmark, decimal_benchmark);
+fn gen_option_vector<TItem: Copy>(item: TItem, len: usize) -> Vec<Option<TItem>> {
+ hint::black_box(
+ repeat_n(item, len)
+ .enumerate()
+ .map(|(idx, item)| if idx % 3 == 0 { None } else { Some(item) })
+ .collect(),
+ )
+}
+
+fn from_iter_benchmark(c: &mut Criterion) {
+ const ITER_LEN: usize = 16_384;
+
+ // All ArrowPrimitiveType use the same implementation
+ c.bench_function("Int64Array::from_iter", |b| {
+ let values = gen_option_vector(1, ITER_LEN);
+ b.iter(|| hint::black_box(Int64Array::from_iter(values.iter())));
+ });
+ c.bench_function("Int64Array::from_trusted_len_iter", |b| {
+ let values = gen_option_vector(1, ITER_LEN);
+ b.iter(|| unsafe {
+ // SAFETY: values.iter() is a TrustedLenIterator
+ hint::black_box(Int64Array::from_trusted_len_iter(values.iter()))
+ });
+ });
+
+ c.bench_function("BooleanArray::from_iter", |b| {
+ let values = gen_option_vector(true, ITER_LEN);
+ b.iter(|| hint::black_box(BooleanArray::from_iter(values.iter())));
+ });
+}
+
+criterion_group!(
+ benches,
+ array_from_vec_benchmark,
+ array_from_vec_decimal_benchmark,
+ from_iter_benchmark
+);
criterion_main!(benches);