This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 2b851d9b30 Add List and ListView take benchmarks (#9626)
2b851d9b30 is described below
commit 2b851d9b30ce76b70e68e29391fdef63719e694b
Author: Adam Gutglick <[email protected]>
AuthorDate: Fri Apr 3 12:23:31 2026 +0100
Add List and ListView take benchmarks (#9626)
# Which issue does this PR close?
- Closes https://github.com/apache/arrow-rs/issues/9627.
# Rationale for this change
Adding benchmarks makes it easier to measure performance and evaluate
the impact of changes to the implementation. I also have a PR including
some significant improvements, but figured it's worth splitting it into
two parts; let me know if it's better to do that in one step.
# What changes are included in this PR?
Add List and ListView `take` benchmarks, plus a couple of utility functions
that generate list and list_view arrays without requiring a seed.
# Are these changes tested?
Benchmarks run locally, same setup as other benchmarks.
# Are there any user-facing changes?
No
---
arrow/benches/take_kernels.rs | 60 +++++++++++++++++++++++++++++++++++
arrow/src/util/bench_util.rs | 74 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 134 insertions(+)
diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs
index fb23177168..a10f80c590 100644
--- a/arrow/benches/take_kernels.rs
+++ b/arrow/benches/take_kernels.rs
@@ -186,6 +186,66 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| bench_take(&values, &indices))
});
+ let values = create_primitive_list_array::<i32, Int32Type>(512, 0.0, 0.0,
20);
+ let indices = create_random_index(512, 0.0);
+ c.bench_function("take list i32 512", |b| {
+ b.iter(|| bench_take(&values, &indices))
+ });
+
+ let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.0, 0.0,
20);
+ let indices = create_random_index(1024, 0.0);
+ c.bench_function("take list i32 1024", |b| {
+ b.iter(|| bench_take(&values, &indices))
+ });
+
+ let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.5, 0.0,
20);
+ let indices = create_random_index(1024, 0.0);
+ c.bench_function("take list i32 null values 1024", |b| {
+ b.iter(|| bench_take(&values, &indices))
+ });
+
+    let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.0, 0.0, 20);
+    let indices = create_random_index(1024, 0.5);
+    c.bench_function("take list i32 null indices 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+ let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.5, 0.5,
20);
+ let indices = create_random_index(1024, 0.5);
+ c.bench_function("take list i32 null values null indices 1024", |b| {
+ b.iter(|| bench_take(&values, &indices))
+ });
+
+ let values = create_primitive_list_view_array::<i32, Int32Type>(512, 0.0,
0.0, 20);
+ let indices = create_random_index(512, 0.0);
+ c.bench_function("take listview i32 512", |b| {
+ b.iter(|| bench_take(&values, &indices))
+ });
+
+ let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.0,
0.0, 20);
+ let indices = create_random_index(1024, 0.0);
+ c.bench_function("take listview i32 1024", |b| {
+ b.iter(|| bench_take(&values, &indices))
+ });
+
+ let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.5,
0.0, 20);
+ let indices = create_random_index(1024, 0.0);
+ c.bench_function("take listview i32 null values 1024", |b| {
+ b.iter(|| bench_take(&values, &indices))
+ });
+
+ let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.0,
0.0, 20);
+ let indices = create_random_index(1024, 0.5);
+ c.bench_function("take listview i32 null indices 1024", |b| {
+ b.iter(|| bench_take(&values, &indices))
+ });
+
+ let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.5,
0.5, 20);
+ let indices = create_random_index(1024, 0.5);
+ c.bench_function("take listview i32 null values null indices 1024", |b| {
+ b.iter(|| bench_take(&values, &indices))
+ });
+
let values = create_primitive_run_array::<Int32Type, Int32Type>(1024, 512);
let indices = create_random_index(1024, 0.0);
c.bench_function(
diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs
index fefb9077b1..aba95ba4aa 100644
--- a/arrow/src/util/bench_util.rs
+++ b/arrow/src/util/bench_util.rs
@@ -491,6 +491,80 @@ where
GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
}
+/// Create a List/LargeList array of random primitive values using `seedable_rng()`.
+///
+/// Each of the `size` entries is null when an `f32` draw falls below `null_density`;
+/// otherwise its length is drawn from `0..=max_list_size` and each element is null when a
+/// draw falls below `list_null_density`. See [`create_primitive_list_array_with_seed`].
+pub fn create_primitive_list_array<O, T>(
+    size: usize,
+    null_density: f32,
+    list_null_density: f32,
+    max_list_size: usize,
+) -> GenericListArray<O>
+where
+    O: OffsetSizeTrait,
+    T: ArrowPrimitiveType,
+    StandardUniform: Distribution<T::Native>,
+{
+    let mut rng = seedable_rng();
+    let mut rows: Vec<Option<Vec<Option<T::Native>>>> = Vec::with_capacity(size);
+
+    for _ in 0..size {
+        // RNG draw order: entry null check, length, then per element null check and value.
+        if rng.random::<f32>() < null_density {
+            rows.push(None);
+            continue;
+        }
+        let len = rng.random_range(0..=max_list_size);
+        let mut items: Vec<Option<T::Native>> = Vec::with_capacity(len);
+        for _ in 0..len {
+            let is_null = rng.random::<f32>() < list_null_density;
+            items.push(if is_null { None } else { Some(rng.random()) });
+        }
+        rows.push(Some(items));
+    }
+
+    GenericListArray::<O>::from_iter_primitive::<T, _, _>(rows)
+}
+
+/// Create a `GenericListViewArray` of random primitive values using `seedable_rng()`.
+///
+/// Each of the `size` entries is null when an `f32` draw falls below `null_density`;
+/// otherwise its length is drawn from `0..=max_list_size` and each element is null when a
+/// draw falls below `list_null_density`. See [`create_primitive_list_array_with_seed`].
+pub fn create_primitive_list_view_array<O, T>(
+    size: usize,
+    null_density: f32,
+    list_null_density: f32,
+    max_list_size: usize,
+) -> GenericListViewArray<O>
+where
+    T: ArrowPrimitiveType,
+    StandardUniform: Distribution<T::Native>,
+    O: OffsetSizeTrait,
+{
+    let mut rng = seedable_rng();
+    let mut rows: Vec<Option<Vec<Option<T::Native>>>> = Vec::with_capacity(size);
+
+    for _ in 0..size {
+        // RNG draw order: entry null check, length, then per element null check and value.
+        if rng.random::<f32>() < null_density {
+            rows.push(None);
+            continue;
+        }
+        let len = rng.random_range(0..=max_list_size);
+        let mut items: Vec<Option<T::Native>> = Vec::with_capacity(len);
+        for _ in 0..len {
+            let is_null = rng.random::<f32>() < list_null_density;
+            items.push(if is_null { None } else { Some(rng.random()) });
+        }
+        rows.push(Some(items));
+    }
+
+    GenericListViewArray::<O>::from_iter_primitive::<T, _, _>(rows)
+}
+
/// Create primitive run array for given logical and physical array lengths
pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
logical_array_len: usize,