This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 2b851d9b30 Add List and ListView take benchmarks (#9626)
2b851d9b30 is described below

commit 2b851d9b30ce76b70e68e29391fdef63719e694b
Author: Adam Gutglick <[email protected]>
AuthorDate: Fri Apr 3 12:23:31 2026 +0100

    Add List and ListView take benchmarks (#9626)
    
    # Which issue does this PR close?
    
    - Closes https://github.com/apache/arrow-rs/issues/9627.
    
    # Rationale for this change
    
    Adding benchmarks makes it easier to measure performance and evaluate
    the impact of changes to the implementation. I also have a PR including
    some significant improvements, but figured it's worth splitting it into
    two parts; let me know if it's better to do that in one step.
    
    # What changes are included in this PR?
    
    Add a couple of utility functions to generate list and list_view arrays
    without providing a seed.
    
    # Are these changes tested?
    
    Benchmarks run locally, same setup as other benchmarks.
    
    # Are there any user-facing changes?
    
    No
---
 arrow/benches/take_kernels.rs | 60 +++++++++++++++++++++++++++++++++++
 arrow/src/util/bench_util.rs  | 74 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 134 insertions(+)

diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs
index fb23177168..a10f80c590 100644
--- a/arrow/benches/take_kernels.rs
+++ b/arrow/benches/take_kernels.rs
@@ -186,6 +186,66 @@ fn add_benchmark(c: &mut Criterion) {
         b.iter(|| bench_take(&values, &indices))
     });
 
+    let values = create_primitive_list_array::<i32, Int32Type>(512, 0.0, 0.0, 
20);
+    let indices = create_random_index(512, 0.0);
+    c.bench_function("take list i32 512", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.0, 0.0, 
20);
+    let indices = create_random_index(1024, 0.0);
+    c.bench_function("take list i32 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.5, 0.0, 
20);
+    let indices = create_random_index(1024, 0.0);
+    c.bench_function("take list i32 null values 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.0, 0.0, 
202);
+    let indices = create_random_index(1024, 0.5);
+    c.bench_function("take list i32 null indices 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_primitive_list_array::<i32, Int32Type>(1024, 0.5, 0.5, 
20);
+    let indices = create_random_index(1024, 0.5);
+    c.bench_function("take list i32 null values null indices 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_primitive_list_view_array::<i32, Int32Type>(512, 0.0, 
0.0, 20);
+    let indices = create_random_index(512, 0.0);
+    c.bench_function("take listview i32 512", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.0, 
0.0, 20);
+    let indices = create_random_index(1024, 0.0);
+    c.bench_function("take listview i32 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.5, 
0.0, 20);
+    let indices = create_random_index(1024, 0.0);
+    c.bench_function("take listview i32 null values 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.0, 
0.0, 20);
+    let indices = create_random_index(1024, 0.5);
+    c.bench_function("take listview i32 null indices 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_primitive_list_view_array::<i32, Int32Type>(1024, 0.5, 
0.5, 20);
+    let indices = create_random_index(1024, 0.5);
+    c.bench_function("take listview i32 null values null indices 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
     let values = create_primitive_run_array::<Int32Type, Int32Type>(1024, 512);
     let indices = create_random_index(1024, 0.0);
     c.bench_function(
diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs
index fefb9077b1..aba95ba4aa 100644
--- a/arrow/src/util/bench_util.rs
+++ b/arrow/src/util/bench_util.rs
@@ -491,6 +491,80 @@ where
     GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
 }
 
+/// Create a List/LargeList Array of `size` random primitive lists, seeded by `seedable_rng`
+///
+/// See [`create_primitive_list_array_with_seed`] for details on arguments.
+pub fn create_primitive_list_array<O, T>(
+    size: usize,
+    null_density: f32,
+    list_null_density: f32,
+    max_list_size: usize,
+) -> GenericListArray<O>
+where
+    O: OffsetSizeTrait,
+    T: ArrowPrimitiveType,
+    StandardUniform: Distribution<T::Native>,
+{
+    let mut rng = seedable_rng();
+
+    let values = (0..size).map(|_| {
+        if rng.random::<f32>() < null_density {
+            None // the whole list entry is null
+        } else {
+            let list_size = rng.random_range(0..=max_list_size); // inclusive bound; 0 yields an empty list
+            let list_values: Vec<Option<T::Native>> = (0..list_size)
+                .map(|_| {
+                    if rng.random::<f32>() < list_null_density {
+                        None // null element inside a non-null list
+                    } else {
+                        Some(rng.random())
+                    }
+                })
+                .collect();
+            Some(list_values)
+        }
+    });
+
+    GenericListArray::<O>::from_iter_primitive::<T, _, _>(values)
+}
+
+/// Create a `GenericListViewArray` of `size` random primitive lists, seeded by `seedable_rng`
+///
+/// See [`create_primitive_list_array_with_seed`] for details on arguments.
+pub fn create_primitive_list_view_array<O, T>(
+    size: usize,
+    null_density: f32,
+    list_null_density: f32,
+    max_list_size: usize,
+) -> GenericListViewArray<O>
+where
+    T: ArrowPrimitiveType,
+    StandardUniform: Distribution<T::Native>,
+    O: OffsetSizeTrait,
+{
+    let mut rng = seedable_rng();
+
+    let values = (0..size).map(|_| {
+        if rng.random::<f32>() < null_density {
+            None // the whole list entry is null
+        } else {
+            let list_size = rng.random_range(0..=max_list_size); // inclusive bound; 0 yields an empty list
+            let list_values: Vec<Option<T::Native>> = (0..list_size)
+                .map(|_| {
+                    if rng.random::<f32>() < list_null_density {
+                        None // null element inside a non-null list
+                    } else {
+                        Some(rng.random())
+                    }
+                })
+                .collect();
+            Some(list_values)
+        }
+    });
+
+    GenericListViewArray::<O>::from_iter_primitive::<T, _, _>(values)
+}
+
 /// Create primitive run array for given logical and physical array lengths
 pub fn create_primitive_run_array<R: RunEndIndexType, V: ArrowPrimitiveType>(
     logical_array_len: usize,

Reply via email to