This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c3732fcae Optimize `take` kernel for `BinaryViewArray` and 
`StringViewArray` (#6168)
0c3732fcae is described below

commit 0c3732fcae50ea188f4677fcf0c598f723100db6
Author: Andrew Duffy <[email protected]>
AuthorDate: Fri Aug 2 09:31:06 2024 -0400

    Optimize `take` kernel for `BinaryViewArray` and `StringViewArray` (#6168)
    
    * improve speed of view take kernel
    
    * ArrayData -> new_unchecked
    
    * Update arrow-select/src/take.rs
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 arrow-select/src/take.rs      |  9 ++++-----
 arrow/benches/take_kernels.rs | 36 ++++++++++++++++++++++++++++++++++++
 arrow/src/util/bench_util.rs  | 28 ++++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 5 deletions(-)

diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index d6892eb0a9..b66133ac71 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -487,11 +487,10 @@ fn take_byte_view<T: ByteViewType, IndexType: 
ArrowPrimitiveType>(
 ) -> Result<GenericByteViewArray<T>, ArrowError> {
     let new_views = take_native(array.views(), indices);
     let new_nulls = take_nulls(array.nulls(), indices);
-    Ok(GenericByteViewArray::new(
-        new_views,
-        array.data_buffers().to_vec(),
-        new_nulls,
-    ))
+    // Safety:  array.views was valid, and take_native copies only valid 
values, and verifies bounds
+    Ok(unsafe {
+        GenericByteViewArray::new_unchecked(new_views, 
array.data_buffers().to_vec(), new_nulls)
+    })
 }
 
 /// `take` implementation for list arrays
diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs
index 9c3f1eb409..77ec54c97b 100644
--- a/arrow/benches/take_kernels.rs
+++ b/arrow/benches/take_kernels.rs
@@ -149,6 +149,42 @@ fn add_benchmark(c: &mut Criterion) {
         b.iter(|| bench_take(&values, &indices))
     });
 
+    let values = create_string_view_array(512, 0.0);
+    let indices = create_random_index(512, 0.0);
+    c.bench_function("take stringview 512", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_string_view_array(1024, 0.0);
+    let indices = create_random_index(1024, 0.0);
+    c.bench_function("take stringview 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_string_view_array(512, 0.0);
+    let indices = create_random_index(512, 0.5);
+    c.bench_function("take stringview null indices 512", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_string_view_array(1024, 0.0);
+    let indices = create_random_index(1024, 0.5);
+    c.bench_function("take stringview null indices 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_string_view_array(1024, 0.5);
+    let indices = create_random_index(1024, 0.0);
+    c.bench_function("take stringview null values 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
+    let values = create_string_view_array(1024, 0.5);
+    let indices = create_random_index(1024, 0.5);
+    c.bench_function("take stringview null values null indices 1024", |b| {
+        b.iter(|| bench_take(&values, &indices))
+    });
+
     let values = create_primitive_run_array::<Int32Type, Int32Type>(1024, 512);
     let indices = create_random_index(1024, 0.0);
     c.bench_function(
diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs
index ac7f86d561..2561c925aa 100644
--- a/arrow/src/util/bench_util.rs
+++ b/arrow/src/util/bench_util.rs
@@ -160,6 +160,34 @@ pub fn create_string_array_with_len<Offset: 
OffsetSizeTrait>(
         .collect()
 }
 
+/// Creates a random (but fixed-seeded) string view array of a given size and 
null density.
+///
+/// See `create_string_array` above for more details.
+pub fn create_string_view_array(size: usize, null_density: f32) -> 
StringViewArray {
+    create_string_view_array_with_max_len(size, null_density, 400)
+}
+
+/// Creates a random (but fixed-seeded) string view array of a given size and 
null density, where each string has a random length below `max_str_len`
+fn create_string_view_array_with_max_len(
+    size: usize,
+    null_density: f32,
+    max_str_len: usize,
+) -> StringViewArray {
+    let rng = &mut seedable_rng();
+    (0..size)
+        .map(|_| {
+            if rng.gen::<f32>() < null_density {
+                None
+            } else {
+                let str_len = rng.gen_range(0..max_str_len);
+                let value = 
rng.sample_iter(&Alphanumeric).take(str_len).collect();
+                let value = String::from_utf8(value).unwrap();
+                Some(value)
+            }
+        })
+        .collect()
+}
+
 /// Creates a random (but fixed-seeded) array of a given size, null density 
and length
 pub fn create_string_view_array_with_len(
     size: usize,

Reply via email to