This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new c83b9d7223 feat: add `array_slice` benchmark (#18879)
c83b9d7223 is described below

commit c83b9d7223438a72f7dc380779c08c86f9d1bd51
Author: Khanh Duong <[email protected]>
AuthorDate: Tue Nov 25 23:08:03 2025 +0900

    feat: add `array_slice` benchmark (#18879)
    
    ## Which issue does this PR close?
    
    - Part of #18458.
    
    ## Rationale for this change
    
    - Add bench mark for `array_slice`
    
    ## What changes are included in this PR?
    
    Bench marks `array_slice` with:
    -  `ListArray` / `ListViewArray`
    - array / scalar input
    - with / without `stride`
    - `stride` in `[-2, -1, 1, 2]`
    
    ## Are these changes tested?
    
    ## Are there any user-facing changes?
---
 datafusion/functions-nested/Cargo.toml             |   4 +
 datafusion/functions-nested/benches/array_slice.rs | 230 +++++++++++++++++++++
 2 files changed, 234 insertions(+)

diff --git a/datafusion/functions-nested/Cargo.toml 
b/datafusion/functions-nested/Cargo.toml
index 3a02db7501..6d8e3c4497 100644
--- a/datafusion/functions-nested/Cargo.toml
+++ b/datafusion/functions-nested/Cargo.toml
@@ -77,6 +77,10 @@ name = "array_has"
 harness = false
 name = "array_reverse"
 
+[[bench]]
+harness = false
+name = "array_slice"
+
 [[bench]]
 harness = false
 name = "map"
diff --git a/datafusion/functions-nested/benches/array_slice.rs 
b/datafusion/functions-nested/benches/array_slice.rs
new file mode 100644
index 0000000000..bdbbeb837e
--- /dev/null
+++ b/datafusion/functions-nested/benches/array_slice.rs
@@ -0,0 +1,230 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::array::{
+    Int64Array, ListArray, ListViewArray, NullBufferBuilder, PrimitiveArray,
+};
+use arrow::buffer::{OffsetBuffer, ScalarBuffer};
+use arrow::datatypes::{DataType, Field, Int64Type};
+use criterion::{criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
+use datafusion_common::ScalarValue;
+use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
+use datafusion_functions_nested::extract::array_slice_udf;
+use rand::rngs::StdRng;
+use rand::seq::IndexedRandom;
+use rand::{Rng, SeedableRng};
+use std::hint::black_box;
+use std::sync::Arc;
+
+fn create_inputs(
+    rng: &mut StdRng,
+    size: usize,
+    child_array_size: usize,
+    null_density: f32,
+) -> (ListArray, ListViewArray) {
+    let mut nulls_builder = NullBufferBuilder::new(size);
+    let mut sizes = Vec::with_capacity(size);
+
+    for _ in 0..size {
+        if rng.random::<f32>() < null_density {
+            nulls_builder.append_null();
+        } else {
+            nulls_builder.append_non_null();
+        }
+        sizes.push(rng.random_range(1..child_array_size));
+    }
+    let nulls = nulls_builder.finish();
+
+    let length = sizes.iter().sum();
+    let values: PrimitiveArray<Int64Type> =
+        (0..length).map(|_| Some(rng.random())).collect();
+    let values = Arc::new(values);
+
+    let offsets = OffsetBuffer::from_lengths(sizes.clone());
+    let list_array = ListArray::new(
+        Arc::new(Field::new_list_field(DataType::Int64, true)),
+        offsets.clone(),
+        values.clone(),
+        nulls.clone(),
+    );
+
+    let offsets = ScalarBuffer::from(offsets.slice(0, size - 1));
+    let sizes = ScalarBuffer::from_iter(sizes.into_iter().map(|v| v as i32));
+    let list_view_array = ListViewArray::new(
+        Arc::new(Field::new_list_field(DataType::Int64, true)),
+        offsets,
+        sizes,
+        values,
+        nulls,
+    );
+
+    (list_array, list_view_array)
+}
+
+/// Create `from`, `to`, and `stride` from an array of strides.
+fn random_from_to_stride(
+    rng: &mut StdRng,
+    size: i64,
+    null_density: f32,
+    stride_choices: &[Option<i64>],
+) -> (Option<i64>, Option<i64>, Option<i64>) {
+    let from = if rng.random::<f32>() < null_density {
+        None
+    } else {
+        Some(rng.random_range(1..=size))
+    };
+
+    let to = if rng.random::<f32>() < null_density {
+        None
+    } else {
+        match from {
+            Some(from) => Some(rng.random_range(from..=size)),
+            None => Some(rng.random_range(1..=size)),
+        }
+    };
+
+    let stride = stride_choices.choose(rng).cloned().unwrap_or(None);
+
+    if from.is_none() || to.is_none() || stride.is_none_or(|s| s > 0) {
+        (from, to, stride)
+    } else {
+        // stride < 0, swap from and to
+        (to, from, stride)
+    }
+}
+
+fn array_slice_benchmark(
+    name: &str,
+    input: ColumnarValue,
+    mut args: Vec<ColumnarValue>,
+    c: &mut Criterion,
+    size: usize,
+) {
+    args.insert(0, input);
+
+    let array_slice = array_slice_udf();
+    let arg_fields = args
+        .iter()
+        .enumerate()
+        .map(|(idx, arg)| {
+            <Arc<Field>>::from(Field::new(format!("arg_{idx}"), 
arg.data_type(), true))
+        })
+        .collect::<Vec<_>>();
+    c.bench_function(name, |b| {
+        b.iter(|| {
+            black_box(
+                array_slice
+                    .invoke_with_args(ScalarFunctionArgs {
+                        args: args.clone(),
+                        arg_fields: arg_fields.clone(),
+                        number_rows: size,
+                        return_field: 
Field::new_list_field(args[0].data_type(), true)
+                            .into(),
+                        config_options: Arc::new(ConfigOptions::default()),
+                    })
+                    .unwrap(),
+            )
+        })
+    });
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let rng = &mut StdRng::seed_from_u64(42);
+    let size = 1_000_000;
+    let child_array_size = 100;
+    let null_density = 0.1;
+
+    let (list_array, list_view_array) =
+        create_inputs(rng, size, child_array_size, null_density);
+
+    let mut array_from = Vec::with_capacity(size);
+    let mut array_to = Vec::with_capacity(size);
+    let mut array_stride = Vec::with_capacity(size);
+    for child_array_size in list_array.offsets().lengths() {
+        let (from, to, stride) = random_from_to_stride(
+            rng,
+            child_array_size as i64,
+            null_density,
+            &[None, Some(-2), Some(-1), Some(1), Some(2)],
+        );
+        array_from.push(from);
+        array_to.push(to);
+        array_stride.push(stride);
+    }
+
+    // input
+    let list_array = ColumnarValue::Array(Arc::new(list_array));
+    let list_view_array = ColumnarValue::Array(Arc::new(list_view_array));
+
+    // args
+    let array_from = 
ColumnarValue::Array(Arc::new(Int64Array::from(array_from)));
+    let array_to = ColumnarValue::Array(Arc::new(Int64Array::from(array_to)));
+    let array_stride = 
ColumnarValue::Array(Arc::new(Int64Array::from(array_stride)));
+    let scalar_from = ColumnarValue::Scalar(ScalarValue::from(1i64));
+    let scalar_to = ColumnarValue::Scalar(ScalarValue::from(child_array_size 
as i64 / 2));
+
+    for input in [list_array, list_view_array] {
+        let input_type = input.data_type().to_string();
+
+        array_slice_benchmark(
+            &format!("array_slice: input {input_type}, array args"),
+            input.clone(),
+            vec![array_from.clone(), array_to.clone(), array_stride.clone()],
+            c,
+            size,
+        );
+
+        array_slice_benchmark(
+            &format!("array_slice: input {input_type}, array args, no stride"),
+            input.clone(),
+            vec![array_from.clone(), array_to.clone()],
+            c,
+            size,
+        );
+
+        array_slice_benchmark(
+            &format!("array_slice: input {input_type}, scalar args, no 
stride"),
+            input.clone(),
+            vec![scalar_from.clone(), scalar_to.clone()],
+            c,
+            size,
+        );
+
+        for stride in [-2i64, -1i64, 1i64, 2i64] {
+            // swap from and to if stride < 0
+            let (scalar_from, scalar_to) = if stride > 0 {
+                (scalar_from.clone(), scalar_to.clone())
+            } else {
+                (scalar_to.clone(), scalar_from.clone())
+            };
+            let scalar_stride = 
ColumnarValue::Scalar(ScalarValue::from(stride));
+            array_slice_benchmark(
+                &format!("array_slice: input {input_type}, scalar args, 
stride={stride}"),
+                input.clone(),
+                vec![scalar_from, scalar_to, scalar_stride],
+                c,
+                size,
+            );
+        }
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to