This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new c83b9d7223 feat: add `array_slice` benchmark (#18879)
c83b9d7223 is described below
commit c83b9d7223438a72f7dc380779c08c86f9d1bd51
Author: Khanh Duong <[email protected]>
AuthorDate: Tue Nov 25 23:08:03 2025 +0900
feat: add `array_slice` benchmark (#18879)
## Which issue does this PR close?
- Part of #18458.
## Rationale for this change
- Add a benchmark for `array_slice`.
## What changes are included in this PR?
Benchmarks `array_slice` with:
- `ListArray` / `ListViewArray`
- array / scalar input
- with / without `stride`
- `stride` in `[-2, -1, 1, 2]`
## Are these changes tested?
## Are there any user-facing changes?
---
datafusion/functions-nested/Cargo.toml | 4 +
datafusion/functions-nested/benches/array_slice.rs | 230 +++++++++++++++++++++
2 files changed, 234 insertions(+)
diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml
index 3a02db7501..6d8e3c4497 100644
--- a/datafusion/functions-nested/Cargo.toml
+++ b/datafusion/functions-nested/Cargo.toml
@@ -77,6 +77,10 @@ name = "array_has"
harness = false
name = "array_reverse"
+[[bench]]
+harness = false
+name = "array_slice"
+
[[bench]]
harness = false
name = "map"
diff --git a/datafusion/functions-nested/benches/array_slice.rs b/datafusion/functions-nested/benches/array_slice.rs
new file mode 100644
index 0000000000..bdbbeb837e
--- /dev/null
+++ b/datafusion/functions-nested/benches/array_slice.rs
@@ -0,0 +1,230 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::array::{
+ Int64Array, ListArray, ListViewArray, NullBufferBuilder, PrimitiveArray,
+};
+use arrow::buffer::{OffsetBuffer, ScalarBuffer};
+use arrow::datatypes::{DataType, Field, Int64Type};
+use criterion::{criterion_group, criterion_main, Criterion};
+use datafusion_common::config::ConfigOptions;
+use datafusion_common::ScalarValue;
+use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
+use datafusion_functions_nested::extract::array_slice_udf;
+use rand::rngs::StdRng;
+use rand::seq::IndexedRandom;
+use rand::{Rng, SeedableRng};
+use std::hint::black_box;
+use std::sync::Arc;
+
+/// Builds a `ListArray` and a `ListViewArray` over the same random data.
+///
+/// Generates `size` rows. A row is marked null when a random `f32` draw
+/// falls below `null_density`, and every row (null or not) gets a length
+/// drawn from `1..child_array_size`. The `Int64` child values are random
+/// and never null. Both returned arrays are built from the same child
+/// values, validity and row lengths.
+fn create_inputs(
+    rng: &mut StdRng,
+    size: usize,
+    child_array_size: usize,
+    null_density: f32,
+) -> (ListArray, ListViewArray) {
+    let mut validity = NullBufferBuilder::new(size);
+    let mut row_lengths: Vec<usize> = Vec::with_capacity(size);
+
+    // Per row: draw the null flag first, then the row length.
+    for _ in 0..size {
+        let is_null = rng.random::<f32>() < null_density;
+        validity.append(!is_null);
+        row_lengths.push(rng.random_range(1..child_array_size));
+    }
+    let validity = validity.finish();
+
+    // One flat child array holding the items of every row.
+    let total_len: usize = row_lengths.iter().sum();
+    let child: PrimitiveArray<Int64Type> =
+        (0..total_len).map(|_| Some(rng.random())).collect();
+    let child = Arc::new(child);
+
+    let item_field =
+        Arc::new(Field::new_list_field(DataType::Int64, true));
+    let list_offsets: OffsetBuffer<i32> =
+        OffsetBuffer::from_lengths(row_lengths.iter().copied());
+
+    // The list view is described by per-row start offsets and sizes, so
+    // the offsets are cut down to the first `size - 1` lists' worth.
+    let view_offsets = ScalarBuffer::from(list_offsets.slice(0, size - 1));
+    let view_sizes =
+        ScalarBuffer::from_iter(row_lengths.into_iter().map(|n| n as i32));
+
+    let list_array = ListArray::new(
+        item_field.clone(),
+        list_offsets,
+        child.clone(),
+        validity.clone(),
+    );
+    let list_view_array = ListViewArray::new(
+        item_field,
+        view_offsets,
+        view_sizes,
+        child,
+        validity,
+    );
+
+    (list_array, list_view_array)
+}
+
+/// Draws a random `(from, to, stride)` triple for a list of `size` items.
+///
+/// `from` and `to` are each `None` when a random `f32` draw falls below
+/// `null_density`. Otherwise `from` is drawn from `1..=size` and `to`
+/// from `from..=size` (or `1..=size` when `from` is `None`). `stride` is
+/// picked from `stride_choices`; an empty slice yields `None`. When both
+/// bounds are present and the stride is not positive, the bounds are
+/// returned swapped.
+fn random_from_to_stride(
+    rng: &mut StdRng,
+    size: i64,
+    null_density: f32,
+    stride_choices: &[Option<i64>],
+) -> (Option<i64>, Option<i64>, Option<i64>) {
+    // The draws happen in a fixed order: `from`, then `to`, then `stride`.
+    let from_is_null = rng.random::<f32>() < null_density;
+    let from = (!from_is_null).then(|| rng.random_range(1..=size));
+
+    let to_is_null = rng.random::<f32>() < null_density;
+    let to = (!to_is_null).then(|| {
+        // Never start below a known `from`.
+        let lower = from.unwrap_or(1);
+        rng.random_range(lower..=size)
+    });
+
+    let stride = stride_choices.choose(rng).copied().flatten();
+
+    match (from, to, stride) {
+        // Both bounds known and a non-positive stride: swap the bounds.
+        (Some(_), Some(_), Some(s)) if s <= 0 => (to, from, stride),
+        _ => (from, to, stride),
+    }
+}
+
+/// Registers one Criterion benchmark that invokes `array_slice`.
+///
+/// `input` is placed in front of `args`, so `args` carries only the
+/// remaining arguments. `name` is the benchmark id and `size` is passed
+/// to the function as its row count.
+///
+/// # Panics
+///
+/// Panics if the `array_slice` invocation returns an error.
+fn array_slice_benchmark(
+    name: &str,
+    input: ColumnarValue,
+    mut args: Vec<ColumnarValue>,
+    c: &mut Criterion,
+    size: usize,
+) {
+    args.insert(0, input);
+
+    let array_slice = array_slice_udf();
+    let arg_fields = args
+        .iter()
+        .enumerate()
+        .map(|(idx, arg)| {
+            let field_name = format!("arg_{idx}");
+            Arc::new(Field::new(field_name, arg.data_type(), true))
+        })
+        .collect::<Vec<_>>();
+
+    // Built once, outside the timed closure, so that each iteration
+    // does not also pay for constructing the return field and a fresh
+    // `ConfigOptions`.
+    let return_field =
+        Arc::new(Field::new_list_field(args[0].data_type(), true));
+    let config_options = Arc::new(ConfigOptions::default());
+
+    c.bench_function(name, |b| {
+        b.iter(|| {
+            black_box(
+                array_slice
+                    .invoke_with_args(ScalarFunctionArgs {
+                        args: args.clone(),
+                        arg_fields: arg_fields.clone(),
+                        number_rows: size,
+                        return_field: Arc::clone(&return_field),
+                        config_options: Arc::clone(&config_options),
+                    })
+                    .unwrap(),
+            )
+        })
+    });
+}
+
+/// Benchmarks `array_slice` over `ListArray` and `ListViewArray` inputs.
+///
+/// All data comes from a generator seeded with `42`. For each input
+/// layout the following benchmarks are registered:
+/// - array arguments, with and without a stride column;
+/// - scalar arguments without a stride;
+/// - scalar arguments with each stride in `[-2, -1, 1, 2]`.
+fn criterion_benchmark(c: &mut Criterion) {
+    let rng = &mut StdRng::seed_from_u64(42);
+    let size = 1_000_000;
+    let child_array_size = 100;
+    let null_density = 0.1;
+
+    let (list_array, list_view_array) =
+        create_inputs(rng, size, child_array_size, null_density);
+
+    // Per-row `from` / `to` / `stride`, bounded by each row's length.
+    let stride_choices: [Option<i64>; 5] =
+        [None, Some(-2), Some(-1), Some(1), Some(2)];
+    let mut array_from = Vec::with_capacity(size);
+    let mut array_to = Vec::with_capacity(size);
+    let mut array_stride = Vec::with_capacity(size);
+    for len in list_array.offsets().lengths() {
+        let (from, to, stride) = random_from_to_stride(
+            rng,
+            len as i64,
+            null_density,
+            &stride_choices,
+        );
+        array_from.push(from);
+        array_to.push(to);
+        array_stride.push(stride);
+    }
+
+    // input
+    let list_array = ColumnarValue::Array(Arc::new(list_array));
+    let list_view_array = ColumnarValue::Array(Arc::new(list_view_array));
+
+    // args
+    let array_from =
+        ColumnarValue::Array(Arc::new(Int64Array::from(array_from)));
+    let array_to =
+        ColumnarValue::Array(Arc::new(Int64Array::from(array_to)));
+    let array_stride =
+        ColumnarValue::Array(Arc::new(Int64Array::from(array_stride)));
+    let scalar_from = ColumnarValue::Scalar(ScalarValue::from(1i64));
+    let half_child_size = child_array_size as i64 / 2;
+    let scalar_to =
+        ColumnarValue::Scalar(ScalarValue::from(half_child_size));
+
+    for input in [list_array, list_view_array] {
+        let input_type = input.data_type().to_string();
+        // Shared prefix of every benchmark id for this input layout.
+        let prefix = format!("array_slice: input {input_type}");
+
+        array_slice_benchmark(
+            &format!("{prefix}, array args"),
+            input.clone(),
+            vec![array_from.clone(), array_to.clone(), array_stride.clone()],
+            c,
+            size,
+        );
+
+        array_slice_benchmark(
+            &format!("{prefix}, array args, no stride"),
+            input.clone(),
+            vec![array_from.clone(), array_to.clone()],
+            c,
+            size,
+        );
+
+        array_slice_benchmark(
+            &format!("{prefix}, scalar args, no stride"),
+            input.clone(),
+            vec![scalar_from.clone(), scalar_to.clone()],
+            c,
+            size,
+        );
+
+        for stride in [-2i64, -1i64, 1i64, 2i64] {
+            // swap from and to if stride < 0
+            let (from, to) = if stride > 0 {
+                (scalar_from.clone(), scalar_to.clone())
+            } else {
+                (scalar_to.clone(), scalar_from.clone())
+            };
+            let scalar_stride =
+                ColumnarValue::Scalar(ScalarValue::from(stride));
+            array_slice_benchmark(
+                &format!("{prefix}, scalar args, stride={stride}"),
+                input.clone(),
+                vec![from, to, scalar_stride],
+                c,
+                size,
+            );
+        }
+    }
+}
+
+criterion_group!(benches, criterion_benchmark); // group named `benches`
+criterion_main!(benches); // entry point; the bench sets `harness = false`
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]