This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 9a2b49cb7d Add benchmark for cast from/to decimals (#9729)
9a2b49cb7d is described below
commit 9a2b49cb7d78a2a90bff84c7454e7d540ae3f62d
Author: Congxian Qiu <[email protected]>
AuthorDate: Wed Apr 22 21:36:04 2026 +0800
Add benchmark for cast from/to decimals (#9729)
# Which issue does this PR close?
<!--
We generally require a GitHub issue to be filed for all bug fixes and
enhancements and this helps us generate change logs for our releases.
You can link an issue to this PR using the GitHub syntax.
-->
- Closes #9728.
# What changes are included in this PR?
Add benchmarks for cast from/to decimals
<!--
There is no need to duplicate the description in the issue here but it
is sometimes worth providing a summary of the individual changes in this
PR.
-->
# Are these changes tested?
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code
If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?
-->
This PR only adds benchmarks, so no additional tests are needed.
# Are there any user-facing changes?
<!--
If there are user-facing changes then we may require documentation to be
updated before approving the PR.
If there are any breaking changes to public APIs, please call them out.
-->
No
---
arrow/benches/cast_kernels.rs | 393 +++++++++++++++++++++++++++++++++++++++++-
arrow/src/util/bench_util.rs | 25 +++
2 files changed, 413 insertions(+), 5 deletions(-)
diff --git a/arrow/benches/cast_kernels.rs b/arrow/benches/cast_kernels.rs
index 040c118a1e..2af3145a84 100644
--- a/arrow/benches/cast_kernels.rs
+++ b/arrow/benches/cast_kernels.rs
@@ -157,6 +157,85 @@ fn build_string_array(size: usize) -> ArrayRef {
Arc::new(builder.finish())
}
+/// Builds a string array of `size` rows holding the decimal text of random `f32` values.
+///
+/// Each row draws an `f32` from `seedable_rng()`; a draw below `null_density`
+/// yields a null, otherwise a value from `-999_999_999f32..999_999_999f32` is
+/// formatted with `to_string()` and appended.
+fn build_string_float_array(size: usize, null_density: f32) -> ArrayRef {
+    let mut rng = seedable_rng();
+    let mut builder = StringBuilder::new();
+
+    (0..size).for_each(|_| {
+        // The null decision is drawn first, before any value is sampled.
+        let is_null = rng.random::<f32>() < null_density;
+        if is_null {
+            builder.append_null();
+        } else {
+            let value = rng.random_range(-999_999_999f32..999_999_999f32);
+            builder.append_value(value.to_string());
+        }
+    });
+
+    Arc::new(builder.finish())
+}
+
+/// Fills `$builder` with `$size` rows and evaluates to the finished array wrapped in an `Arc`.
+///
+/// Every row draws one `f32` from a fresh `seedable_rng()`: a draw below
+/// `$null_density` appends a null, otherwise the row receives
+/// `$samples[row % $samples.len()]`, so the samples repeat in order by row index.
+macro_rules! build_array_with_samples {
+    ($builder: ident, $size: ident, $null_density: expr, $samples: ident) => {{
+        let mut rng = seedable_rng();
+        (0..$size).for_each(|row| {
+            let is_null = rng.random::<f32>() < $null_density;
+            if is_null {
+                $builder.append_null();
+            } else {
+                // Indexed by row number (not by the count of non-null rows), so the
+                // sample stored at a position does not depend on the null pattern.
+                $builder.append_value($samples[row % $samples.len()]);
+            }
+        });
+        Arc::new($builder.finish())
+    }};
+}
+
+/// Builds a string array of `size` rows that cycles through a fixed list of
+/// malformed or non-finite number spellings, for the "cast invalid string to
+/// decimal" benchmarks.
+///
+/// Null placement and sample selection are delegated to
+/// `build_array_with_samples!`, with `null_density` passed through unchanged.
+fn build_string_float_array_invalid_item(size: usize, null_density: f32) -> ArrayRef {
+    // Doubled or misplaced signs, repeated decimal points, dangling exponents
+    // and non-finite spellings.
+    const MALFORMED: [&str; 10] = [
+        "--1.23",
+        "1.2.3",
+        "-1.-23499",
+        "--1.23456789",
+        "1-.234",
+        "e10",
+        "1e",
+        "1e++10",
+        "NaN",
+        "Infinity",
+    ];
+
+    let mut builder = StringBuilder::new();
+    build_array_with_samples!(builder, size, null_density, MALFORMED)
+}
+
+/// Builds a `PrimitiveArray<Float32Type>` of `size` rows for the float32-to-decimal benchmarks.
+///
+/// Delegates to `create_primitive_array_range`, passing `null_density` through
+/// and requesting values from the range `-999_999_999f32..999_999_999f32`.
+fn build_float32_array_for_cast_to_decimal(size: usize, null_density: f32) -> ArrayRef {
+    let value_range = -999_999_999f32..999_999_999f32;
+    let array = create_primitive_array_range::<Float32Type>(size, null_density, value_range);
+    Arc::new(array)
+}
+
+/// Builds a `PrimitiveArray<Float64Type>` of `size` rows for the float64-to-decimal benchmarks.
+///
+/// Delegates to `create_primitive_array_range`, passing `null_density` through
+/// and requesting values from the range `-999_999_999f64..999_999_999f64`.
+fn build_float64_array_for_cast_to_decimal(size: usize, null_density: f32) -> ArrayRef {
+    let value_range = -999_999_999f64..999_999_999f64;
+    let array = create_primitive_array_range::<Float64Type>(size, null_density, value_range);
+    Arc::new(array)
+}
+
+/// Builds a float32 array of `size` rows whose non-null values cycle through
+/// `NAN`, `INFINITY` and `NEG_INFINITY`, for the "cast invalid float32 to
+/// decimal" benchmarks.
+///
+/// Null placement and sample selection are delegated to `build_array_with_samples!`.
+fn build_float32_array_invalid_item(size: usize, null_density: f32) -> ArrayRef {
+    let non_finite = [f32::NAN, f32::INFINITY, f32::NEG_INFINITY];
+    let mut builder = Float32Builder::with_capacity(size);
+
+    build_array_with_samples!(builder, size, null_density, non_finite)
+}
+
+/// Builds a float64 array of `size` rows whose non-null values cycle through
+/// `NAN`, `INFINITY` and `NEG_INFINITY`, for the "cast invalid float64 to
+/// decimal" benchmarks.
+///
+/// Null placement and sample selection are delegated to `build_array_with_samples!`.
+fn build_float64_array_invalid_items(size: usize, null_density: f32) -> ArrayRef {
+    let non_finite = [f64::NAN, f64::INFINITY, f64::NEG_INFINITY];
+    let mut builder = Float64Builder::with_capacity(size);
+
+    build_array_with_samples!(builder, size, null_density, non_finite)
+}
+
fn build_dict_array(size: usize) -> ArrayRef {
let values = StringArray::from_iter([
Some("small"),
@@ -170,7 +249,7 @@ fn build_dict_array(size: usize) -> ArrayRef {
// cast array from specified primitive array type to desired data type
fn cast_array(array: &ArrayRef, to_type: DataType) {
- hint::black_box(cast(array, &to_type).unwrap());
+    // Pass both inputs through `black_box` so the optimizer cannot specialise the
+    // cast on them, then pass the result through it so the work is not discarded.
+    let input = hint::black_box(array);
+    let target = hint::black_box(&to_type);
+    hint::black_box(cast(input, target).unwrap());
}
fn add_benchmark(c: &mut Criterion) {
@@ -189,10 +268,10 @@ fn add_benchmark(c: &mut Criterion) {
let utf8_date_array = build_utf8_date_array(512, true);
let utf8_date_time_array = build_utf8_date_time_array(512, true);
- let decimal32_array = build_decimal32_array(512, 9, 3);
- let decimal64_array = build_decimal64_array(512, 10, 3);
- let decimal128_array = build_decimal128_array(512, 10, 3);
- let decimal256_array = build_decimal256_array(512, 50, 3);
+ let decimal32_array = build_decimal32_array(8_000, 9, 3);
+ let decimal64_array = build_decimal64_array(8_000, 10, 3);
+ let decimal128_array = build_decimal128_array(8_000, 10, 3);
+ let decimal256_array = build_decimal256_array(8_000, 50, 3);
let string_array = build_string_array(512);
let wide_string_array = cast(&string_array, &DataType::LargeUtf8).unwrap();
@@ -200,6 +279,13 @@ fn add_benchmark(c: &mut Criterion) {
let string_view_array = cast(&dict_array, &DataType::Utf8View).unwrap();
let binary_view_array = cast(&string_view_array,
&DataType::BinaryView).unwrap();
+ let string_float_array_normal = build_string_float_array(5_000, 0.1);
+ let invalid_string_float_array =
build_string_float_array_invalid_item(8_000, 0.1);
+ let float32_array_cast_to_decimal =
build_float32_array_for_cast_to_decimal(8_000, 0.1);
+ let float64_array_cast_to_decimal =
build_float64_array_for_cast_to_decimal(8_000, 0.1);
+ let invalid_float32_array_to_decimal =
build_float32_array_invalid_item(8_000, 0.1);
+ let invalid_float64_array_to_decimal =
build_float64_array_invalid_items(8_000, 0.1);
+
c.bench_function("cast int32 to int32 512", |b| {
b.iter(|| cast_array(&i32_array, DataType::Int32))
});
@@ -360,6 +446,303 @@ fn add_benchmark(c: &mut Criterion) {
b.iter(|| cast_array(&binary_view_array, DataType::Utf8View))
});
+    // Registers a Criterion benchmark named `$name` that casts `$input_array`
+    // to `$target_type` on every iteration.
+    macro_rules! benchmark_cast {
+        ($name: expr, $input_array: ident, $target_type: expr) => {
+            // Pass `$name` through unchanged: `stringify!` on a string literal keeps
+            // the surrounding quote characters, which would leak into the benchmark id.
+            c.bench_function($name, |b| {
+                b.iter(|| cast_array(&$input_array, $target_type))
+            });
+        };
+    }
+
+ // cast string with normal items to decimals
+ benchmark_cast!(
+ "cast string to decimal32(9, 2)",
+ string_float_array_normal,
+ DataType::Decimal32(9, 2)
+ );
+ benchmark_cast!(
+ "cast string to decimal64(18, 2)",
+ string_float_array_normal,
+ DataType::Decimal64(18, 2)
+ );
+ benchmark_cast!(
+ "cast string to decimal128(38, 3)",
+ string_float_array_normal,
+ DataType::Decimal128(38, 3)
+ );
+ benchmark_cast!(
+ "cast string to decimal256(76, 4)",
+ string_float_array_normal,
+ DataType::Decimal256(76, 4)
+ );
+
+ // cast invalid string to decimals
+ benchmark_cast!(
+ "cast invalid string to decimal32(9, 2)",
+ invalid_string_float_array,
+ DataType::Decimal32(9, 2)
+ );
+ benchmark_cast!(
+ "cast invalid string to decimal64(18, 2)",
+ invalid_string_float_array,
+ DataType::Decimal64(18, 2)
+ );
+ benchmark_cast!(
+ "cast invalid string to decimal128(38, 3)",
+ invalid_string_float_array,
+ DataType::Decimal128(38, 3)
+ );
+ benchmark_cast!(
+ "cast invalid string to decimal256(76, 4)",
+ invalid_string_float_array,
+ DataType::Decimal256(76, 4)
+ );
+
+ // cast float32 to decimals
+ benchmark_cast!(
+ "cast float32 to decimal32(9, 2)",
+ float32_array_cast_to_decimal,
+ DataType::Decimal32(9, 2)
+ );
+    benchmark_cast!(
+        "cast float32 to decimal64(18, 2)",
+        float32_array_cast_to_decimal,
+        DataType::Decimal64(18, 2)
+    );
+    benchmark_cast!(
+        "cast float32 to decimal128(32, 3)",
+        float32_array_cast_to_decimal,
+        DataType::Decimal128(32, 3)
+    );
+ benchmark_cast!(
+ "cast float32 to decimal256(76, 4)",
+ float32_array_cast_to_decimal,
+ DataType::Decimal256(76, 4)
+ );
+
+ // cast invalid float32 to decimals
+ benchmark_cast!(
+ "cast invalid float32 to decimal32(9, 2)",
+ invalid_float32_array_to_decimal,
+ DataType::Decimal32(9, 2)
+ );
+    benchmark_cast!(
+        "cast invalid float32 to decimal64(18, 2)",
+        invalid_float32_array_to_decimal,
+        DataType::Decimal64(18, 2)
+    );
+ benchmark_cast!(
+ "cast invalid float32 to decimal128(32, 3)",
+ invalid_float32_array_to_decimal,
+ DataType::Decimal128(32, 3)
+ );
+ benchmark_cast!(
+ "cast invalid float32 to decimal256(76, 4)",
+ invalid_float32_array_to_decimal,
+ DataType::Decimal256(76, 4)
+ );
+
+ // cast float64 to decimals
+ benchmark_cast!(
+ "cast float64 to decimal32(9, 2)",
+ float64_array_cast_to_decimal,
+ DataType::Decimal32(9, 2)
+ );
+    benchmark_cast!(
+        "cast float64 to decimal64(18, 2)",
+        float64_array_cast_to_decimal,
+        DataType::Decimal64(18, 2)
+    );
+ benchmark_cast!(
+ "cast float64 to decimal128(32, 3)",
+ float64_array_cast_to_decimal,
+ DataType::Decimal128(32, 3)
+ );
+ benchmark_cast!(
+ "cast float64 to decimal256(76, 4)",
+ float64_array_cast_to_decimal,
+ DataType::Decimal256(76, 4)
+ );
+
+ // cast invalid float64 to decimals
+ benchmark_cast!(
+ "cast invalid float64 to decimal32(9, 2)",
+ invalid_float64_array_to_decimal,
+ DataType::Decimal32(9, 2)
+ );
+    benchmark_cast!(
+        "cast invalid float64 to decimal64(18, 2)",
+        invalid_float64_array_to_decimal,
+        DataType::Decimal64(18, 2)
+    );
+    benchmark_cast!(
+        "cast invalid float64 to decimal128(32, 3)",
+        invalid_float64_array_to_decimal,
+        DataType::Decimal128(32, 3)
+    );
+    benchmark_cast!(
+        "cast invalid float64 to decimal256(76, 4)",
+        invalid_float64_array_to_decimal,
+        DataType::Decimal256(76, 4)
+    );
+
+ // cast decimals to float/integers
+ benchmark_cast!(
+ "cast decimal32 to float32",
+ decimal32_array,
+ DataType::Float32
+ );
+ benchmark_cast!(
+ "cast decimal32 to float64",
+ decimal32_array,
+ DataType::Float64
+ );
+ benchmark_cast!("cast decimal32 to uint8", decimal32_array,
DataType::UInt8);
+ benchmark_cast!(
+ "cast decimal32 to uint16",
+ decimal32_array,
+ DataType::UInt16
+ );
+ benchmark_cast!(
+ "cast decimal32 to uint32",
+ decimal32_array,
+ DataType::UInt32
+ );
+ benchmark_cast!(
+ "cast decimal32 to uint64",
+ decimal32_array,
+ DataType::UInt64
+ );
+ benchmark_cast!("cast decimal32 to int8", decimal32_array, DataType::Int8);
+ benchmark_cast!("cast decimal32 to int16", decimal32_array,
DataType::Int16);
+ benchmark_cast!("cast decimal32 to int32", decimal32_array,
DataType::Int32);
+ benchmark_cast!("cast decimal32 to int64", decimal32_array,
DataType::Int64);
+
+ benchmark_cast!(
+ "cast decimal64 to float32",
+ decimal64_array,
+ DataType::Float32
+ );
+ benchmark_cast!(
+ "cast decimal64 to float64",
+ decimal64_array,
+ DataType::Float64
+ );
+ benchmark_cast!("cast decimal64 to uint8", decimal64_array,
DataType::UInt8);
+ benchmark_cast!(
+ "cast decimal64 to uint16",
+ decimal64_array,
+ DataType::UInt16
+ );
+ benchmark_cast!(
+ "cast decimal64 to uint32",
+ decimal64_array,
+ DataType::UInt32
+ );
+ benchmark_cast!(
+ "cast decimal64 to uint64",
+ decimal64_array,
+ DataType::UInt64
+ );
+ benchmark_cast!("cast decimal64 to int8", decimal64_array, DataType::Int8);
+ benchmark_cast!("cast decimal64 to int16", decimal64_array,
DataType::Int16);
+ benchmark_cast!("cast decimal64 to int32", decimal64_array,
DataType::Int32);
+ benchmark_cast!("cast decimal64 to int64", decimal64_array,
DataType::Int64);
+
+ benchmark_cast!(
+ "cast decimal128 to float32",
+ decimal128_array,
+ DataType::Float32
+ );
+ benchmark_cast!(
+ "cast decimal128 to float64",
+ decimal128_array,
+ DataType::Float64
+ );
+ benchmark_cast!(
+ "cast decimal128 to uint8",
+ decimal128_array,
+ DataType::UInt8
+ );
+ benchmark_cast!(
+ "cast decimal128 to uint16",
+ decimal128_array,
+ DataType::UInt16
+ );
+ benchmark_cast!(
+ "cast decimal128 to uint32",
+ decimal128_array,
+ DataType::UInt32
+ );
+ benchmark_cast!(
+ "cast decimal128 to uint64",
+ decimal128_array,
+ DataType::UInt64
+ );
+ benchmark_cast!("cast decimal128 to int8", decimal128_array,
DataType::Int8);
+ benchmark_cast!(
+ "cast decimal128 to int16",
+ decimal128_array,
+ DataType::Int16
+ );
+ benchmark_cast!(
+ "cast decimal128 to int32",
+ decimal128_array,
+ DataType::Int32
+ );
+ benchmark_cast!(
+ "cast decimal128 to int64",
+ decimal128_array,
+ DataType::Int64
+ );
+
+ benchmark_cast!(
+ "cast decimal256 to float32",
+ decimal256_array,
+ DataType::Float32
+ );
+ benchmark_cast!(
+ "cast decimal256 to float64",
+ decimal256_array,
+ DataType::Float64
+ );
+ benchmark_cast!(
+ "cast decimal256 to uint8",
+ decimal256_array,
+ DataType::UInt8
+ );
+ benchmark_cast!(
+ "cast decimal256 to uint16",
+ decimal256_array,
+ DataType::UInt16
+ );
+ benchmark_cast!(
+ "cast decimal256 to uint32",
+ decimal256_array,
+ DataType::UInt32
+ );
+ benchmark_cast!(
+ "cast decimal256 to uint64",
+ decimal256_array,
+ DataType::UInt64
+ );
+ benchmark_cast!("cast decimal256 to int8", decimal256_array,
DataType::Int8);
+ benchmark_cast!(
+ "cast decimal256 to int16",
+ decimal256_array,
+ DataType::Int16
+ );
+ benchmark_cast!(
+ "cast decimal256 to int32",
+ decimal256_array,
+ DataType::Int32
+ );
+ benchmark_cast!(
+ "cast decimal256 to int64",
+ decimal256_array,
+ DataType::Int64
+ );
+
c.bench_function("cast string single run to ree<int32>", |b| {
let source_array = StringArray::from(vec!["a"; 8192]);
let array_ref = Arc::new(source_array) as ArrayRef;
diff --git a/arrow/src/util/bench_util.rs b/arrow/src/util/bench_util.rs
index aba95ba4aa..01e55c12b0 100644
--- a/arrow/src/util/bench_util.rs
+++ b/arrow/src/util/bench_util.rs
@@ -51,6 +51,31 @@ where
.collect()
}
+/// Creates a random (but fixed-seeded) [`PrimitiveArray`] of the given `size` and
+/// `null_density`, with every non-null value drawn from `value_range`.
+///
+/// Each slot first draws an `f32`; when it is below `null_density` the slot is
+/// null, otherwise a value is sampled from `value_range`.
+pub fn create_primitive_array_range<T>(
+    size: usize,
+    null_density: f32,
+    value_range: Range<T::Native>,
+) -> PrimitiveArray<T>
+where
+    T: ArrowPrimitiveType,
+    StandardUniform: Distribution<T::Native>,
+    T::Native: SampleUniform,
+{
+    let mut rng = seedable_rng();
+
+    // Produces one slot: the null decision is drawn before any value is sampled.
+    let mut next_slot = || {
+        let is_null = rng.random::<f32>() < null_density;
+        (!is_null).then(|| rng.random_range(value_range.clone()))
+    };
+
+    (0..size).map(|_| next_slot()).collect()
+}
+
/// Creates a [`PrimitiveArray`] of a given `size` and `null_density`
/// filling it with random numbers generated using the provided `seed`.
pub fn create_primitive_array_with_seed<T>(