This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 0ca7f55ebb Add benchmarks for to_timestamp and make_date functions
(#9086)
0ca7f55ebb is described below
commit 0ca7f55ebbe127d598e7fe7845f73b3b3bb3202e
Author: Bruce Ritchie <[email protected]>
AuthorDate: Wed Jan 31 15:25:40 2024 -0500
Add benchmarks for to_timestamp and make_date functions (#9086)
* Make date benchmark.
* Make date benchmark update, added to_timestamp benchmark test.
* cargo tomlfmt
---
datafusion/physical-expr/Cargo.toml | 8 ++
datafusion/physical-expr/benches/make_date.rs | 115 +++++++++++++++++++++
datafusion/physical-expr/benches/to_timestamp.rs | 125 +++++++++++++++++++++++
3 files changed, 248 insertions(+)
diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml
index 61eba042f9..dc3ecdb14f 100644
--- a/datafusion/physical-expr/Cargo.toml
+++ b/datafusion/physical-expr/Cargo.toml
@@ -79,3 +79,11 @@ rstest = { workspace = true }
[[bench]]
harness = false
name = "in_list"
+
+[[bench]]
+harness = false
+name = "make_date"
+
+[[bench]]
+harness = false
+name = "to_timestamp"
diff --git a/datafusion/physical-expr/benches/make_date.rs b/datafusion/physical-expr/benches/make_date.rs
new file mode 100644
index 0000000000..819d9539f2
--- /dev/null
+++ b/datafusion/physical-expr/benches/make_date.rs
@@ -0,0 +1,115 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use std::sync::Arc;
+
+use arrow_array::{ArrayRef, Int32Array};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use rand::rngs::ThreadRng;
+use rand::Rng;
+
+use datafusion_common::ScalarValue;
+use datafusion_expr::ColumnarValue;
+use datafusion_physical_expr::datetime_expressions::make_date;
+
+/// Builds an `Int32Array` holding 1000 random year values drawn from `1900..2050`.
+fn years(rng: &mut ThreadRng) -> Int32Array {
+    let values: Vec<i32> = (0..1000).map(|_| rng.gen_range(1900..2050)).collect();
+    Int32Array::from(values)
+}
+
+/// Builds an `Int32Array` holding 1000 random month values drawn from `1..13`
+/// (that is, 1 through 12 inclusive).
+fn months(rng: &mut ThreadRng) -> Int32Array {
+    let values: Vec<i32> = (0..1000).map(|_| rng.gen_range(1..13)).collect();
+    Int32Array::from(values)
+}
+
+/// Builds an `Int32Array` holding 1000 random day-of-month values drawn from
+/// `1..29`. The largest value is 28, a day that exists in every month.
+fn days(rng: &mut ThreadRng) -> Int32Array {
+    let values: Vec<i32> = (0..1000).map(|_| rng.gen_range(1..29)).collect();
+    Int32Array::from(values)
+}
+/// Benchmarks `make_date` for each mix of array and scalar arguments:
+/// three columns, scalar year, scalar year and month, and all scalars.
+///
+/// The argument array for each benchmark is built once, outside the timed
+/// closure, and handed to `make_date` by reference. The timed closure therefore
+/// performs no per-iteration cloning of the `ColumnarValue` inputs.
+fn criterion_benchmark(c: &mut Criterion) {
+    c.bench_function("make_date_col_col_col_1000", |b| {
+        let mut rng = rand::thread_rng();
+        // Elements are evaluated in order: years, then months, then days.
+        let args = [
+            ColumnarValue::Array(Arc::new(years(&mut rng)) as ArrayRef),
+            ColumnarValue::Array(Arc::new(months(&mut rng)) as ArrayRef),
+            ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef),
+        ];
+
+        b.iter(|| {
+            black_box(
+                make_date(&args).expect("make_date should work on valid values"),
+            )
+        })
+    });
+
+    c.bench_function("make_date_scalar_col_col_1000", |b| {
+        let mut rng = rand::thread_rng();
+        let args = [
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))),
+            ColumnarValue::Array(Arc::new(months(&mut rng)) as ArrayRef),
+            ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef),
+        ];
+
+        b.iter(|| {
+            black_box(
+                make_date(&args).expect("make_date should work on valid values"),
+            )
+        })
+    });
+
+    c.bench_function("make_date_scalar_scalar_col_1000", |b| {
+        let mut rng = rand::thread_rng();
+        let args = [
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))),
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(11))),
+            ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef),
+        ];
+
+        b.iter(|| {
+            black_box(
+                make_date(&args).expect("make_date should work on valid values"),
+            )
+        })
+    });
+
+    c.bench_function("make_date_scalar_scalar_scalar", |b| {
+        let args = [
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(2025))),
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(11))),
+            ColumnarValue::Scalar(ScalarValue::Int32(Some(26))),
+        ];
+
+        b.iter(|| {
+            black_box(
+                make_date(&args).expect("make_date should work on valid values"),
+            )
+        })
+    });
+}
+
+// Register the benchmark group and generate the `main` entry point; the bench
+// target is declared with `harness = false` in Cargo.toml.
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/physical-expr/benches/to_timestamp.rs b/datafusion/physical-expr/benches/to_timestamp.rs
new file mode 100644
index 0000000000..1934f69ef1
--- /dev/null
+++ b/datafusion/physical-expr/benches/to_timestamp.rs
@@ -0,0 +1,125 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use std::sync::Arc;
+
+use arrow_array::builder::StringBuilder;
+use arrow_array::ArrayRef;
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+
+use datafusion_expr::ColumnarValue;
+use datafusion_physical_expr::datetime_expressions::to_timestamp;
+
+/// Benchmarks `to_timestamp` on a ten-row string column, first with no format
+/// arguments and then with three format-string columns per row.
+///
+/// Argument arrays are built once, outside the timed closure, and passed to
+/// `to_timestamp` by reference. The timed closure therefore performs no
+/// per-iteration cloning of the `ColumnarValue` inputs.
+fn criterion_benchmark(c: &mut Criterion) {
+    c.bench_function("to_timestamp_no_formats", |b| {
+        let mut inputs = StringBuilder::new();
+        for value in [
+            "1997-01-31T09:26:56.123Z",
+            "1997-01-31T09:26:56.123-05:00",
+            "1997-01-31 09:26:56.123-05:00",
+            "2023-01-01 04:05:06.789 -08",
+            "1997-01-31T09:26:56.123",
+            "1997-01-31 09:26:56.123",
+            "1997-01-31 09:26:56",
+            "1997-01-31 13:26:56",
+            "1997-01-31 13:26:56+04:00",
+            "1997-01-31",
+        ] {
+            inputs.append_value(value);
+        }
+
+        let args = [ColumnarValue::Array(Arc::new(inputs.finish()) as ArrayRef)];
+
+        b.iter(|| {
+            black_box(
+                to_timestamp(&args).expect("to_timestamp should work on valid values"),
+            )
+        })
+    });
+
+    c.bench_function("to_timestamp_with_formats", |b| {
+        let mut inputs = StringBuilder::new();
+        let mut format1_builder = StringBuilder::with_capacity(2, 10);
+        let mut format2_builder = StringBuilder::with_capacity(2, 10);
+        let mut format3_builder = StringBuilder::with_capacity(2, 10);
+
+        // (input, third format) pairs. Every row uses "%+" as its first format
+        // and "%c" as its second; only the third format varies per row.
+        // NOTE(review): the formats are presumably tried in argument order until
+        // one parses -- confirm against the `to_timestamp` documentation.
+        let cases = [
+            ("1997-01-31T09:26:56.123Z", "%Y-%m-%dT%H:%M:%S%.f%Z"),
+            ("1997-01-31T09:26:56.123-05:00", "%Y-%m-%dT%H:%M:%S%.f%z"),
+            ("1997-01-31 09:26:56.123-05:00", "%Y-%m-%d %H:%M:%S%.f%Z"),
+            ("2023-01-01 04:05:06.789 -08", "%Y-%m-%d %H:%M:%S%.f %#z"),
+            ("1997-01-31T09:26:56.123", "%Y-%m-%dT%H:%M:%S%.f"),
+            ("1997-01-31 09:26:56.123", "%Y-%m-%d %H:%M:%S%.f"),
+            ("1997-01-31 09:26:56", "%Y-%m-%d %H:%M:%S"),
+            ("1997-01-31 092656", "%Y-%m-%d %H%M%S"),
+            ("1997-01-31 092656+04:00", "%Y-%m-%d %H%M%S%:z"),
+            ("Sun Jul 8 00:34:60 2001", "%Y-%m-%d 00:00:00"),
+        ];
+
+        for (input, format3) in cases {
+            inputs.append_value(input);
+            format1_builder.append_value("%+");
+            format2_builder.append_value("%c");
+            format3_builder.append_value(format3);
+        }
+
+        let args = [
+            ColumnarValue::Array(Arc::new(inputs.finish()) as ArrayRef),
+            ColumnarValue::Array(Arc::new(format1_builder.finish()) as ArrayRef),
+            ColumnarValue::Array(Arc::new(format2_builder.finish()) as ArrayRef),
+            ColumnarValue::Array(Arc::new(format3_builder.finish()) as ArrayRef),
+        ];
+
+        b.iter(|| {
+            black_box(
+                to_timestamp(&args).expect("to_timestamp should work on valid values"),
+            )
+        })
+    });
+}
+
+// Register the benchmark group and generate the `main` entry point; the bench
+// target is declared with `harness = false` in Cargo.toml.
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);