This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new f9d3133c4f Improve perfomance of `reverse` function (#14025)
f9d3133c4f is described below
commit f9d3133c4fd39e8a14329bbb98bb726283bccffd
Author: Tai Le Manh <[email protected]>
AuthorDate: Fri Jan 10 04:45:57 2025 +0700
Improve perfomance of `reverse` function (#14025)
* Improve perfomance of 'reverse' function
Signed-off-by: Tai Le Manh <[email protected]>
* Apply sugestion change
* Fix typo
---------
Signed-off-by: Tai Le Manh <[email protected]>
---
datafusion/functions/Cargo.toml | 5 ++
datafusion/functions/benches/reverse.rs | 90 +++++++++++++++++++++++++++++
datafusion/functions/src/unicode/reverse.rs | 25 +++++---
3 files changed, 111 insertions(+), 9 deletions(-)
diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml
index fd986c4be4..c8025fb2d8 100644
--- a/datafusion/functions/Cargo.toml
+++ b/datafusion/functions/Cargo.toml
@@ -204,6 +204,11 @@ harness = false
name = "strpos"
required-features = ["unicode_expressions"]
+[[bench]]
+harness = false
+name = "reverse"
+required-features = ["unicode_expressions"]
+
[[bench]]
harness = false
name = "trunc"
diff --git a/datafusion/functions/benches/reverse.rs
b/datafusion/functions/benches/reverse.rs
new file mode 100644
index 0000000000..c7c1ef8a82
--- /dev/null
+++ b/datafusion/functions/benches/reverse.rs
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+extern crate criterion;
+
+use arrow::array::OffsetSizeTrait;
+use arrow::util::bench_util::{
+ create_string_array_with_len, create_string_view_array_with_len,
+};
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion_expr::ColumnarValue;
+use datafusion_functions::unicode;
+use std::sync::Arc;
+
+fn create_args<O: OffsetSizeTrait>(
+ size: usize,
+ str_len: usize,
+ force_view_types: bool,
+) -> Vec<ColumnarValue> {
+ if force_view_types {
+ let string_array =
+ Arc::new(create_string_view_array_with_len(size, 0.1, str_len,
false));
+
+ vec![ColumnarValue::Array(string_array)]
+ } else {
+ let string_array =
+ Arc::new(create_string_array_with_len::<O>(size, 0.1, str_len));
+
+ vec![ColumnarValue::Array(string_array)]
+ }
+}
+
+fn criterion_benchmark(c: &mut Criterion) {
+ let reverse = unicode::reverse();
+ for size in [1024, 4096] {
+ let str_len = 8;
+
+ let args = create_args::<i32>(size, str_len, true);
+ c.bench_function(
+ format!("reverse_string_view [size={}, str_len={}]", size,
str_len).as_str(),
+ |b| {
+ b.iter(|| {
+ // TODO use invoke_with_args
+ black_box(reverse.invoke_batch(&args, str_len))
+ })
+ },
+ );
+
+ let str_len = 32;
+
+ let args = create_args::<i32>(size, str_len, true);
+ c.bench_function(
+ format!("reverse_string_view [size={}, str_len={}]", size,
str_len).as_str(),
+ |b| {
+ b.iter(|| {
+ // TODO use invoke_with_args
+ black_box(reverse.invoke_batch(&args, str_len))
+ })
+ },
+ );
+
+ let args = create_args::<i32>(size, str_len, false);
+ c.bench_function(
+ format!("reverse_string [size={}, str_len={}]", size,
str_len).as_str(),
+ |b| {
+ b.iter(|| {
+ // TODO use invoke_with_args
+ black_box(reverse.invoke_batch(&args, str_len))
+ })
+ },
+ );
+ }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/datafusion/functions/src/unicode/reverse.rs
b/datafusion/functions/src/unicode/reverse.rs
index 5ad347ed96..f07deda70e 100644
--- a/datafusion/functions/src/unicode/reverse.rs
+++ b/datafusion/functions/src/unicode/reverse.rs
@@ -20,8 +20,7 @@ use std::sync::Arc;
use crate::utils::{make_scalar_function, utf8_to_str_type};
use arrow::array::{
- Array, ArrayAccessor, ArrayIter, ArrayRef, AsArray, GenericStringArray,
- OffsetSizeTrait,
+ Array, ArrayRef, AsArray, GenericStringBuilder, OffsetSizeTrait,
StringArrayType,
};
use arrow::datatypes::DataType;
use datafusion_common::{exec_err, Result};
@@ -105,8 +104,7 @@ impl ScalarUDFImpl for ReverseFunc {
}
}
-/// Reverses the order of the characters in the string.
-/// reverse('abcde') = 'edcba'
+/// Reverses the order of the characters in the string `reverse('abcde') =
'edcba'`.
/// The implementation uses UTF-8 code points as characters
pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
if args[0].data_type() == &Utf8View {
@@ -116,14 +114,23 @@ pub fn reverse<T: OffsetSizeTrait>(args: &[ArrayRef]) ->
Result<ArrayRef> {
}
}
-fn reverse_impl<'a, T: OffsetSizeTrait, V: ArrayAccessor<Item = &'a str>>(
+fn reverse_impl<'a, T: OffsetSizeTrait, V: StringArrayType<'a>>(
string_array: V,
) -> Result<ArrayRef> {
- let result = ArrayIter::new(string_array)
- .map(|string| string.map(|string: &str|
string.chars().rev().collect::<String>()))
- .collect::<GenericStringArray<T>>();
+ let mut builder =
GenericStringBuilder::<T>::with_capacity(string_array.len(), 1024);
+
+ let mut reversed = String::new();
+ for string in string_array.iter() {
+ if let Some(s) = string {
+ reversed.extend(s.chars().rev());
+ builder.append_value(&reversed);
+ reversed.clear();
+ } else {
+ builder.append_null();
+ }
+ }
- Ok(Arc::new(result) as ArrayRef)
+ Ok(Arc::new(builder.finish()) as ArrayRef)
}
#[cfg(test)]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]