This is an automated email from the ASF dual-hosted git repository.

comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 437cbf847a Optimize performance of `character_length` function (#13696)
437cbf847a is described below

commit 437cbf847a80beae791af821d73c56d8226473be
Author: Tai Le Manh <[email protected]>
AuthorDate: Tue Dec 10 09:33:25 2024 +0700

    Optimize performance of `character_length` function (#13696)
    
    * Optimize performance of `character_length` function
    
    Signed-off-by: Tai Le Manh <[email protected]>
    
    * Add pre-check for whether the array contains nulls
    
    * Fix clippy warnings
    
    ---------
    
    Signed-off-by: Tai Le Manh <[email protected]>
---
 .../functions/src/unicode/character_length.rs      | 57 +++++++++++++++-------
 1 file changed, 39 insertions(+), 18 deletions(-)

diff --git a/datafusion/functions/src/unicode/character_length.rs b/datafusion/functions/src/unicode/character_length.rs
index 822bdca9ac..ad51a8ef72 100644
--- a/datafusion/functions/src/unicode/character_length.rs
+++ b/datafusion/functions/src/unicode/character_length.rs
@@ -18,7 +18,7 @@
 use crate::strings::StringArrayType;
 use crate::utils::{make_scalar_function, utf8_to_int_type};
 use arrow::array::{
-    Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveArray,
+    Array, ArrayRef, ArrowPrimitiveType, AsArray, OffsetSizeTrait, PrimitiveBuilder,
 };
 use arrow::datatypes::{ArrowNativeType, DataType, Int32Type, Int64Type};
 use datafusion_common::Result;
@@ -136,31 +136,52 @@ fn character_length(args: &[ArrayRef]) -> Result<ArrayRef> {
     }
 }
 
-fn character_length_general<'a, T: ArrowPrimitiveType, V: StringArrayType<'a>>(
-    array: V,
-) -> Result<ArrayRef>
+fn character_length_general<'a, T, V>(array: V) -> Result<ArrayRef>
 where
+    T: ArrowPrimitiveType,
     T::Native: OffsetSizeTrait,
+    V: StringArrayType<'a>,
 {
+    let mut builder = PrimitiveBuilder::<T>::with_capacity(array.len());
+
     // String characters are variable length encoded in UTF-8, counting the
     // number of chars requires expensive decoding, however checking if the
     // string is ASCII only is relatively cheap.
     // If strings are ASCII only, count bytes instead.
     let is_array_ascii_only = array.is_ascii();
-    let iter = array.iter();
-    let result = iter
-        .map(|string| {
-            string.map(|string: &str| {
-                if is_array_ascii_only {
-                    T::Native::usize_as(string.len())
-                } else {
-                    T::Native::usize_as(string.chars().count())
-                }
-            })
-        })
-        .collect::<PrimitiveArray<T>>();
-
-    Ok(Arc::new(result) as ArrayRef)
+    if array.null_count() == 0 {
+        if is_array_ascii_only {
+            for i in 0..array.len() {
+                let value = array.value(i);
+                builder.append_value(T::Native::usize_as(value.len()));
+            }
+        } else {
+            for i in 0..array.len() {
+                let value = array.value(i);
+                builder.append_value(T::Native::usize_as(value.chars().count()));
+            }
+        }
+    } else if is_array_ascii_only {
+        for i in 0..array.len() {
+            if array.is_null(i) {
+                builder.append_null();
+            } else {
+                let value = array.value(i);
+                builder.append_value(T::Native::usize_as(value.len()));
+            }
+        }
+    } else {
+        for i in 0..array.len() {
+            if array.is_null(i) {
+                builder.append_null();
+            } else {
+                let value = array.value(i);
+                builder.append_value(T::Native::usize_as(value.chars().count()));
+            }
+        }
+    }
+
+    Ok(Arc::new(builder.finish()) as ArrayRef)
 }
 
 #[cfg(test)]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to