This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 12314c5564 perf: Optimize `array_to_string` to avoid a copy (#20639)
12314c5564 is described below

commit 12314c5564337bf556763b8a414502d72094ed1e
Author: Neil Conway <[email protected]>
AuthorDate: Mon Mar 2 17:06:34 2026 -0500

    perf: Optimize `array_to_string` to avoid a copy (#20639)
    
    ## Which issue does this PR close?
    
    N/A
    
    ## Rationale for this change
    
    `array_to_string` used a temporary buffer to convert array elements to a
    string, before then copying that buffer into a `StringBuilder` via
    `append_value`. We can skip this copy by arranging to write directly
    into the `StringBuilder`'s buffer because it implements `fmt::Write`.
    This is a 12-18% performance improvement on the `array_to_string`
    benchmarks.
    
    ## What changes are included in this PR?
    
    * Optimization
    * Change the signature of the `append` closure to return `Result`, which
    avoids a lot of `unwrap` clutter
    
    ## Are these changes tested?
    
    Yes -- covered by existing tests and benchmarks.
    
    ## Are there any user-facing changes?
    
    No.
---
 datafusion/functions-nested/src/string.rs | 80 +++++++++++++++----------------
 1 file changed, 39 insertions(+), 41 deletions(-)

diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs
index 8aabc49309..ce72e878de 100644
--- a/datafusion/functions-nested/src/string.rs
+++ b/datafusion/functions-nested/src/string.rs
@@ -29,7 +29,7 @@ use datafusion_common::utils::ListCoercion;
 use datafusion_common::{DataFusionError, Result, not_impl_err};
 
 use std::any::Any;
-use std::fmt::Write;
+use std::fmt::{self, Write};
 
 use crate::utils::make_scalar_function;
 use arrow::array::{
@@ -324,7 +324,6 @@ fn generate_string_array<O: OffsetSizeTrait>(
     null_strings: &[Option<&str>],
 ) -> Result<StringArray> {
     let mut builder = StringBuilder::with_capacity(list_arr.len(), 0);
-    let mut buf = String::new();
 
     for ((arr, &delimiter), &null_string) in list_arr
         .iter()
@@ -336,17 +335,16 @@ fn generate_string_array<O: OffsetSizeTrait>(
             continue;
         };
 
-        buf.clear();
         let mut first = true;
-        compute_array_to_string(&mut buf, &arr, delimiter, null_string, &mut 
first)?;
-        builder.append_value(&buf);
+        compute_array_to_string(&mut builder, &arr, delimiter, null_string, 
&mut first)?;
+        builder.append_value("");
     }
 
     Ok(builder.finish())
 }
 
 fn compute_array_to_string(
-    buf: &mut String,
+    w: &mut impl Write,
     arr: &ArrayRef,
     delimiter: &str,
     null_string: Option<&str>,
@@ -358,7 +356,7 @@ fn compute_array_to_string(
             for i in 0..$list_array.len() {
                 if !$list_array.is_null(i) {
                     compute_array_to_string(
-                        buf,
+                        w,
                         &$list_array.value(i),
                         delimiter,
                         null_string,
@@ -368,9 +366,9 @@ fn compute_array_to_string(
                     if *first {
                         *first = false;
                     } else {
-                        buf.push_str(delimiter);
+                        w.write_str(delimiter)?;
                     }
-                    buf.push_str(ns);
+                    w.write_str(ns)?;
                 }
             }
         };
@@ -399,71 +397,69 @@ fn compute_array_to_string(
                 DataFusionError::from(e)
                     .context("Casting dictionary to values in 
compute_array_to_string")
             })?;
-            compute_array_to_string(buf, &values, delimiter, null_string, 
first)
+            compute_array_to_string(w, &values, delimiter, null_string, first)
         }
         Null => Ok(()),
         data_type => {
             macro_rules! str_leaf {
                 ($ARRAY_TYPE:ident) => {
                     write_leaf_to_string(
-                        buf,
+                        w,
                         downcast_arg!(arr, $ARRAY_TYPE),
                         delimiter,
                         null_string,
                         first,
-                        |buf, x: &str| buf.push_str(x),
-                    )
+                        |w, x: &str| w.write_str(x),
+                    )?
                 };
             }
             macro_rules! bool_leaf {
                 ($ARRAY_TYPE:ident) => {
                     write_leaf_to_string(
-                        buf,
+                        w,
                         downcast_arg!(arr, $ARRAY_TYPE),
                         delimiter,
                         null_string,
                         first,
-                        |buf, x: bool| {
+                        |w, x: bool| {
                             if x {
-                                buf.push_str("true");
+                                w.write_str("true")
                             } else {
-                                buf.push_str("false");
+                                w.write_str("false")
                             }
                         },
-                    )
+                    )?
                 };
             }
             macro_rules! int_leaf {
                 ($ARRAY_TYPE:ident) => {
                     write_leaf_to_string(
-                        buf,
+                        w,
                         downcast_arg!(arr, $ARRAY_TYPE),
                         delimiter,
                         null_string,
                         first,
-                        |buf, x| {
+                        |w, x| {
                             let mut itoa_buf = itoa::Buffer::new();
-                            buf.push_str(itoa_buf.format(x));
+                            w.write_str(itoa_buf.format(x))
                         },
-                    )
+                    )?
                 };
             }
             macro_rules! float_leaf {
                 ($ARRAY_TYPE:ident) => {
                     write_leaf_to_string(
-                        buf,
+                        w,
                         downcast_arg!(arr, $ARRAY_TYPE),
                         delimiter,
                         null_string,
                         first,
-                        |buf, x| {
-                            // TODO: Consider switching to a more efficient
-                            // floating point display library (e.g., ryu). This
-                            // might result in some differences in the output
-                            // format, however.
-                            write!(buf, "{}", x).unwrap();
-                        },
-                    )
+                        // TODO: Consider switching to a more efficient
+                        // floating point display library (e.g., ryu). This
+                        // might result in some differences in the output
+                        // format, however.
+                        |w, x| write!(w, "{}", x),
+                    )?
                 };
             }
             match data_type {
@@ -487,7 +483,7 @@ fn compute_array_to_string(
                             .context("Casting to string in array_to_string")
                     })?;
                     return compute_array_to_string(
-                        buf,
+                        w,
                         &str_arr,
                         delimiter,
                         null_string,
@@ -506,17 +502,18 @@ fn compute_array_to_string(
 }
 
 /// Appends the string representation of each element in a leaf (non-list)
-/// array to `buf`, separated by `delimiter`. Null elements are rendered
+/// array to `w`, separated by `delimiter`. Null elements are rendered
 /// using `null_string` if provided, or skipped otherwise. The `append`
-/// closure controls how each non-null element is written to the buffer.
-fn write_leaf_to_string<'a, A, T>(
-    buf: &mut String,
+/// closure controls how each non-null element is written.
+fn write_leaf_to_string<'a, W: Write, A, T>(
+    w: &mut W,
     arr: &'a A,
     delimiter: &str,
     null_string: Option<&str>,
     first: &mut bool,
-    append: impl Fn(&mut String, T),
-) where
+    append: impl Fn(&mut W, T) -> fmt::Result,
+) -> Result<()>
+where
     &'a A: IntoIterator<Item = Option<T>>,
 {
     for x in arr {
@@ -528,14 +525,15 @@ fn write_leaf_to_string<'a, A, T>(
         if *first {
             *first = false;
         } else {
-            buf.push_str(delimiter);
+            w.write_str(delimiter)?;
         }
 
         match x {
-            Some(x) => append(buf, x),
-            None => buf.push_str(null_string.unwrap()),
+            Some(x) => append(w, x)?,
+            None => w.write_str(null_string.unwrap())?,
         }
     }
+    Ok(())
 }
 
 /// String_to_array SQL function


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to