This is an automated email from the ASF dual-hosted git repository.

Jefffrey pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 97ff198491 feat(arrow-string): concat_elements for view, fixed binary 
(#9876)
97ff198491 is described below

commit 97ff1984910656fcd76be7a2a44b92b032d3b300
Author: theirix <[email protected]>
AuthorDate: Thu May 7 03:18:46 2026 +0100

    feat(arrow-string): concat_elements for view, fixed binary (#9876)
    
    # Which issue does this PR close?
    
    - Closes #9875.
    
    # Rationale for this change
    
    The `concat_elements` module lacks kernels for binary view and fixed-size
    binary arrays. It's worth having them here.
    
    # What changes are included in this PR?
    
    - Kernel for `BinaryViewArray`
    - Kernel for `FixedSizeBinaryArray`
    - Dispatching logic under `concat_elements_dyn`
    - Unit tests
    
    # Are these changes tested?
    
    New unit tests
    
    # Are there any user-facing changes?
---
 arrow-string/src/concat_elements.rs | 295 +++++++++++++++++++++++++++++++++++-
 1 file changed, 289 insertions(+), 6 deletions(-)

diff --git a/arrow-string/src/concat_elements.rs 
b/arrow-string/src/concat_elements.rs
index 41be8a81cb..cd4676d287 100644
--- a/arrow-string/src/concat_elements.rs
+++ b/arrow-string/src/concat_elements.rs
@@ -18,10 +18,12 @@
 //! Provides utility functions for concatenation of elements in arrays.
 use std::sync::Arc;
 
-use arrow_array::builder::BufferBuilder;
+use arrow_array::builder::{
+    BinaryViewBuilder, BufferBuilder, FixedSizeBinaryBuilder, 
StringViewBuilder,
+};
 use arrow_array::types::ByteArrayType;
 use arrow_array::*;
-use arrow_buffer::{ArrowNativeType, NullBuffer};
+use arrow_buffer::{ArrowNativeType, MutableBuffer, NullBuffer};
 use arrow_data::ArrayDataBuilder;
 use arrow_schema::{ArrowError, DataType};
 
@@ -168,6 +170,129 @@ pub fn concat_elements_utf8_many<Offset: OffsetSizeTrait>(
     Ok(unsafe { builder.build_unchecked() }.into())
 }
 
+/// Returns the elementwise concatenation of two [`FixedSizeBinaryArray`]s.
+///
+/// The result has `value_length = left.value_length() + right.value_length()`.
+/// An index is null if either input is null at that position.
+///
+/// # Errors
+/// - Returns an error if `left` and `right` have different lengths.
+/// - Returns an error if the combined value length does not fit in an `i32`.
+pub fn concat_elements_fixed_size_binary(
+    left: &FixedSizeBinaryArray,
+    right: &FixedSizeBinaryArray,
+) -> Result<FixedSizeBinaryArray, ArrowError> {
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(format!(
+            "Arrays must have the same length: {} != {}",
+            left.len(),
+            right.len()
+        )));
+    }
+
+    let left_size = left.value_length() as usize;
+    let right_size = right.value_length() as usize;
+    let output_size = left_size + right_size;
+
+    // Each width is an `i32`, so their sum may not fit back into an `i32`.
+    // Use a checked conversion instead of a wrapping `as` cast so an oversized
+    // result is reported as an error rather than passed on as a bogus width.
+    let output_width = i32::try_from(output_size).map_err(|_| {
+        ArrowError::ComputeError(format!(
+            "Concatenated value length {} exceeds the maximum of {}",
+            output_size,
+            i32::MAX
+        ))
+    })?;
+
+    // Pre-compute combined null bitmap so the per-row NULL check is efficient
+    let nulls = NullBuffer::union(left.nulls(), right.nulls());
+
+    let mut result = FixedSizeBinaryBuilder::with_capacity(left.len(), output_width);
+    // Scratch space reused for every row so each row does not allocate
+    let mut buffer = MutableBuffer::with_capacity(output_size);
+    for i in 0..left.len() {
+        if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
+            result.append_null();
+        } else {
+            buffer.clear();
+            buffer.extend_from_slice(left.value(i));
+            buffer.extend_from_slice(right.value(i));
+            result.append_value(&buffer)?;
+        }
+    }
+
+    Ok(result.finish())
+}
+
+/// Element-wise concatenation of two `BinaryViewArray`s.
+/// An output slot is `Null` whenever the matching slot of either input is `Null`.
+///
+/// # Errors
+/// - Returns an error if the input arrays have different lengths.
+/// - Returns an error if any concatenated value exceeds `u32::MAX` in length.
+pub fn concat_elements_binary_view_array(
+    left: &BinaryViewArray,
+    right: &BinaryViewArray,
+) -> Result<BinaryViewArray, ArrowError> {
+    let row_count = left.len();
+    if row_count != right.len() {
+        return Err(ArrowError::ComputeError(format!(
+            "Arrays must have the same length: {} != {}",
+            row_count,
+            right.len()
+        )));
+    }
+
+    // Merge both validity bitmaps up front so each row needs a single lookup
+    let combined_nulls = NullBuffer::union(left.nulls(), right.nulls());
+
+    let mut builder = BinaryViewBuilder::with_capacity(row_count);
+
+    // One scratch buffer shared by all rows to avoid reallocations
+    let mut scratch = MutableBuffer::new(0);
+
+    for row in 0..row_count {
+        let is_null = combined_nulls
+            .as_ref()
+            .is_some_and(|validity| validity.is_null(row));
+        if is_null {
+            builder.append_null();
+            continue;
+        }
+        scratch.clear();
+        scratch.extend_from_slice(left.value(row));
+        scratch.extend_from_slice(right.value(row));
+        builder.try_append_value(&scratch)?;
+    }
+
+    Ok(builder.finish())
+}
+
+/// Concatenates two `StringViewArray`s element-wise.
+/// If either element is `Null`, the result element is also `Null`.
+///
+/// # Errors
+/// - Returns an error if the input arrays have different lengths.
+/// - Returns an error if any concatenated value exceeds `u32::MAX` in length.
+// Cannot reuse code with `GenericByteViewBuilder` since `try_append_value` works with
+// `AsRef<T::Native>`, and there is no conversion from `ByteViewType` to this or [u8]
+pub fn concat_elements_string_view_array(
+    left: &StringViewArray,
+    right: &StringViewArray,
+) -> Result<StringViewArray, ArrowError> {
+    if left.len() != right.len() {
+        return Err(ArrowError::ComputeError(format!(
+            "Arrays must have the same length: {} != {}",
+            left.len(),
+            right.len()
+        )));
+    }
+
+    let mut result = StringViewBuilder::with_capacity(left.len());
+
+    // Avoid reallocations by writing to a reused buffer. A `String` is used
+    // because appending two valid `&str` values always yields valid UTF-8, so
+    // the concatenated bytes never need to be re-validated.
+    let mut buffer = String::new();
+
+    // Pre-compute combined null bitmap, so the per-row NULL check is efficient
+    let nulls = NullBuffer::union(left.nulls(), right.nulls());
+
+    for i in 0..left.len() {
+        if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
+            result.append_null();
+        } else {
+            buffer.clear();
+            buffer.push_str(left.value(i));
+            buffer.push_str(right.value(i));
+            result.try_append_value(&buffer)?;
+        }
+    }
+    Ok(result.finish())
+}
+
 /// Returns the elementwise concatenation of [`Array`]s.
 ///
 /// # Errors
@@ -185,22 +310,43 @@ pub fn concat_elements_dyn(left: &dyn Array, right: &dyn 
Array) -> Result<ArrayR
         (DataType::Utf8, DataType::Utf8) => {
             let left = left.as_any().downcast_ref::<StringArray>().unwrap();
             let right = right.as_any().downcast_ref::<StringArray>().unwrap();
-            Ok(Arc::new(concat_elements_utf8(left, right).unwrap()))
+            Ok(Arc::new(concat_elements_utf8(left, right)?))
         }
         (DataType::LargeUtf8, DataType::LargeUtf8) => {
             let left = 
left.as_any().downcast_ref::<LargeStringArray>().unwrap();
             let right = 
right.as_any().downcast_ref::<LargeStringArray>().unwrap();
-            Ok(Arc::new(concat_elements_utf8(left, right).unwrap()))
+            Ok(Arc::new(concat_elements_utf8(left, right)?))
         }
         (DataType::Binary, DataType::Binary) => {
             let left = left.as_any().downcast_ref::<BinaryArray>().unwrap();
             let right = right.as_any().downcast_ref::<BinaryArray>().unwrap();
-            Ok(Arc::new(concat_element_binary(left, right).unwrap()))
+            Ok(Arc::new(concat_element_binary(left, right)?))
         }
         (DataType::LargeBinary, DataType::LargeBinary) => {
             let left = 
left.as_any().downcast_ref::<LargeBinaryArray>().unwrap();
             let right = 
right.as_any().downcast_ref::<LargeBinaryArray>().unwrap();
-            Ok(Arc::new(concat_element_binary(left, right).unwrap()))
+            Ok(Arc::new(concat_element_binary(left, right)?))
+        }
+        (DataType::BinaryView, DataType::BinaryView) => {
+            let left = 
left.as_any().downcast_ref::<BinaryViewArray>().unwrap();
+            let right = 
right.as_any().downcast_ref::<BinaryViewArray>().unwrap();
+            Ok(Arc::new(concat_elements_binary_view_array(left, right)?))
+        }
+        (DataType::Utf8View, DataType::Utf8View) => {
+            let left = 
left.as_any().downcast_ref::<StringViewArray>().unwrap();
+            let right = 
right.as_any().downcast_ref::<StringViewArray>().unwrap();
+            Ok(Arc::new(concat_elements_string_view_array(left, right)?))
+        }
+        (DataType::FixedSizeBinary(_), DataType::FixedSizeBinary(_)) => {
+            let left = left
+                .as_any()
+                .downcast_ref::<FixedSizeBinaryArray>()
+                .unwrap();
+            let right = right
+                .as_any()
+                .downcast_ref::<FixedSizeBinaryArray>()
+                .unwrap();
+            Ok(Arc::new(concat_elements_fixed_size_binary(left, right)?))
         }
         // unimplemented
         _ => Err(ArrowError::NotYetImplemented(format!(
@@ -213,6 +359,8 @@ pub fn concat_elements_dyn(left: &dyn Array, right: &dyn 
Array) -> Result<ArrayR
 #[cfg(test)]
 mod tests {
     use super::*;
+    use arrow_buffer::Buffer;
+
     #[test]
     fn test_string_concat() {
         let left = [Some("foo"), Some("bar"), None]
@@ -355,6 +503,121 @@ mod tests {
         assert_eq!(output, expected);
     }
 
+    #[test]
+    fn test_fixed_size_binary_concat() {
+        // Only row 1 has a value on both sides; rows 0 and 2 must come out null.
+        let lhs = FixedSizeBinaryArray::from(vec![Some(b"foo" as &[u8]), Some(b"bar"), None]);
+        let rhs = FixedSizeBinaryArray::from(vec![None, Some(b"yyy" as &[u8]), Some(b"zzz")]);
+        let want = FixedSizeBinaryArray::from(vec![None, Some(b"baryyy" as &[u8]), None]);
+
+        let got = concat_elements_fixed_size_binary(&lhs, &rhs).unwrap();
+
+        assert_eq!(got, want);
+    }
+
+    #[test]
+    fn test_fixed_size_binary_concat_no_null() {
+        // Two 2-byte inputs without nulls produce 4-byte values.
+        let lhs = FixedSizeBinaryArray::from(vec![b"ab" as &[u8], b"cd"]);
+        let rhs = FixedSizeBinaryArray::from(vec![b"12" as &[u8], b"34"]);
+        let want = FixedSizeBinaryArray::from(vec![b"ab12" as &[u8], b"cd34"]);
+
+        let got = concat_elements_fixed_size_binary(&lhs, &rhs).unwrap();
+
+        assert_eq!(got, want);
+    }
+
+    #[test]
+    fn test_fixed_size_binary_concat_error() {
+        // Inputs of different lengths (2 vs 1) must be rejected.
+        let lhs = FixedSizeBinaryArray::from(vec![b"ab" as &[u8], b"cd"]);
+        let rhs = FixedSizeBinaryArray::from(vec![b"12" as &[u8]]);
+
+        let message = concat_elements_fixed_size_binary(&lhs, &rhs)
+            .unwrap_err()
+            .to_string();
+
+        assert_eq!(
+            message,
+            "Compute error: Arrays must have the same length: 2 != 1"
+        );
+    }
+
+    #[test]
+    fn test_fixed_size_binary_concat_empty() {
+        // Zero-width arrays with no backing bytes concatenate to the same shape.
+        let make_empty = || FixedSizeBinaryArray::new(0, Buffer::from(&[]), None);
+        let lhs = make_empty();
+        let rhs = make_empty();
+
+        let got = concat_elements_fixed_size_binary(&lhs, &rhs).unwrap();
+
+        assert_eq!(got, make_empty());
+    }
+
+    #[test]
+    fn test_binary_view_concat() {
+        // Only row 1 has a value on both sides; rows 0 and 2 must come out null.
+        let lhs = BinaryViewArray::from_iter(vec![Some(b"foo" as &[u8]), Some(b"bar"), None]);
+        let rhs = BinaryViewArray::from_iter(vec![None, Some(b"yyy" as &[u8]), Some(b"zzz")]);
+        let want = BinaryViewArray::from_iter(vec![None, Some(b"baryyy" as &[u8]), None]);
+
+        let got = concat_elements_binary_view_array(&lhs, &rhs).unwrap();
+
+        assert_eq!(got, want);
+    }
+
+    #[test]
+    fn test_string_view_concat() {
+        // Only row 1 has a value on both sides; rows 0 and 2 must come out null.
+        let lhs = StringViewArray::from_iter(vec![Some("foo"), Some("bar"), None]);
+        let rhs = StringViewArray::from_iter(vec![None, Some("yyy"), Some("zzz")]);
+        let want = StringViewArray::from_iter(vec![None, Some("baryyy"), None]);
+
+        let got = concat_elements_string_view_array(&lhs, &rhs).unwrap();
+
+        assert_eq!(got, want);
+    }
+
+    #[test]
+    fn test_binary_view_concat_no_null() {
+        // Covers empty values on one side or both sides, with no nulls involved.
+        let lhs = BinaryViewArray::from_iter(vec![
+            Some(b"foo" as &[u8]),
+            Some(b"bar"),
+            Some(b""),
+            Some(b"baz"),
+        ]);
+        let rhs = BinaryViewArray::from_iter(vec![
+            Some(b"bar" as &[u8]),
+            Some(b"baz"),
+            Some(b""),
+            Some(b""),
+        ]);
+        let want = BinaryViewArray::from_iter(vec![
+            Some(b"foobar" as &[u8]),
+            Some(b"barbaz"),
+            Some(b""),
+            Some(b"baz"),
+        ]);
+
+        let got = concat_elements_binary_view_array(&lhs, &rhs).unwrap();
+
+        assert_eq!(got, want);
+    }
+
+    #[test]
+    fn test_binary_view_concat_error() {
+        // Inputs of different lengths (2 vs 1) must be rejected.
+        let lhs = BinaryViewArray::from_iter(vec![Some(b"foo" as &[u8]), Some(b"bar")]);
+        let rhs = BinaryViewArray::from_iter(vec![Some(b"baz" as &[u8])]);
+
+        let message = concat_elements_binary_view_array(&lhs, &rhs)
+            .unwrap_err()
+            .to_string();
+
+        assert_eq!(
+            message,
+            "Compute error: Arrays must have the same length: 2 != 1"
+        );
+    }
+
+    #[test]
+    fn test_binary_view_concat_empty() {
+        // Concatenating two zero-length arrays yields a zero-length array.
+        let make_empty = || BinaryViewArray::from_iter(Vec::<Option<&[u8]>>::new());
+        let lhs = make_empty();
+        let rhs = make_empty();
+
+        let got = concat_elements_binary_view_array(&lhs, &rhs).unwrap();
+
+        assert_eq!(got, make_empty());
+    }
+
     #[test]
     fn test_concat_dyn_same_type() {
         // test for StringArray
@@ -398,6 +661,26 @@ mod tests {
             .into();
         let expected = LargeBinaryArray::from_opt_vec(vec![None, 
Some(b"baryyy"), None]);
         assert_eq!(output, expected);
+
+        // test for BinaryViewArray
+        let left = BinaryViewArray::from_iter(vec![Some(b"foo" as &[u8]), 
Some(b"bar"), None]);
+        let right = BinaryViewArray::from_iter(vec![None, Some(b"yyy" as 
&[u8]), Some(b"zzz")]);
+        let output: BinaryViewArray = concat_elements_dyn(&left, &right)
+            .unwrap()
+            .into_data()
+            .into();
+        let expected = BinaryViewArray::from_iter(vec![None, Some(b"baryyy" as 
&[u8]), None]);
+        assert_eq!(output, expected);
+
+        // test for FixedSizeBinaryArray
+        let left = FixedSizeBinaryArray::from(vec![Some(b"foo" as &[u8]), 
Some(b"bar"), None]);
+        let right = FixedSizeBinaryArray::from(vec![None, Some(b"yyy" as 
&[u8]), Some(b"zzz")]);
+        let output: FixedSizeBinaryArray = concat_elements_dyn(&left, &right)
+            .unwrap()
+            .into_data()
+            .into();
+        let expected = FixedSizeBinaryArray::from(vec![None, Some(b"baryyy" as 
&[u8]), None]);
+        assert_eq!(output, expected);
     }
 
     #[test]

Reply via email to