This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 8b159ad680 Add `finish_preserve_values` to `ArrayBuilder` trait
(#9601)
8b159ad680 is described below
commit 8b159ad680469e1b4df10e3b7236c7a1ec40d3f3
Author: Adam Reichold <[email protected]>
AuthorDate: Mon Apr 13 20:46:58 2026 +0200
Add `finish_preserve_values` to `ArrayBuilder` trait (#9601)
# Which issue does this PR close?
- Closes #9600.
# Rationale for this change
Delta dictionaries require user code to call `finish_preserve_values`
instead of `finish` on the dictionary builders. But this is generally
not possible if those builders are nested within composite builders like
lists, maps or structs. Using the new trait method, user code can call
this generically which has no effect on e.g. primitive builders due to
the default implementation but forwards to this method for composite
ones.
# What changes are included in this PR?
Adds a new method `finish_preserve_values` to `ArrayBuilder` with a
default implementation that forwards to `finish`.
Implements this method for dictionary and composite builders.
# Are these changes tested?
Adds tests where new `finish_preserve_values` inherent methods were
created to check that the correct method on the constituent builders is
called using a shared mock builder `PreserveValuesMock`.
# Are there any user-facing changes?
This adds a new method `finish_preserve_values` to the central
`ArrayBuilder` trait, but it has a default implementation so this should
be backwards compatible.
---
.../fixed_size_binary_dictionary_builder.rs | 4 ++
arrow-array/src/builder/fixed_size_list_builder.rs | 42 +++++++++++++++-
.../builder/generic_bytes_dictionary_builder.rs | 4 ++
arrow-array/src/builder/generic_list_builder.rs | 36 +++++++++++++-
.../src/builder/generic_list_view_builder.rs | 36 +++++++++++++-
arrow-array/src/builder/map_builder.rs | 37 +++++++++++++-
arrow-array/src/builder/mod.rs | 58 ++++++++++++++++++++++
.../src/builder/primitive_dictionary_builder.rs | 4 ++
arrow-array/src/builder/struct_builder.rs | 50 ++++++++++++++++++-
9 files changed, 266 insertions(+), 5 deletions(-)
diff --git a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
index fa3066b7e1..194305d3b6 100644
--- a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
@@ -201,6 +201,10 @@ where
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}
+
+ fn finish_preserve_values(&mut self) -> ArrayRef {
+ Arc::new(self.finish_preserve_values())
+ }
}
impl<K> FixedSizeBinaryDictionaryBuilder<K>
diff --git a/arrow-array/src/builder/fixed_size_list_builder.rs
b/arrow-array/src/builder/fixed_size_list_builder.rs
index 6eb48fc052..1ad2f7aeaa 100644
--- a/arrow-array/src/builder/fixed_size_list_builder.rs
+++ b/arrow-array/src/builder/fixed_size_list_builder.rs
@@ -140,6 +140,10 @@ where
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}
+
+ fn finish_preserve_values(&mut self) -> ArrayRef {
+ Arc::new(self.finish_preserve_values())
+ }
}
impl<T: ArrayBuilder> FixedSizeListBuilder<T>
@@ -211,6 +215,28 @@ where
FixedSizeListArray::new(field, self.list_len, values, nulls)
}
+ fn finish_preserve_values(&mut self) -> FixedSizeListArray {
+ let len = self.len();
+ let values = self.values_builder.finish_preserve_values();
+ let nulls = self.null_buffer_builder.finish();
+
+ assert_eq!(
+ values.len(),
+ len * self.list_len as usize,
+ "Length of the child array ({}) must be the multiple of the value
length ({}) and the array length ({}).",
+ values.len(),
+ self.list_len,
+ len,
+ );
+
+ let field = self
+ .field
+ .clone()
+ .unwrap_or_else(||
Arc::new(Field::new_list_field(values.data_type().clone(), true)));
+
+ FixedSizeListArray::new(field, self.list_len, values, nulls)
+ }
+
/// Returns the current null buffer as a slice
pub fn validity_slice(&self) -> Option<&[u8]> {
self.null_buffer_builder.as_slice()
@@ -224,7 +250,7 @@ mod tests {
use crate::Array;
use crate::Int32Array;
- use crate::builder::Int32Builder;
+ use crate::builder::{Int32Builder, tests::PreserveValuesMock};
fn make_list_builder(
include_null_element: bool,
@@ -491,4 +517,18 @@ mod tests {
builder.finish();
}
+
+ #[test]
+ fn test_finish_preserve_values() {
+ let mut builder =
FixedSizeListBuilder::new(PreserveValuesMock::default(), 2);
+
+ builder.values().inner.append_value(0);
+ builder.values().inner.append_value(1);
+ builder.append(true);
+
+ let arr = builder.finish_preserve_values();
+
+ assert_eq!(1, arr.len());
+ assert_eq!(1, builder.values().called);
+ }
}
diff --git a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
index 35c7bfced1..956014a72b 100644
--- a/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
+++ b/arrow-array/src/builder/generic_bytes_dictionary_builder.rs
@@ -256,6 +256,10 @@ where
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}
+
+ fn finish_preserve_values(&mut self) -> ArrayRef {
+ Arc::new(self.finish_preserve_values())
+ }
}
impl<K, T> GenericByteDictionaryBuilder<K, T>
diff --git a/arrow-array/src/builder/generic_list_builder.rs
b/arrow-array/src/builder/generic_list_builder.rs
index cabf7a5140..2f130baae9 100644
--- a/arrow-array/src/builder/generic_list_builder.rs
+++ b/arrow-array/src/builder/generic_list_builder.rs
@@ -166,6 +166,10 @@ where
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}
+
+ fn finish_preserve_values(&mut self) -> ArrayRef {
+ Arc::new(self.finish_preserve_values())
+ }
}
impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder>
GenericListBuilder<OffsetSize, T>
@@ -329,6 +333,23 @@ where
GenericListArray::new(field, offsets, values, nulls)
}
+ fn finish_preserve_values(&mut self) -> GenericListArray<OffsetSize> {
+ let values = self.values_builder.finish_preserve_values();
+ let nulls = self.null_buffer_builder.finish();
+
+ let offsets = Buffer::from_vec(std::mem::take(&mut
self.offsets_builder));
+ // Safety: Safe by construction
+ let offsets = unsafe { OffsetBuffer::new_unchecked(offsets.into()) };
+ self.offsets_builder.push(OffsetSize::zero());
+
+ let field = match &self.field {
+ Some(f) => f.clone(),
+ None => Arc::new(Field::new_list_field(values.data_type().clone(),
true)),
+ };
+
+ GenericListArray::new(field, offsets, values, nulls)
+ }
+
/// Returns the current offsets buffer as a slice
pub fn offsets_slice(&self) -> &[OffsetSize] {
self.offsets_builder.as_slice()
@@ -364,7 +385,7 @@ where
mod tests {
use super::*;
use crate::Int32Array;
- use crate::builder::{Int32Builder, ListBuilder, make_builder};
+ use crate::builder::{Int32Builder, ListBuilder, make_builder,
tests::PreserveValuesMock};
use crate::cast::AsArray;
use crate::types::Int32Type;
use arrow_schema::DataType;
@@ -818,4 +839,17 @@ mod tests {
builder.append_value([Some(1)]);
builder.finish();
}
+
+ #[test]
+ fn test_finish_preserve_values() {
+ let mut builder = ListBuilder::new(PreserveValuesMock::default());
+
+ builder.values().inner.append_value(1);
+ builder.append(true);
+
+ let arr = builder.finish_preserve_values();
+
+ assert_eq!(1, arr.len());
+ assert_eq!(1, builder.values().called);
+ }
}
diff --git a/arrow-array/src/builder/generic_list_view_builder.rs
b/arrow-array/src/builder/generic_list_view_builder.rs
index c13c21cb98..4f9bd64c5d 100644
--- a/arrow-array/src/builder/generic_list_view_builder.rs
+++ b/arrow-array/src/builder/generic_list_view_builder.rs
@@ -71,6 +71,10 @@ impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder>
ArrayBuilder
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}
+
+ fn finish_preserve_values(&mut self) -> ArrayRef {
+ Arc::new(self.finish_preserve_values())
+ }
}
impl<OffsetSize: OffsetSizeTrait, T: ArrayBuilder>
GenericListViewBuilder<OffsetSize, T> {
@@ -216,6 +220,23 @@ where
GenericListViewArray::new(field, offsets, sizes, values, nulls)
}
+ fn finish_preserve_values(&mut self) -> GenericListViewArray<OffsetSize> {
+ let values = self.values_builder.finish_preserve_values();
+ let nulls = self.null_buffer_builder.finish();
+ let offsets = Buffer::from_vec(std::mem::take(&mut
self.offsets_builder));
+ self.current_offset = OffsetSize::zero();
+
+ // Safety: Safe by construction
+ let offsets = ScalarBuffer::from(offsets);
+ let sizes = Buffer::from_vec(std::mem::take(&mut self.sizes_builder));
+ let sizes = ScalarBuffer::from(sizes);
+ let field = match &self.field {
+ Some(f) => f.clone(),
+ None => Arc::new(Field::new("item", values.data_type().clone(),
true)),
+ };
+ GenericListViewArray::new(field, offsets, sizes, values, nulls)
+ }
+
/// Returns the current offsets buffer as a slice
pub fn offsets_slice(&self) -> &[OffsetSize] {
self.offsets_builder.as_slice()
@@ -245,7 +266,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
- use crate::builder::{Int32Builder, ListViewBuilder, make_builder};
+ use crate::builder::{Int32Builder, ListViewBuilder, make_builder,
tests::PreserveValuesMock};
use crate::cast::AsArray;
use crate::types::Int32Type;
use crate::{Array, Int32Array};
@@ -703,4 +724,17 @@ mod tests {
builder.append_value([Some(1)]);
builder.finish();
}
+
+ #[test]
+ fn test_finish_preserve_values() {
+ let mut builder = ListViewBuilder::new(PreserveValuesMock::default());
+
+ builder.values().inner.append_value(1);
+ builder.append(true);
+
+ let arr = builder.finish_preserve_values();
+
+ assert_eq!(1, arr.len());
+ assert_eq!(1, builder.values().called);
+ }
}
diff --git a/arrow-array/src/builder/map_builder.rs
b/arrow-array/src/builder/map_builder.rs
index 5ff1625b49..a6d4918176 100644
--- a/arrow-array/src/builder/map_builder.rs
+++ b/arrow-array/src/builder/map_builder.rs
@@ -214,6 +214,18 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
self.finish_helper(keys_arr, values_arr, offset_buffer, nulls, len)
}
+ fn finish_preserve_values(&mut self) -> MapArray {
+ let len = self.len();
+ // Build the keys
+ let keys_arr = self.key_builder.finish_preserve_values();
+ let values_arr = self.value_builder.finish_preserve_values();
+ let offset_buffer = Buffer::from_vec(std::mem::take(&mut
self.offsets_builder));
+ self.offsets_builder.push(0);
+ let null_bit_buffer = self.null_buffer_builder.finish();
+
+ self.finish_helper(keys_arr, values_arr, offset_buffer,
null_bit_buffer, len)
+ }
+
fn finish_helper(
&self,
keys_arr: Arc<dyn Array>,
@@ -287,6 +299,10 @@ impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for
MapBuilder<K, V> {
Arc::new(self.finish_cloned())
}
+ fn finish_preserve_values(&mut self) -> ArrayRef {
+ Arc::new(self.finish_preserve_values())
+ }
+
fn as_any(&self) -> &dyn Any {
self
}
@@ -303,7 +319,7 @@ impl<K: ArrayBuilder, V: ArrayBuilder> ArrayBuilder for
MapBuilder<K, V> {
#[cfg(test)]
mod tests {
use super::*;
- use crate::builder::{Int32Builder, StringBuilder, make_builder};
+ use crate::builder::{Int32Builder, StringBuilder, make_builder,
tests::PreserveValuesMock};
use crate::{Int32Array, StringArray};
use std::collections::HashMap;
@@ -516,4 +532,23 @@ mod tests {
builder.finish();
}
+
+ #[test]
+ fn test_finish_preserve_values() {
+ let mut builder = MapBuilder::new(
+ None,
+ PreserveValuesMock::default(),
+ PreserveValuesMock::default(),
+ );
+
+ builder.keys().inner.append_value(1);
+ builder.values().inner.append_value(2);
+ builder.append(true).unwrap();
+
+ let map = builder.finish_preserve_values();
+
+ assert_eq!(1, map.len());
+ assert_eq!(1, builder.keys().called);
+ assert_eq!(1, builder.values().called);
+ }
}
diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs
index 02c6df453b..7feb1b4d2b 100644
--- a/arrow-array/src/builder/mod.rs
+++ b/arrow-array/src/builder/mod.rs
@@ -341,6 +341,18 @@ pub trait ArrayBuilder: Any + Send + Sync {
/// Builds the array without resetting the underlying builder.
fn finish_cloned(&self) -> ArrayRef;
+ /// Builds the array without resetting the values builder.
+ ///
+ /// This is relevant for dictionary builders but also for composite
builders.
+ /// Those are not affected directly, but will call the corresponding method
+ /// on their constituent builders.
+ ///
+ /// The default implementation just calls [`finish`][Self::finish] which
is sufficient
+ /// for all but the above mentioned builders.
+ fn finish_preserve_values(&mut self) -> ArrayRef {
+ self.finish()
+ }
+
/// Returns the builder as a non-mutable `Any` reference.
///
/// This is most useful when one wants to call non-mutable APIs on a
specific builder
@@ -376,6 +388,10 @@ impl ArrayBuilder for Box<dyn ArrayBuilder> {
(**self).finish_cloned()
}
+ fn finish_preserve_values(&mut self) -> ArrayRef {
+ (**self).finish_preserve_values()
+ }
+
fn as_any(&self) -> &dyn Any {
(**self).as_any()
}
@@ -613,3 +629,45 @@ pub fn make_builder(datatype: &DataType, capacity: usize)
-> Box<dyn ArrayBuilde
t => unimplemented!("Data type {t} is not currently supported"),
}
}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[derive(Default)]
+ pub struct PreserveValuesMock {
+ pub called: usize,
+ pub inner: Int32Builder,
+ }
+
+ impl ArrayBuilder for PreserveValuesMock {
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ fn as_any_mut(&mut self) -> &mut dyn Any {
+ self
+ }
+
+ fn into_box_any(self: Box<Self>) -> Box<dyn Any> {
+ self
+ }
+
+ fn len(&self) -> usize {
+ self.inner.len()
+ }
+
+ fn finish(&mut self) -> ArrayRef {
+ panic!("finish should never be called on PreserveValuesMock")
+ }
+
+ fn finish_cloned(&self) -> ArrayRef {
+ panic!("finish_cloned should never be called on
PreserveValuesMock")
+ }
+
+ fn finish_preserve_values(&mut self) -> ArrayRef {
+ self.called += 1;
+ self.inner.finish_preserve_values()
+ }
+ }
+}
diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs
b/arrow-array/src/builder/primitive_dictionary_builder.rs
index d9544aec3b..4f21871974 100644
--- a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -268,6 +268,10 @@ where
fn finish_cloned(&self) -> ArrayRef {
Arc::new(self.finish_cloned())
}
+
+ fn finish_preserve_values(&mut self) -> ArrayRef {
+ Arc::new(self.finish_preserve_values())
+ }
}
impl<K, V> PrimitiveDictionaryBuilder<K, V>
diff --git a/arrow-array/src/builder/struct_builder.rs
b/arrow-array/src/builder/struct_builder.rs
index 795593c98a..d299f7bf9d 100644
--- a/arrow-array/src/builder/struct_builder.rs
+++ b/arrow-array/src/builder/struct_builder.rs
@@ -135,6 +135,10 @@ impl ArrayBuilder for StructBuilder {
Arc::new(self.finish_cloned())
}
+ fn finish_preserve_values(&mut self) -> ArrayRef {
+ Arc::new(self.finish_preserve_values())
+ }
+
/// Returns the builder as a non-mutable `Any` reference.
///
/// This is most useful when one wants to call non-mutable APIs on a
specific builder
@@ -265,6 +269,23 @@ impl StructBuilder {
StructArray::new(self.fields.clone(), arrays, nulls)
}
+ fn finish_preserve_values(&mut self) -> StructArray {
+ self.validate_content();
+ if self.fields.is_empty() {
+ return StructArray::new_empty_fields(self.len(),
self.null_buffer_builder.finish());
+ }
+
+ let arrays = self
+ .field_builders
+ .iter_mut()
+ .map(|f| f.finish_preserve_values())
+ .collect();
+
+ let nulls = self.null_buffer_builder.finish();
+
+ StructArray::new(self.fields.clone(), arrays, nulls)
+ }
+
/// Constructs and validates contents in the builder to ensure that
/// - fields and field_builders are of equal length
/// - the number of items in individual field_builders are equal to
self.len()
@@ -304,7 +325,7 @@ mod tests {
use arrow_data::ArrayData;
use arrow_schema::Field;
- use crate::{array::Array, types::ArrowDictionaryKeyType};
+ use crate::{array::Array, builder::tests::PreserveValuesMock,
types::ArrowDictionaryKeyType};
#[test]
fn test_struct_array_builder() {
@@ -523,6 +544,33 @@ mod tests {
assert_eq!(0, builder.len());
}
+ #[test]
+ fn test_struct_array_builder_finish_preserve_values() {
+ let fields = vec![Field::new("mock", DataType::Int32, false)];
+ let field_builders = vec![Box::new(PreserveValuesMock::default()) as
Box<dyn ArrayBuilder>];
+
+ let mut builder = StructBuilder::new(fields, field_builders);
+ builder
+ .field_builder::<PreserveValuesMock>(0)
+ .unwrap()
+ .inner
+ .append_value(1);
+ builder.append(true);
+
+ assert_eq!(1, builder.len());
+
+ let arr = builder.finish_preserve_values();
+
+ assert_eq!(1, arr.len());
+ assert_eq!(
+ 1,
+ builder
+ .field_builder::<PreserveValuesMock>(0)
+ .unwrap()
+ .called
+ );
+ }
+
#[test]
fn test_struct_array_builder_from_schema() {
let mut fields = vec![