This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 19379e1b42 Add `append_n` method to `FixedSizeBinaryDictionaryBuilder`
(#8498)
19379e1b42 is described below
commit 19379e1b4231b7686eb486464a9daaf81761e0c2
Author: albertlockett <[email protected]>
AuthorDate: Tue Sep 30 16:08:24 2025 -0300
Add `append_n` method to `FixedSizeBinaryDictionaryBuilder` (#8498)
# Which issue does this PR close?
- Closes https://github.com/apache/arrow-rs/issues/8497
# Rationale for this change
Adds a helper method for appending the same value multiple times, which also
avoids multiple key lookups and could be a slight performance win.
# What changes are included in this PR?
Adds the helpful `append_n` method to
`FixedSizeBinaryDictionaryBuilder`.
# Are these changes tested?
Yes, I added unit tests covering the changes.
# Are there any user-facing changes?
Yes, the new `append_n` method is public.
---------
Co-authored-by: Matthijs Brobbel <[email protected]>
---
.../fixed_size_binary_dictionary_builder.rs | 60 ++++++++++++++++++++++
1 file changed, 60 insertions(+)
diff --git a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
index 79d7754257..fa3066b7e1 100644
--- a/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
+++ b/arrow-array/src/builder/fixed_size_binary_dictionary_builder.rs
@@ -252,6 +252,28 @@ where
}
}
+ /// Append `value` to the array `count` times, returning the key it maps to.
+ /// This is the same as [`Self::append`] but allows appending the same
value multiple times without doing multiple lookups.
+ ///
+ /// Returns an error if the length of `value` differs from the byte width of the builder, or if the new index would overflow the key type.
+ pub fn append_n(
+ &mut self,
+ value: impl AsRef<[u8]>,
+ count: usize,
+ ) -> Result<K::Native, ArrowError> {
+ if self.byte_width != value.as_ref().len() as i32 { // reject values whose length is not the fixed byte width
+ Err(ArrowError::InvalidArgumentError(format!(
+ "Invalid input length passed to FixedSizeBinaryBuilder.
Expected {} got {}",
+ self.byte_width,
+ value.as_ref().len()
+ )))
+ } else {
+ let key = self.get_or_insert_key(value)?; // one key lookup, reused for all `count` slots
+ self.keys_builder.append_value_n(key, count);
+ Ok(key)
+ }
+ }
+
/// Appends a null slot into the builder
#[inline]
pub fn append_null(&mut self) {
@@ -401,6 +423,39 @@ mod tests {
assert_eq!(ava.value(1), values[1].as_bytes());
}
+ #[test]
+ fn test_fixed_size_dictionary_builder_append_n() {
+ let values = ["abc", "def"];
+ let mut b = FixedSizeBinaryDictionaryBuilder::<Int8Type>::new(3);
+ assert_eq!(b.append_n(values[0], 2).unwrap(), 0); // first distinct value is expected to get key 0
+ assert_eq!(b.append_n(values[1], 3).unwrap(), 1); // second distinct value is expected to get key 1
+ assert_eq!(b.append_n(values[0], 2).unwrap(), 0); // a repeated value is expected to reuse key 0
+ let array = b.finish();
+
+ assert_eq!( // each key should appear once per requested count: 2, then 3, then 2
+ array.keys(),
+ &Int8Array::from(vec![
+ Some(0),
+ Some(0),
+ Some(1),
+ Some(1),
+ Some(1),
+ Some(0),
+ Some(0),
+ ]),
+ );
+
+ // Values are polymorphic and so require a downcast.
+ let ava = array
+ .values()
+ .as_any()
+ .downcast_ref::<FixedSizeBinaryArray>()
+ .unwrap();
+
+ assert_eq!(ava.value(0), values[0].as_bytes()); // dictionary slot 0 should hold "abc"
+ assert_eq!(ava.value(1), values[1].as_bytes()); // dictionary slot 1 should hold "def"
+ }
+
#[test]
fn test_fixed_size_dictionary_builder_wrong_size() {
let mut b = FixedSizeBinaryDictionaryBuilder::<Int8Type>::new(3);
@@ -414,6 +469,11 @@ mod tests {
err,
"Invalid argument error: Invalid input length passed to
FixedSizeBinaryBuilder. Expected 3 got 0"
);
+ let err = b.append_n("a", 3).unwrap_err().to_string();
+ assert_eq!(
+ err,
+ "Invalid argument error: Invalid input length passed to
FixedSizeBinaryBuilder. Expected 3 got 1"
+ );
}
#[test]