lichuang commented on code in PR #8990:
URL: https://github.com/apache/arrow-rs/pull/8990#discussion_r2617883426
##########
arrow-array/src/builder/generic_bytes_view_builder.rs:
##########
@@ -636,8 +658,58 @@ pub fn make_view(data: &[u8], block_id: u32, offset: u32) -> u128 {
mod tests {
use core::str;
+ use arrow_buffer::ArrowNativeType;
+
use super::*;
+ #[test]
+ fn test_string_max_deduplication_len() {
+ let value_1 = "short";
+ let value_2 = "not so similar string but long";
+ let value_3 = "1234567890123";
+
+ let mut builder = StringViewBuilder::new()
+ .with_deduplicate_strings()
+ .with_max_deduplication_len(MAX_INLINE_VIEW_LEN * 2);
+ // safe to unwrap
+ let max_deduplication_len = builder.max_deduplication_len.unwrap();
+ assert!(builder.string_tracker.is_some());
+ assert!(max_deduplication_len > MAX_INLINE_VIEW_LEN);
+ assert!(value_1.len() < MAX_INLINE_VIEW_LEN.as_usize());
+ assert!(value_2.len() > max_deduplication_len.as_usize());
+ assert!(
+ value_3.len() > MAX_INLINE_VIEW_LEN.as_usize()
+ && value_3.len() < max_deduplication_len.as_usize()
+ );
+
+ let value_checker = |v: &[u8], builder: &StringViewBuilder| {
Review Comment:
Since `1` < `MAX_INLINE_VIEW_LEN`, the value will be saved inline in the view and the function will return directly:
```rust
pub fn try_append_value(&mut self, value: impl AsRef<T::Native>) -> Result<(), ArrowError> {
let v: &[u8] = value.as_ref().as_ref();
let length: u32 = v.len().try_into().map_err(|_| {
ArrowError::InvalidArgumentError(format!("String length {} exceeds u32::MAX", v.len()))
})?;
if length <= MAX_INLINE_VIEW_LEN {
let mut view_buffer = [0; 16];
view_buffer[0..4].copy_from_slice(&length.to_le_bytes());
view_buffer[4..4 + v.len()].copy_from_slice(v);
self.views_buffer.push(u128::from_le_bytes(view_buffer));
self.null_buffer_builder.append_non_null();
return Ok(());
}
// ...
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]