This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 6471e9ac72 Pre-reserve output capacity in ByteView/ByteArray dictionary decoding (#9590)
6471e9ac72 is described below

commit 6471e9ac72a79fd13963568ec3294a76fab826a6
Author: Daniël Heres <[email protected]>
AuthorDate: Fri Mar 20 20:38:26 2026 +0100

    Pre-reserve output capacity in ByteView/ByteArray dictionary decoding (#9590)
    
    ## Summary
    
    - Reserve `output.views` capacity in
    `ByteViewArrayDecoderDictionary::read` before the decode loop
    - Reserve `output.offsets` capacity in
    `ByteArrayDecoderDictionary::read` before the decode loop
    
    This avoids per-chunk reallocation during `extend` calls inside the
    dictionary decode loop.
    
    Closes #9587
    
    ## Test plan
    
    - [ ] Existing tests pass (no functional change, only pre-allocation)
    - [ ] Benchmark dictionary-encoded StringView/BinaryView/String reads
    
    🤖 Generated with [Claude Code](https://claude.com/claude-code)
    
    Co-authored-by: Claude Opus 4.6 (1M context) <[email protected]>
---
 parquet/src/arrow/array_reader/byte_array.rs      | 3 +++
 parquet/src/arrow/array_reader/byte_view_array.rs | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/parquet/src/arrow/array_reader/byte_array.rs b/parquet/src/arrow/array_reader/byte_array.rs
index 0acbe65019..2d0d44fbe2 100644
--- a/parquet/src/arrow/array_reader/byte_array.rs
+++ b/parquet/src/arrow/array_reader/byte_array.rs
@@ -580,6 +580,9 @@ impl ByteArrayDecoderDictionary {
             return Ok(0);
         }
 
+        // Pre-reserve offsets capacity to avoid per-chunk reallocation
+        output.offsets.reserve(len);
+
         self.decoder.read(len, |keys| {
             output.extend_from_dictionary(keys, dict.offsets.as_slice(), 
dict.values.as_slice())
         })
diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs
index 65b627aae4..1933654118 100644
--- a/parquet/src/arrow/array_reader/byte_view_array.rs
+++ b/parquet/src/arrow/array_reader/byte_view_array.rs
@@ -500,6 +500,9 @@ impl ByteViewArrayDecoderDictionary {
         // then the base_buffer_idx is 5 - 2 = 3
         let base_buffer_idx = output.buffers.len() as u32 - dict.buffers.len() 
as u32;
 
+        // Pre-reserve output capacity to avoid per-chunk reallocation in 
extend
+        output.views.reserve(len);
+
         let mut error = None;
         let read = self.decoder.read(len, |keys| {
             if base_buffer_idx == 0 {

Reply via email to