This is an automated email from the ASF dual-hosted git repository.

etseidl pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new d7d9ad30c8 Refactor `RleEncoder::flush_bit_packed_run` to make flow 
clearer (#9735)
d7d9ad30c8 is described below

commit d7d9ad30c848f854032610d13518e1d7c67e9939
Author: Ed Seidl <[email protected]>
AuthorDate: Thu Apr 16 07:16:13 2026 -0700

    Refactor `RleEncoder::flush_bit_packed_run` to make flow clearer (#9735)
    
    # Which issue does this PR close?
    
    - Closes #9734.
    
    # Rationale for this change
    
    The transition from bit-packed to RLE encoding is a bit confusing. See
    issue for more details.
    
    # What changes are included in this PR?
    
    Splits `flush_bit_packed_run` into two functions and modifies
    `flush_buffered_values` to use the new `finish_bit_packed_run`.
    
    # Are these changes tested?
    
    Should be covered by existing tests
    
    # Are there any user-facing changes?
    
    No, only makes changes to APIs marked experimental
---
 parquet/src/encodings/rle.rs | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/parquet/src/encodings/rle.rs b/parquet/src/encodings/rle.rs
index c2beb4a534..ea236f652a 100644
--- a/parquet/src/encodings/rle.rs
+++ b/parquet/src/encodings/rle.rs
@@ -245,7 +245,7 @@ impl RleEncoder {
         self.repeat_count = 0;
     }
 
-    fn flush_bit_packed_run(&mut self, update_indicator_byte: bool) {
+    fn flush_bit_packed_run(&mut self, end_current_run: bool) {
         if self.indicator_byte_pos < 0 {
             self.indicator_byte_pos = self.bit_writer.skip(1) as i64;
         }
@@ -255,25 +255,31 @@ impl RleEncoder {
             self.bit_writer.put_value(*v, self.bit_width as usize);
         }
         self.num_buffered_values = 0;
-        if update_indicator_byte {
-            // Write the indicator byte to the reserved position in 
`bit_writer`
-            let num_groups = self.bit_packed_count / 8;
-            let indicator_byte = ((num_groups << 1) | 1) as u8;
-            self.bit_writer
-                .put_aligned_offset(indicator_byte, 1, self.indicator_byte_pos 
as usize);
-            self.indicator_byte_pos = -1;
-            self.bit_packed_count = 0;
+        if end_current_run {
+            self.finish_bit_packed_run();
         }
     }
 
+    // Called when ending a bit-packed run. Writes the indicator byte to the 
reserved
+    // position in `bit_writer`
+    fn finish_bit_packed_run(&mut self) {
+        let num_groups = self.bit_packed_count / 8;
+        let indicator_byte = ((num_groups << 1) | 1) as u8;
+        self.bit_writer
+            .put_aligned_offset(indicator_byte, 1, self.indicator_byte_pos as 
usize);
+        self.indicator_byte_pos = -1;
+        self.bit_packed_count = 0;
+    }
+
     fn flush_buffered_values(&mut self) {
         if self.repeat_count >= 8 {
+            // Clear buffered values as they are not needed
             self.num_buffered_values = 0;
             if self.bit_packed_count > 0 {
-                // In this case we choose RLE encoding. Flush the current 
buffered values
-                // as bit-packed encoding.
+                // In this case we have chosen to switch to RLE encoding. 
Close out the
+                // previous bit-packed run.
                 debug_assert_eq!(self.bit_packed_count % 8, 0);
-                self.flush_bit_packed_run(true)
+                self.finish_bit_packed_run();
             }
             return;
         }

Reply via email to