This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new ade038153a No longer allow BIT_PACKED level encoding in Parquet writer 
(#9656)
ade038153a is described below

commit ade038153a66464c56e21e350085bfcf950be09f
Author: Ed Seidl <[email protected]>
AuthorDate: Fri Apr 3 13:55:38 2026 -0700

    No longer allow BIT_PACKED level encoding in Parquet writer (#9656)
    
    # Which issue does this PR close?
    
    - Closes #9635.
    
    # Rationale for this change
    The `BIT_PACKED` encoding for repetition and definition levels has long
    been deprecated. Remove the possibility of using it.
    
    # What changes are included in this PR?
    
    <!--
    There is no need to duplicate the description in the issue here but it
    is sometimes worth providing a summary of the individual changes in this
    PR.
    -->
    
    # Are these changes tested?
    
    Yes, this should be covered by the existing tests.
    
    # Are there any user-facing changes?
    
    No, only changes to API marked "experimental"
---
 parquet/src/column/writer/mod.rs          |  2 +-
 parquet/src/encodings/levels.rs           | 34 +++++++------------------------
 parquet/src/util/test_common/page_util.rs |  2 +-
 3 files changed, 9 insertions(+), 29 deletions(-)

diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index cdf489f3b6..46f90d3f77 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -650,7 +650,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> {
     /// Creates a new streaming level encoder appropriate for the writer 
version.
     fn create_level_encoder(max_level: i16, props: &WriterProperties) -> 
LevelEncoder {
         match props.writer_version() {
-            WriterVersion::PARQUET_1_0 => 
LevelEncoder::v1_streaming(Encoding::RLE, max_level),
+            WriterVersion::PARQUET_1_0 => 
LevelEncoder::v1_streaming(max_level),
             WriterVersion::PARQUET_2_0 => 
LevelEncoder::v2_streaming(max_level),
         }
     }
diff --git a/parquet/src/encodings/levels.rs b/parquet/src/encodings/levels.rs
index b761a5ac5d..5d85e240cb 100644
--- a/parquet/src/encodings/levels.rs
+++ b/parquet/src/encodings/levels.rs
@@ -19,43 +19,32 @@ use std::mem;
 
 use super::rle::RleEncoder;
 
-use crate::basic::Encoding;
 use crate::data_type::AsBytes;
-use crate::util::bit_util::{BitWriter, num_required_bits};
+use crate::util::bit_util::num_required_bits;
 
 /// Encoder for definition/repetition levels.
 /// Currently only supports Rle and BitPacked (dev/null) encoding, including 
v2.
 pub enum LevelEncoder {
     Rle(RleEncoder),
     RleV2(RleEncoder),
-    BitPacked(u8, BitWriter),
 }
 
 impl LevelEncoder {
     /// Creates a new streaming level encoder for Data Page v1.
     ///
-    /// Unlike [`v1`](Self::v1), this does not require knowing the number of 
values
+    /// This does not require knowing the number of values
     /// upfront, making it suitable for incremental encoding where levels are 
fed in
     /// as they arrive via [`put`](Self::put).
-    pub fn v1_streaming(encoding: Encoding, max_level: i16) -> Self {
+    pub fn v1_streaming(max_level: i16) -> Self {
         let bit_width = num_required_bits(max_level as u64);
-        match encoding {
-            Encoding::RLE => {
-                // Reserve space for length header
-                let buffer = vec![0u8; 4];
-                LevelEncoder::Rle(RleEncoder::new_from_buf(bit_width, buffer))
-            }
-            #[allow(deprecated)]
-            Encoding::BIT_PACKED => {
-                LevelEncoder::BitPacked(bit_width, 
BitWriter::new_from_buf(Vec::new()))
-            }
-            _ => panic!("Unsupported encoding type {encoding}"),
-        }
+        // Reserve space for length header
+        let buffer = vec![0u8; 4];
+        LevelEncoder::Rle(RleEncoder::new_from_buf(bit_width, buffer))
     }
 
     /// Creates a new streaming RLE level encoder for Data Page v2.
     ///
-    /// Unlike [`v2`](Self::v2), this does not require knowing the number of 
values
+    /// This does not require knowing the number of values
     /// upfront, making it suitable for incremental encoding where levels are 
fed in
     /// as they arrive via [`put`](Self::put).
     pub fn v2_streaming(max_level: i16) -> Self {
@@ -80,12 +69,6 @@ impl LevelEncoder {
                     num_encoded += 1;
                 }
             }
-            LevelEncoder::BitPacked(bit_width, ref mut encoder) => {
-                for value in buffer {
-                    encoder.put_value(*value as u64, bit_width as usize);
-                    num_encoded += 1;
-                }
-            }
         }
         num_encoded
     }
@@ -106,7 +89,6 @@ impl LevelEncoder {
                 encoded_data
             }
             LevelEncoder::RleV2(encoder) => encoder.consume(),
-            LevelEncoder::BitPacked(_, encoder) => encoder.consume(),
         }
     }
 
@@ -126,7 +108,6 @@ impl LevelEncoder {
                 f(data)
             }
             LevelEncoder::RleV2(encoder) => f(encoder.flush_buffer()),
-            LevelEncoder::BitPacked(_, encoder) => f(encoder.flush_buffer()),
         };
         match self {
             LevelEncoder::Rle(encoder) => {
@@ -135,7 +116,6 @@ impl LevelEncoder {
                 encoder.skip(mem::size_of::<i32>());
             }
             LevelEncoder::RleV2(encoder) => encoder.clear(),
-            LevelEncoder::BitPacked(_, encoder) => encoder.clear(),
         }
         result
     }
diff --git a/parquet/src/util/test_common/page_util.rs 
b/parquet/src/util/test_common/page_util.rs
index 6a99beaea1..7797427872 100644
--- a/parquet/src/util/test_common/page_util.rs
+++ b/parquet/src/util/test_common/page_util.rs
@@ -75,7 +75,7 @@ impl DataPageBuilderImpl {
         if max_level <= 0 {
             return 0;
         }
-        let mut level_encoder = LevelEncoder::v1_streaming(Encoding::RLE, 
max_level);
+        let mut level_encoder = LevelEncoder::v1_streaming(max_level);
         level_encoder.put(levels);
         let encoded_levels = level_encoder.consume();
         // Actual encoded bytes (without length offset)

Reply via email to