This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 88b7fca230 ParquetMetaDataPushDecoder API to clear all buffered ranges 
(#9673)
88b7fca230 is described below

commit 88b7fca2304b07678d4543179946ddd032d31d45
Author: Nathan <[email protected]>
AuthorDate: Mon Apr 13 08:29:23 2026 -0400

    ParquetMetaDataPushDecoder API to clear all buffered ranges (#9673)
    
    This PR is a follow-up to [this
    ticket](https://github.com/apache/arrow-rs/issues/8676). It implements the
    same API, but for the metadata decoder.
    
    See also
    https://github.com/apache/arrow-rs/pull/9624#issuecomment-4195349910
    
    ## Rationale for this change
    `ParquetMetaDataPushDecoder` clears exact requested ranges, but larger
    speculative pushed ranges can remain buffered in `PushBuffers`. This
    adds a way for callers to explicitly release non-exact ranges.
    
    ## What changes are included in this PR?
    This adds `clear_all_ranges()`, which clears all byte ranges still
    staged in the decoder's internal `PushBuffers`.
    ## Are these changes tested?
    yes
    ## Are there any user-facing changes?
    Yes, this adds a new public `clear_all_ranges()` API on
    `ParquetMetaDataPushDecoder`.
---
 parquet/src/file/metadata/push_decoder.rs | 22 ++++++++++++++++++++++
 parquet/src/util/push_buffers.rs          |  1 -
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/parquet/src/file/metadata/push_decoder.rs 
b/parquet/src/file/metadata/push_decoder.rs
index e322525b71..7e4beb5ad9 100644
--- a/parquet/src/file/metadata/push_decoder.rs
+++ b/parquet/src/file/metadata/push_decoder.rs
@@ -358,6 +358,11 @@ impl ParquetMetaDataPushDecoder {
         Ok(())
     }
 
+    /// Clear any staged byte ranges currently buffered for future decode work.
+    pub fn clear_all_ranges(&mut self) {
+        self.buffers.clear_all_ranges();
+    }
+
     /// Try to decode the metadata from the pushed data, returning the
     /// decoded metadata or an error if not enough data is available.
     pub fn try_decode(&mut self) -> Result<DecodeResult<ParquetMetaData>> {
@@ -573,6 +578,23 @@ mod tests {
         assert!(metadata.offset_index().is_some());
     }
 
+    #[test]
+    fn test_metadata_decoder_clear_all_ranges() {
+        let file_len = test_file_len();
+        let mut metadata_decoder = 
ParquetMetaDataPushDecoder::try_new(file_len).unwrap();
+
+        metadata_decoder
+            .push_range(test_file_range(), TEST_FILE_DATA.clone())
+            .unwrap();
+        assert_eq!(metadata_decoder.buffers.buffered_bytes(), test_file_len());
+
+        metadata_decoder.clear_all_ranges();
+        assert_eq!(metadata_decoder.buffers.buffered_bytes(), 0);
+
+        let ranges = expect_needs_data(metadata_decoder.try_decode());
+        assert_eq!(ranges, vec![test_file_len() - 8..test_file_len()]);
+    }
+
     /// Decode the metadata incrementally, simulating a scenario where exactly 
the data needed
     /// is read in each step
     #[test]
diff --git a/parquet/src/util/push_buffers.rs b/parquet/src/util/push_buffers.rs
index eb4982fb3c..b8225ab3a1 100644
--- a/parquet/src/util/push_buffers.rs
+++ b/parquet/src/util/push_buffers.rs
@@ -156,7 +156,6 @@ impl PushBuffers {
     }
 
     /// Clear all buffered ranges and their corresponding data
-    #[cfg(feature = "arrow")]
     pub fn clear_all_ranges(&mut self) {
         self.ranges.clear();
         self.buffers.clear();

Reply via email to