This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/main by this push:
     new 2be261b78b Deprecate old Parquet page index parsing functions (#7640)
2be261b78b is described below

commit 2be261b78b16a4aa7b5b9aece648bec663c0dbf1
Author: Ed Seidl <[email protected]>
AuthorDate: Tue Jun 10 21:24:19 2025 -0700

    Deprecate old Parquet page index parsing functions (#7640)
    
    # Which issue does this PR close?
    
    - Closes #6447.
    
    # Rationale for this change
    
    This deprecates the last of the old standalone Parquet metadata parsing
    functions that have since been replaced by `ParquetMetaDataReader`.
    
    # What changes are included in this PR?
    
    # Are there any user-facing changes?
    
    No, only adds deprecation warnings to public API
---
 parquet/src/arrow/arrow_writer/mod.rs       | 10 ++++++----
 parquet/src/file/page_index/index_reader.rs |  8 ++++++++
 parquet/src/file/serialized_reader.rs       |  2 ++
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs
index fbc32b0c4b..147c553443 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -1345,7 +1345,6 @@ mod tests {
     use crate::data_type::AsBytes;
     use crate::file::metadata::ParquetMetaData;
     use crate::file::page_index::index::Index;
-    use crate::file::page_index::index_reader::read_offset_indexes;
     use crate::file::properties::{
         BloomFilterPosition, EnabledStatistics, ReaderProperties, WriterVersion,
     };
@@ -2026,7 +2025,9 @@ mod tests {
         writer.write(&batch).unwrap();
         writer.close().unwrap();
 
-        let reader = SerializedFileReader::new(file.try_clone().unwrap()).unwrap();
+        let options = ReadOptionsBuilder::new().with_page_index().build();
+        let reader =
+            SerializedFileReader::new_with_options(file.try_clone().unwrap(), options).unwrap();
 
         let column = reader.metadata().row_group(0).columns();
 
@@ -2039,7 +2040,8 @@ mod tests {
             "Expected a dictionary page"
         );
 
-        let offset_indexes = read_offset_indexes(&file, column).unwrap().unwrap();
+        assert!(reader.metadata().offset_index().is_some());
+        let offset_indexes = &reader.metadata().offset_index().unwrap()[0];
 
         let page_locations = offset_indexes[0].page_locations.clone();
 
@@ -2048,7 +2050,7 @@ mod tests {
         assert_eq!(
             page_locations.len(),
             10,
-            "Expected 9 pages but got {page_locations:#?}"
+            "Expected 10 pages but got {page_locations:#?}"
         );
     }
 
diff --git a/parquet/src/file/page_index/index_reader.rs b/parquet/src/file/page_index/index_reader.rs
index c472ceb291..368ede8b40 100644
--- a/parquet/src/file/page_index/index_reader.rs
+++ b/parquet/src/file/page_index/index_reader.rs
@@ -48,6 +48,10 @@ pub(crate) fn acc_range(a: Option<Range<u64>>, b: Option<Range<u64>>) -> Option<
 /// See [Page Index Documentation] for more details.
 ///
 /// [Page Index Documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
+#[deprecated(
+    since = "55.2.0",
+    note = "Use ParquetMetaDataReader instead; will be removed in 58.0.0"
+)]
 pub fn read_columns_indexes<R: ChunkReader>(
     reader: &R,
     chunks: &[ColumnChunkMetaData],
@@ -128,6 +132,10 @@ pub fn read_pages_locations<R: ChunkReader>(
 /// See [Page Index Documentation] for more details.
 ///
 /// [Page Index Documentation]: https://github.com/apache/parquet-format/blob/master/PageIndex.md
+#[deprecated(
+    since = "55.2.0",
+    note = "Use ParquetMetaDataReader instead; will be removed in 58.0.0"
+)]
 pub fn read_offset_indexes<R: ChunkReader>(
     reader: &R,
     chunks: &[ColumnChunkMetaData],
diff --git a/parquet/src/file/serialized_reader.rs 
b/parquet/src/file/serialized_reader.rs
index 5d50a8c49d..ac43381ae8 100644
--- a/parquet/src/file/serialized_reader.rs
+++ b/parquet/src/file/serialized_reader.rs
@@ -1108,6 +1108,7 @@ mod tests {
     use crate::data_type::private::ParquetValueType;
     use crate::data_type::{AsBytes, FixedLenByteArrayType, Int32Type};
     use crate::file::page_index::index::{Index, NativeIndex};
+    #[allow(deprecated)]
     use crate::file::page_index::index_reader::{read_columns_indexes, read_offset_indexes};
     use crate::file::writer::SerializedFileWriter;
     use crate::record::RowAccessor;
@@ -1940,6 +1941,7 @@ mod tests {
     }
 
     #[test]
+    #[allow(deprecated)]
     fn test_page_index_reader_out_of_order() {
         let test_file = get_test_file("alltypes_tiny_pages_plain.parquet");
         let options = ReadOptionsBuilder::new().with_page_index().build();

Reply via email to