tustvold commented on code in PR #2460:
URL: https://github.com/apache/arrow-rs/pull/2460#discussion_r948776505


##########
parquet/src/arrow/arrow_reader/selection.rs:
##########
@@ -119,6 +119,7 @@ impl RowSelection {
     }
 
     /// Given an offset index, return the offset ranges for all data pages 
selected by `self`
+    #[allow(unused)]

Review Comment:
   Instead of `#[allow(unused)]`, perhaps gate this with:

   #[cfg(any(test, feature = "async"))]

   Or something similar.



##########
parquet/src/file/serialized_reader.rs:
##########
@@ -648,20 +659,60 @@ impl<R: ChunkReader> PageReader for 
SerializedPageReader<R> {
     }
 
     fn peek_next_page(&mut self) -> Result<Option<PageMetadata>> {
-        match &self.state {
-            SerializedPageReaderState::Values {..} => Err(general_err!("Must 
set page_offset_index when using peek_next_page in SerializedPageReader.")),
-            SerializedPageReaderState::Pages { page_locations, 
dictionary_page, total_rows } => {
+        match &mut self.state {
+            SerializedPageReaderState::Values {
+                offset,
+                remaining_bytes,
+                next_page_header,
+            } => {
+                loop {
+                    if *remaining_bytes == 0 {
+                        return Ok(None);
+                    }
+                    return if let Some(header) = next_page_header.take() {

Review Comment:
   I think this probably shouldn't call `take()` here, but should just use `as_ref()` instead?



##########
parquet/src/file/serialized_reader.rs:
##########
@@ -484,13 +484,17 @@ pub(crate) fn decode_page(
     Ok(result)
 }
 
+#[allow(clippy::large_enum_variant)]
 enum SerializedPageReaderState {
     Values {
         /// The current byte offset in the reader
         offset: usize,
 
         /// The length of the chunk in bytes
         remaining_bytes: usize,
+
+        // If the next page header has already been "peeked", we will cache it 
and it`s length here
+        next_page_header: Option<PageHeader>,

Review Comment:
   ```suggestion
           next_page_header: Option<Box<PageHeader>>,
   ```
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to