alamb commented on code in PR #8340:
URL: https://github.com/apache/arrow-rs/pull/8340#discussion_r2365583592
##########
parquet/src/file/metadata/push_decoder.rs:
##########
@@ -261,68 +269,152 @@ impl ParquetMetaDataPushDecoder {
&mut self,
ranges: Vec<std::ops::Range<u64>>,
buffers: Vec<bytes::Bytes>,
- ) -> std::result::Result<(), String> {
- if self.done {
- return Err(
+ ) -> Result<(), ParquetError> {
+ if matches!(&self.state, DecodeState::Finished) {
+ return Err(general_err!(
"ParquetMetaDataPushDecoder: cannot push data after decoding
is finished"
- .to_string(),
- );
+ ));
}
self.buffers.push_ranges(ranges, buffers);
Ok(())
}
/// Try to decode the metadata from the pushed data, returning the
/// decoded metadata or an error if not enough data is available.
- pub fn try_decode(
- &mut self,
- ) -> std::result::Result<DecodeResult<ParquetMetaData>, ParquetError> {
- if self.done {
- return Ok(DecodeResult::Finished);
- }
-
- // need to have the last 8 bytes of the file to decode the metadata
+ pub fn try_decode(&mut self) -> Result<DecodeResult<ParquetMetaData>,
ParquetError> {
let file_len = self.buffers.file_len();
- if !self.buffers.has_range(&(file_len - 8..file_len)) {
- #[expect(clippy::single_range_in_vec_init)]
- return Ok(DecodeResult::NeedsData(vec![file_len - 8..file_len]));
+ let footer_len = FOOTER_SIZE as u64;
+ loop {
+ match std::mem::replace(&mut self.state,
DecodeState::Intermediate) {
Review Comment:
Here is the core state machine, which in my mind makes it very clear what is
happening.
I am quite pleased with how this decoder state machine is looking.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]