adamreeve commented on code in PR #6637: URL: https://github.com/apache/arrow-rs/pull/6637#discussion_r1983975973
########## parquet/src/file/metadata/reader.rs: ########## @@ -578,56 +644,163 @@ impl ParquetMetaDataReader { if length > suffix_len - FOOTER_SIZE { let metadata_start = file_size - length - FOOTER_SIZE; let meta = fetch.fetch(metadata_start..file_size - FOOTER_SIZE).await?; - Ok((Self::decode_metadata(&meta)?, None)) + Ok(( + Self::decode_metadata( + &meta, + // footer.is_encrypted_footer(), + // #[cfg(feature = "encryption")] + // file_decryption_properties, + )?, + None, + )) } else { let metadata_start = file_size - length - FOOTER_SIZE - footer_start; let slice = &suffix[metadata_start..suffix_len - FOOTER_SIZE]; Ok(( - Self::decode_metadata(slice)?, + Self::decode_metadata( + slice, + // footer.is_encrypted_footer(), + // #[cfg(feature = "encryption")] + // file_decryption_properties, + )?, Some((footer_start, suffix.slice(..metadata_start))), )) } } - /// Decodes the Parquet footer returning the metadata length in bytes + /// Decodes the end of the Parquet footer /// - /// A parquet footer is 8 bytes long and has the following layout: + /// There are 8 bytes at the end of the Parquet footer with the following layout: /// * 4 bytes for the metadata length - /// * 4 bytes for the magic bytes 'PAR1' + /// * 4 bytes for the magic bytes 'PAR1' or 'PARE' (encrypted footer) /// /// ```text - /// +-----+--------+ - /// | len | 'PAR1' | - /// +-----+--------+ + /// +-----+------------------+ + /// | len | 'PAR1' or 'PARE' | + /// +-----+------------------+ /// ``` - pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> { - // check this is indeed a parquet file - if slice[4..] != PARQUET_MAGIC { + pub fn decode_footer_tail(slice: &[u8; FOOTER_SIZE]) -> Result<FooterTail> { + let magic = &slice[4..]; + let encrypted_footer = if magic == PARQUET_MAGIC_ENCR_FOOTER { + true + } else if magic == PARQUET_MAGIC { + false + } else { return Err(general_err!("Invalid Parquet file. Corrupt footer")); - } - + }; // get the metadata length from the footer let metadata_len = u32::from_le_bytes(slice[..4].try_into().unwrap()); - // u32 won't be larger than usize in most cases - Ok(metadata_len as usize) + Ok(FooterTail { + // u32 won't be larger than usize in most cases + metadata_length: metadata_len as usize, + encrypted_footer, + }) + } + + /// Decodes the Parquet footer, returning the metadata length in bytes + #[deprecated(note = "use decode_footer_tail instead")] + pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> { + Self::decode_footer_tail(slice).map(|f| f.metadata_length) + } + + pub fn decrypt_metadata( Review Comment: Hmm it doesn't look like it should need to be public, although adding some docs could still be helpful for developers. We should probably review which parts of the API need to be public. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org