alamb commented on code in PR #7752: URL: https://github.com/apache/arrow-rs/pull/7752#discussion_r2162581909
########## parquet-variant/src/variant/object.rs: ########## @@ -140,7 +151,11 @@ impl<'m, 'v> VariantObject<'m, 'v> { self.field_offsets_start_byte, i, )?; - let value_bytes = slice_from_slice(self.value, self.values_start_byte + start_offset..)?; + let value_start = self + .values_start_byte + .checked_add(start_offset) + .ok_or_else(|| ArrowError::InvalidArgumentError("Integer overflow".into()))?; Review Comment: ```suggestion .ok_or_else(|| ArrowError::InvalidArgumentError("Integer overflow computing value offset in variant field access".into()))?; ``` ########## parquet-variant/src/variant/object.rs: ########## @@ -72,36 +75,44 @@ impl<'m, 'v> VariantObject<'m, 'v> { let header_byte = first_byte_from_slice(value)?; let header = VariantObjectHeader::try_new(header_byte)?; - // Determine num_elements size based on is_large flag + // Determine num_elements size based on is_large flag and fetch the value let num_elements_size = if header.is_large { OffsetSizeBytes::Four } else { OffsetSizeBytes::One }; + let num_elements = num_elements_size.unpack_usize(value, NUM_HEADER_BYTES, 0)?; + + // Calculate byte offsets for different sections with overflow protection + let overflow = || ArrowError::InvalidArgumentError("Integer overflow".into()); Review Comment: ```suggestion let overflow = || ArrowError::InvalidArgumentError("Integer overflow decoding variant object header".into()); ``` ########## parquet-variant/src/variant/list.rs: ########## @@ -78,25 +81,16 @@ impl<'m, 'v> VariantList<'m, 'v> { false => OffsetSizeBytes::One, }; - // Skip the header byte to read the num_elements - let num_elements = num_elements_size.unpack_usize(value, 1, 0)?; - let first_offset_byte = 1 + num_elements_size as usize; - - let overflow = - || ArrowError::InvalidArgumentError("Variant value_byte_length overflow".into()); - - // 1. num_elements + 1 - let n_offsets = num_elements.checked_add(1).ok_or_else(overflow)?; - - // 2. (num_elements + 1) * offset_size - let value_bytes = n_offsets - .checked_mul(header.offset_size as usize) - .ok_or_else(overflow)?; + // Skip the header byte to read the num_elements; the offset array immediately follows + let num_elements = num_elements_size.unpack_usize(value, NUM_HEADER_BYTES, 0)?; + let first_offset_byte = NUM_HEADER_BYTES + num_elements_size as usize; - // 3. first_offset_byte + ... - let first_value_byte = first_offset_byte - .checked_add(value_bytes) - .ok_or_else(overflow)?; + // (num_elements + 1) * offset_size + first_offset_byte + let first_value_byte = num_elements + .checked_add(1) + .and_then(|n| n.checked_mul(header.offset_size as usize)) + .and_then(|n| n.checked_add(first_offset_byte)) + .ok_or_else(|| ArrowError::InvalidArgumentError("Integer overflow".into()))?; Review Comment: Adding some context might help here: ```suggestion .ok_or_else(|| ArrowError::InvalidArgumentError("Integer overflow decoding Variant array header".into()))?; ``` ########## parquet-variant/src/variant/list.rs: ########## @@ -15,11 +15,14 @@ // specific language governing permissions and limitations // under the License. use crate::decoder::OffsetSizeBytes; -use crate::utils::{first_byte_from_slice, slice_from_slice, validate_fallible_iterator}; +use crate::utils::{first_byte_from_slice, slice_from_slice_at_offset, validate_fallible_iterator}; use crate::variant::{Variant, VariantMetadata}; use arrow_schema::ArrowError; +// The value header occupies one byte; use a named constant for readability Review Comment: 👍 -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org