alamb commented on code in PR #7752:
URL: https://github.com/apache/arrow-rs/pull/7752#discussion_r2162581909
##########
parquet-variant/src/variant/object.rs:
##########
@@ -140,7 +151,11 @@ impl<'m, 'v> VariantObject<'m, 'v> {
self.field_offsets_start_byte,
i,
)?;
- let value_bytes = slice_from_slice(self.value, self.values_start_byte
+ start_offset..)?;
+ let value_start = self
+ .values_start_byte
+ .checked_add(start_offset)
+ .ok_or_else(|| ArrowError::InvalidArgumentError("Integer
overflow".into()))?;
Review Comment:
```suggestion
.ok_or_else(|| ArrowError::InvalidArgumentError("Integer
overflow computing value offset in variant field access".into()))?;
```
##########
parquet-variant/src/variant/object.rs:
##########
@@ -72,36 +75,44 @@ impl<'m, 'v> VariantObject<'m, 'v> {
let header_byte = first_byte_from_slice(value)?;
let header = VariantObjectHeader::try_new(header_byte)?;
- // Determine num_elements size based on is_large flag
+ // Determine num_elements size based on is_large flag and fetch the
value
let num_elements_size = if header.is_large {
OffsetSizeBytes::Four
} else {
OffsetSizeBytes::One
};
+ let num_elements = num_elements_size.unpack_usize(value,
NUM_HEADER_BYTES, 0)?;
+
+ // Calculate byte offsets for different sections with overflow
protection
+ let overflow = || ArrowError::InvalidArgumentError("Integer
overflow".into());
Review Comment:
```suggestion
let overflow = || ArrowError::InvalidArgumentError("Integer overflow
decoding variant object header".into());
```
##########
parquet-variant/src/variant/list.rs:
##########
@@ -78,25 +81,16 @@ impl<'m, 'v> VariantList<'m, 'v> {
false => OffsetSizeBytes::One,
};
- // Skip the header byte to read the num_elements
- let num_elements = num_elements_size.unpack_usize(value, 1, 0)?;
- let first_offset_byte = 1 + num_elements_size as usize;
-
- let overflow =
- || ArrowError::InvalidArgumentError("Variant value_byte_length
overflow".into());
-
- // 1. num_elements + 1
- let n_offsets = num_elements.checked_add(1).ok_or_else(overflow)?;
-
- // 2. (num_elements + 1) * offset_size
- let value_bytes = n_offsets
- .checked_mul(header.offset_size as usize)
- .ok_or_else(overflow)?;
+ // Skip the header byte to read the num_elements; the offset array
immediately follows
+ let num_elements = num_elements_size.unpack_usize(value,
NUM_HEADER_BYTES, 0)?;
+ let first_offset_byte = NUM_HEADER_BYTES + num_elements_size as usize;
- // 3. first_offset_byte + ...
- let first_value_byte = first_offset_byte
- .checked_add(value_bytes)
- .ok_or_else(overflow)?;
+ // (num_elements + 1) * offset_size + first_offset_byte
+ let first_value_byte = num_elements
+ .checked_add(1)
+ .and_then(|n| n.checked_mul(header.offset_size as usize))
+ .and_then(|n| n.checked_add(first_offset_byte))
+ .ok_or_else(|| ArrowError::InvalidArgumentError("Integer
overflow".into()))?;
Review Comment:
Adding some context might help here:
```suggestion
.ok_or_else(|| ArrowError::InvalidArgumentError("Integer
overflow decoding Variant array header".into()))?;
```
##########
parquet-variant/src/variant/list.rs:
##########
@@ -15,11 +15,14 @@
// specific language governing permissions and limitations
// under the License.
use crate::decoder::OffsetSizeBytes;
-use crate::utils::{first_byte_from_slice, slice_from_slice,
validate_fallible_iterator};
+use crate::utils::{first_byte_from_slice, slice_from_slice_at_offset,
validate_fallible_iterator};
use crate::variant::{Variant, VariantMetadata};
use arrow_schema::ArrowError;
+// The value header occupies one byte; use a named constant for readability
Review Comment:
👍
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]