alamb commented on code in PR #7906:
URL: https://github.com/apache/arrow-rs/pull/7906#discussion_r2201478740


##########
parquet-variant/src/variant/object.rs:
##########
@@ -242,6 +242,8 @@ impl<'m, 'v> VariantObject<'m, 'v> {
             } else {
                 // The metadata dictionary can't guarantee uniqueness or sortedness, so we have to parse out the corresponding field names
                 // to check lexicographical order
+                //
+                // Since we are probing the metadata dictionary by field id, we can also verify field ids are in-bounds

Review Comment:
   ```suggestion
                   // Since we are probing the metadata dictionary by field id, this also verifies field ids are in-bounds
   ```
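
   To make the comment's point concrete: probing a dictionary by field id is itself a bounds check. A minimal sketch of the pattern in plain Rust (the `Dictionary` type and `field_name` helper are hypothetical stand-ins, not this crate's actual API):

   ```rust
   /// Hypothetical stand-in for the metadata dictionary.
   struct Dictionary {
       names: Vec<String>,
   }

   impl Dictionary {
       /// Probing by field id returns `None` for an out-of-bounds id,
       /// so every successful probe is also an implicit bounds check.
       fn field_name(&self, field_id: usize) -> Option<&str> {
           self.names.get(field_id).map(String::as_str)
       }
   }

   fn main() {
       let dict = Dictionary {
           names: vec!["a".to_string(), "b".to_string()],
       };
       // An in-bounds probe succeeds...
       assert_eq!(dict.field_name(1), Some("b"));
       // ...while an out-of-bounds field id is rejected by the same
       // lookup, with no separate `field_id < len` validation pass.
       assert_eq!(dict.field_name(2), None);
   }
   ```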



##########
parquet-variant/src/variant/list.rs:
##########
@@ -216,28 +216,19 @@ impl<'m, 'v> VariantList<'m, 'v> {
                 self.header.first_offset_byte() as _..self.first_value_byte as _,
             )?;
 
-            let offsets =
-                map_bytes_to_offsets(offset_buffer, self.header.offset_size).collect::<Vec<_>>();
-
-            // Validate offsets are in-bounds and monotonically increasing.
-            // Since shallow verification checks whether the first and last offsets are in-bounds,
-            // we can also verify all offsets are in-bounds by checking if offsets are monotonically increasing.
-            let are_offsets_monotonic = offsets.is_sorted_by(|a, b| a < b);
-            if !are_offsets_monotonic {
-                return Err(ArrowError::InvalidArgumentError(
-                    "offsets are not monotonically increasing".to_string(),
-                ));
-            }
-
             let value_buffer = slice_from_slice(self.value, self.first_value_byte as _..)?;
 
             // Validate whether values are valid variant objects
-            for i in 1..offsets.len() {
-                let start_offset = offsets[i - 1];
-                let end_offset = offsets[i];
-
-                let value_bytes = slice_from_slice(value_buffer, start_offset..end_offset)?;
+            //
+            // Since we use offsets to slice into the value buffer, we can also verify all offsets are in-bounds

Review Comment:
   ```suggestion
               // Since we use offsets to slice into the value buffer, this also verifies all offsets are in-bounds
   ```



##########
parquet-variant/src/variant/metadata.rs:
##########
@@ -237,22 +237,15 @@ impl<'m> VariantMetadata<'m> {
             let offsets =
                 map_bytes_to_offsets(offset_bytes, self.header.offset_size).collect::<Vec<_>>();
 
-            // Validate offsets are in-bounds and monotonically increasing.
-            // Since shallow validation ensures the first and last offsets are in bounds, we can also verify all offsets
-            // are in-bounds by checking if offsets are monotonically increasing.
-            let are_offsets_monotonic = offsets.is_sorted_by(|a, b| a < b);
-            if !are_offsets_monotonic {
-                return Err(ArrowError::InvalidArgumentError(
-                    "offsets not monotonically increasing".to_string(),
-                ));
-            }
-
             // Verify the string values in the dictionary are UTF-8 encoded strings.
             let value_buffer =
                 string_from_slice(self.bytes, 0, self.first_value_byte as _..self.bytes.len())?;
 
             if self.header.is_sorted {
                 // Validate the dictionary values are unique and lexicographically sorted
+                //
+                // Since we use the offsets to access dictionary values, we can also validate

Review Comment:
   ```suggestion
                   // Since we use the offsets to access dictionary values, this also validates
   ```
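
   A sketch of what that combined pass can look like, assuming (hypothetically) that each dictionary value is read through a checked byte-range slice in the spirit of the crate's `string_from_slice`/`slice_from_slice` helpers; out-of-bounds offsets then surface as errors in the same loop that checks uniqueness and order:

   ```rust
   /// Hypothetical simplification of the dictionary validation pass:
   /// `offsets` delimit adjacent dictionary strings inside `values`.
   fn validate_sorted_unique(values: &[u8], offsets: &[usize]) -> Result<(), String> {
       let mut prev: Option<&str> = None;
       for pair in offsets.windows(2) {
           // Checked slicing rejects out-of-bounds offsets as a side
           // effect of accessing each dictionary value.
           let bytes = values
               .get(pair[0]..pair[1])
               .ok_or_else(|| "offset out of bounds".to_string())?;
           let s = std::str::from_utf8(bytes).map_err(|e| e.to_string())?;
           // A single strict `<` comparison between neighbors enforces
           // both uniqueness and lexicographic order.
           if let Some(p) = prev {
               if p >= s {
                   return Err("dictionary values not unique and sorted".to_string());
               }
           }
           prev = Some(s);
       }
       Ok(())
   }
   ```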



##########
parquet-variant/src/variant/list.rs:
##########
@@ -216,28 +216,19 @@ impl<'m, 'v> VariantList<'m, 'v> {
                 self.header.first_offset_byte() as _..self.first_value_byte as _,
             )?;
 
-            let offsets =
-                map_bytes_to_offsets(offset_buffer, self.header.offset_size).collect::<Vec<_>>();
-
-            // Validate offsets are in-bounds and monotonically increasing.
-            // Since shallow verification checks whether the first and last offsets are in-bounds,
-            // we can also verify all offsets are in-bounds by checking if offsets are monotonically increasing.
-            let are_offsets_monotonic = offsets.is_sorted_by(|a, b| a < b);
-            if !are_offsets_monotonic {
-                return Err(ArrowError::InvalidArgumentError(
-                    "offsets are not monotonically increasing".to_string(),
-                ));
-            }
-
             let value_buffer = slice_from_slice(self.value, self.first_value_byte as _..)?;
 
             // Validate whether values are valid variant objects
-            for i in 1..offsets.len() {
-                let start_offset = offsets[i - 1];
-                let end_offset = offsets[i];
-
-                let value_bytes = slice_from_slice(value_buffer, start_offset..end_offset)?;
+            //
+            // Since we use offsets to slice into the value buffer, we can also verify all offsets are in-bounds
+            // and monotonically increasing

Review Comment:
   I don't quite follow how this check ensures the offsets are monotonically increasing
   
   Is it because slice_from_slice requires the bounds to be increasing? 
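
   For reference, checked range slicing in Rust does reject reversed bounds: `slice.get(start..end)` returns `None` whenever `start > end` (or `end` runs past the buffer). So if `slice_from_slice` is a thin wrapper over a checked slice (an assumption about the helper, not a verified reading of it), a decreasing offset pair fails at the slicing step. One caveat: an empty range (`start == end`) still succeeds, so slicing alone enforces non-decreasing offsets rather than strictly increasing ones. A quick illustration:

   ```rust
   fn main() {
       let buf = [0u8, 1, 2, 3, 4];

       // A reversed range (start > end) is rejected by a checked slice,
       // so a slice-based helper errors out on decreasing offsets.
       assert_eq!(buf.get(3..1), None);

       // An end offset past the buffer is rejected the same way.
       assert_eq!(buf.get(2..9), None);

       // Caveat: an empty range (start == end) still succeeds, so checked
       // slicing alone yields non-decreasing, not strictly increasing.
       assert_eq!(buf.get(2..2), Some(&[][..]));
   }
   ```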



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
