tustvold commented on code in PR #1995:
URL: https://github.com/apache/arrow-rs/pull/1995#discussion_r912420202
##########
parquet/src/column/reader.rs:
##########
@@ -205,94 +197,74 @@ where
// Read exhaustively all pages until we read all batch_size
values/levels
// or there are no more values/levels to read.
- while max(values_read, levels_read) < batch_size {
+ while levels_read < batch_size {
if !self.has_next()? {
break;
}
// Batch size for the current iteration
- let iter_batch_size = {
- // Compute approximate value based on values decoded so far
- let mut adjusted_size = min(
- batch_size,
- (self.num_buffered_values - self.num_decoded_values) as
usize,
- );
-
- // Adjust batch size by taking into account how much data there
- // to read. As batch_size is also smaller than value and level
- // slices (if available), this ensures that available space is
not
- // exceeded.
- adjusted_size = min(adjusted_size, batch_size - values_read);
- adjusted_size = min(adjusted_size, batch_size - levels_read);
-
- adjusted_size
- };
+ let iter_batch_size = (batch_size - levels_read)
+ .min((self.num_buffered_values - self.num_decoded_values) as
usize);
// If the field is required and non-repeated, there are no
definition levels
- let (num_def_levels, null_count) = match def_levels.as_mut() {
- Some(levels) if self.descr.max_def_level() > 0 => {
+ let null_count = match self.descr.max_def_level() > 0 {
+ true => {
+ let levels = def_levels
+ .as_mut()
+ .ok_or_else(|| general_err!("must specify definition
levels"))?;
+
let num_def_levels = self
.def_level_decoder
.as_mut()
.expect("def_level_decoder be set")
- .read(*levels, levels_read..levels_read +
iter_batch_size)?;
+ .read(levels, levels_read..levels_read +
iter_batch_size)?;
+
+ if num_def_levels != iter_batch_size {
+ return Err(general_err!("insufficient definition
levels read from column - expected {}, got {}", iter_batch_size,
num_def_levels));
Review Comment:
This, and similar checks below, will prevent #1997
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]