emkornfield commented on code in PR #14142: URL: https://github.com/apache/arrow/pull/14142#discussion_r997720489
########## cpp/src/parquet/column_reader.cc: ########## @@ -1401,16 +1408,50 @@ class TypedRecordReader : public TypedColumnReaderImpl<DType>, return skipped_records; } - // Skip records for repeated fields. Returns number of skipped records. + // Attempts to skip num_records from the buffer. Will throw away levels + // and corresponding values for the records it skipped and consumes them from the + // underlying decoder. Will advance levels_position_ and update + // at_record_start_. + // Returns how many records were skipped. + int64_t DelimitAndSkipRecordsInBuffer(int64_t num_records) { + if (num_records == 0) return 0; + // Look at the buffered levels, delimit them based on + // (rep_level == 0), report back how many records are in there, and + // fill in how many not-null values (def_level == max_def_level_). + // DelimitRecords updates levels_position_. + int64_t start_levels_position = levels_position_; + int64_t values_seen = 0; + int64_t skipped_records = DelimitRecords(num_records, &values_seen); + if (ReadAndThrowAwayValues(values_seen) != values_seen) { + throw ParquetException("Could not read and throw away requested values"); Review Comment: Adding details to this exception message (the number of values actually read versus the number requested, and the current position) might be useful when debugging issues. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org