coderex2522 commented on code in PR #1087: URL: https://github.com/apache/orc/pull/1087#discussion_r851590537
########## c++/src/Reader.cc: ########## @@ -1186,41 +1186,46 @@ namespace orc { uint64_t currentRowInStripe, uint64_t rowsInCurrentStripe, uint64_t rowIndexStride, - const std::vector<bool>& includedRowGroups) { + const std::vector<uint64_t>& nextSkippedRows) { // In case of PPD, batch size should be aware of row group boundaries. If only a subset of row // groups are selected then marker position is set to the end of range (subset of row groups // within stripe). uint64_t endRowInStripe = rowsInCurrentStripe; - if (!includedRowGroups.empty()) { - endRowInStripe = currentRowInStripe; - uint32_t rg = static_cast<uint32_t>(currentRowInStripe / rowIndexStride); - for (; rg < includedRowGroups.size(); ++rg) { - if (!includedRowGroups[rg]) { - break; - } else { - endRowInStripe = std::min(rowsInCurrentStripe, (rg + 1) * rowIndexStride); - } - } + uint64_t groupsInStripe = nextSkippedRows.size(); + if (groupsInStripe > 0) { + auto rg = static_cast<uint32_t>(currentRowInStripe / rowIndexStride); + if (rg >= groupsInStripe) return 0; + uint64_t nextSkippedRow = nextSkippedRows[rg]; + if (nextSkippedRow == 0) return 0; + endRowInStripe = nextSkippedRow; } return std::min(requestedSize, endRowInStripe - currentRowInStripe); } uint64_t RowReaderImpl::advanceToNextRowGroup(uint64_t currentRowInStripe, uint64_t rowsInCurrentStripe, uint64_t rowIndexStride, - const std::vector<bool>& includedRowGroups) { - if (!includedRowGroups.empty()) { - uint32_t rg = static_cast<uint32_t>(currentRowInStripe / rowIndexStride); - for (; rg < includedRowGroups.size(); ++rg) { - if (includedRowGroups[rg]) { - return currentRowInStripe; - } else { - // advance to start of next row group - currentRowInStripe = (rg + 1) * rowIndexStride; - } - } - } - return std::min(currentRowInStripe, rowsInCurrentStripe); + const std::vector<uint64_t>& nextSkippedRows) { + auto groupsInStripe = nextSkippedRows.size(); + if (groupsInStripe == 0) { + // No PPD, keeps using the current row in stripe + return std::min(currentRowInStripe, rowsInCurrentStripe); + } + auto rg = static_cast<uint32_t>(currentRowInStripe / rowIndexStride); + if (rg >= groupsInStripe) { + // Points to the end of the stripe + return rowsInCurrentStripe; + } + if (nextSkippedRows[rg] != 0) { + // Current row group is selected + return currentRowInStripe; + } Review Comment: Does Line 1219-1221 have redundancy with the following logic(Line 1224-1227)? It seems that deleting Line 1219-1221 does not affect the results. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@orc.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org