ffacs commented on code in PR #2406:
URL: https://github.com/apache/orc/pull/2406#discussion_r2358321226
##########
c++/src/Reader.cc:
##########
@@ -1138,13 +1171,37 @@ namespace orc {
if (currentStripe_ < lastStripe_) {
if (enableAsyncPrefetch_) {
- // FIXME: this is very coarse since I/O ranges of all selected columns
are about to
- // prefetch. We can further evaluate index stream with knowledge of
pruned row groups
- // to issue less I/O ranges.
- auto ranges = extractReadRangesForStripe(currentStripe_,
currentStripeInfo_,
- currentStripeFooter_,
selectedColumns_);
+ if (fullyCachedStripes_.find(currentStripe_) !=
fullyCachedStripes_.cend()) {
+ // Current stripe has been fully cached, do nothing.
+ } else if (isSmallStripe(currentStripeInfo_,
contents_->cacheOptions.rangeSizeLimit)) {
+ std::vector<ReadRange> ranges;
+ uint64_t maxStripe =
+ std::min(lastStripe_, currentStripe_ +
smallStripeLookAheadLimit_ + 1);
+ for (uint64_t stripe = currentStripe_; stripe < maxStripe; stripe++)
{
+ const auto& stripeInfo =
footer_->stripes(static_cast<int>(stripe));
+ if (!isSmallStripe(stripeInfo,
contents_->cacheOptions.rangeSizeLimit)) {
+ break;
+ }
+ ranges.push_back(ReadRange{stripeInfo.offset(),
getStripeSize(stripeInfo)});
+ fullyCachedStripes_.insert(stripe);
+ }
+ contents_->cacheRanges(std::move(ranges));
+ } else {
+ // This is very coarse since I/O ranges of all selected columns are
about to prefetch.
+ // We can further evaluate index stream with knowledge of pruned row
groups to issue
+ // less I/O ranges.
+ contents_->cacheRanges(extractReadRangesForStripe(
+ currentStripe_, currentStripeInfo_, currentStripeFooter_,
selectedColumns_));
+ // Cache footer of next stripe to avoid blocking I/O.
+ if (currentStripe_ + 1 < lastStripe_) {
+ const auto& nextStripe =
footer_->stripes(static_cast<int>(currentStripe_ + 1));
+ contents_->cacheRanges(std::vector<ReadRange>{ReadRange{
+ nextStripe.offset() + nextStripe.index_length() +
nextStripe.data_length(),
+ nextStripe.footer_length()}});
+ }
+ }
+
contents_->evictCache(currentStripeInfo_.offset());
Review Comment:
Maybe it would be better to call `evictCache` before prefetching?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]