wgtmac commented on code in PR #2406:
URL: https://github.com/apache/orc/pull/2406#discussion_r2358389192


##########
c++/src/Reader.cc:
##########
@@ -1138,13 +1171,37 @@ namespace orc {
 
     if (currentStripe_ < lastStripe_) {
       if (enableAsyncPrefetch_) {
-        // FIXME: this is very coarse since I/O ranges of all selected columns 
are about to
-        // prefetch. We can further evaluate index stream with knowledge of 
pruned row groups
-        // to issue less I/O ranges.
-        auto ranges = extractReadRangesForStripe(currentStripe_, 
currentStripeInfo_,
-                                                 currentStripeFooter_, 
selectedColumns_);
+        if (fullyCachedStripes_.find(currentStripe_) != 
fullyCachedStripes_.cend()) {
+          // Current stripe has been fully cached, do nothing.
+        } else if (isSmallStripe(currentStripeInfo_, 
contents_->cacheOptions.rangeSizeLimit)) {
+          std::vector<ReadRange> ranges;
+          uint64_t maxStripe =
+              std::min(lastStripe_, currentStripe_ + 
smallStripeLookAheadLimit_ + 1);
+          for (uint64_t stripe = currentStripe_; stripe < maxStripe; stripe++) 
{
+            const auto& stripeInfo = 
footer_->stripes(static_cast<int>(stripe));
+            if (!isSmallStripe(stripeInfo, 
contents_->cacheOptions.rangeSizeLimit)) {
+              break;
+            }
+            ranges.push_back(ReadRange{stripeInfo.offset(), 
getStripeSize(stripeInfo)});
+            fullyCachedStripes_.insert(stripe);
+          }
+          contents_->cacheRanges(std::move(ranges));
+        } else {
+          // This is very coarse since I/O ranges of all selected columns are 
about to prefetch.
+          // We can further evaluate index stream with knowledge of pruned row 
groups to issue
+          // less I/O ranges.
+          contents_->cacheRanges(extractReadRangesForStripe(
+              currentStripe_, currentStripeInfo_, currentStripeFooter_, 
selectedColumns_));
+          // Cache footer of next stripe to avoid blocking I/O.
+          if (currentStripe_ + 1 < lastStripe_) {
+            const auto& nextStripe = 
footer_->stripes(static_cast<int>(currentStripe_ + 1));
+            contents_->cacheRanges(std::vector<ReadRange>{ReadRange{
+                nextStripe.offset() + nextStripe.index_length() + 
nextStripe.data_length(),
+                nextStripe.footer_length()}});
+          }
+        }
+
         contents_->evictCache(currentStripeInfo_.offset());

Review Comment:
   Good point!



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to