bbeaudreault commented on code in PR #5373: URL: https://github.com/apache/hbase/pull/5373#discussion_r1386978831
########## hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java: ########## @@ -486,52 +500,184 @@ public boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS) @Override public boolean seekToPreviousRow(Cell originalKey) throws IOException { try { - try { - boolean keepSeeking = false; - Cell key = originalKey; - do { - Cell seekKey = PrivateCellUtil.createFirstOnRow(key); - if (seekCount != null) seekCount.increment(); - if (!hfs.seekBefore(seekKey)) { - this.cur = null; - return false; - } - Cell curCell = hfs.getCell(); - Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(curCell); - - if (seekCount != null) seekCount.increment(); - if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) { - this.cur = null; - return false; - } - - setCurrentCell(hfs.getCell()); - this.stopSkippingKVsIfNextRow = true; - boolean resultOfSkipKVs; - try { - resultOfSkipKVs = skipKVsNewerThanReadpoint(); - } finally { - this.stopSkippingKVsIfNextRow = false; - } - if (!resultOfSkipKVs || getComparator().compareRows(cur, firstKeyOfPreviousRow) > 0) { - keepSeeking = true; - key = firstKeyOfPreviousRow; - continue; - } else { - keepSeeking = false; - } - } while (keepSeeking); - return true; - } finally { - realSeekDone = true; + if (isFastSeekingEncoding) { + return seekToPreviousRowStateless(originalKey); + } else if (previousRow == null || getComparator().compareRows(previousRow, originalKey) > 0) { + return seekToPreviousRowWithoutHint(originalKey); + } else { + return seekToPreviousRowWithHint(); } } catch (FileNotFoundException e) { throw e; } catch (IOException ioe) { throw new IOException("Could not seekToPreviousRow " + this + " to key " + originalKey, ioe); + } finally { + this.realSeekDone = true; } } + /** + * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires one seek + * and one reseek. 
This method maintains state in {@link StoreFileScanner#previousRow} which only + * makes sense in the context of a sequential row-by-row reverse scan. + * {@link StoreFileScanner#previousRow} should be reset if that is not the case. The reasoning for + * why this method is faster than {@link StoreFileScanner#seekToPreviousRowStateless(Cell)} is + * that seeks are slower as they need to start from the beginning of the file, while reseeks go + * forward from the current position. + */ + private boolean seekToPreviousRowWithHint() throws IOException { + do { + Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(previousRow); + if (!seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow)) { + return false; + } + + if (!reseekAtOrAfter(firstKeyOfPreviousRow)) { + return false; + } + + if (isStillAtSeekTargetAfterSkippingNewerKvs(firstKeyOfPreviousRow)) { + return true; + } + + if (previousRow == null) { Review Comment: It would probably be useful to add a comment here explaining why this might happen and why we handle it this way. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@hbase.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org