(hbase) 01/01: HBASE-29103 Avoid excessive allocations during reverse scanning when seeking to next row (#6643)

rmattingly Wed, 19 Mar 2025 15:21:22 -0700

This is an automated email from the ASF dual-hosted git repository.

rmattingly pushed a commit to branch HBASE-29103-branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


commit 4c7347939a106fc311f4553079230d1b7ac4b273
Author: jbewing <[email protected]>
AuthorDate: Wed Mar 19 18:17:44 2025 -0400

    HBASE-29103 Avoid excessive allocations during reverse scanning when seeking to next row (#6643)
    
    Signed-off-by: Nick Dimiduk <[email protected]>
    Signed-off-by: Ray Mattingly <[email protected]>
---
 .../hbase/regionserver/StoreFileScanner.java       | 40 ++++++++++++----------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
index 6e0824a16c6..6ce1a3236c4 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileScanner.java
@@ -63,7 +63,10 @@ public class StoreFileScanner implements KeyValueScanner {
   // if have encountered the next row. Only used for reversed scan
   private boolean stopSkippingKVsIfNextRow = false;
   // A Cell that represents the row before the most previously seeked to row in seekToPreviousRow
-  private Cell previousRow = null;
+  // Note: Oftentimes this will contain an instance of a KeyOnly implementation of the Cell as it's
+  // not returned to callers and only used as a hint for seeking (so we can save on
+  // memory/allocations)
+  private ExtendedCell previousRow = null;
   // Whether the underlying HFile is using a data block encoding that has lower cost for seeking to
   // a row from the beginning of a block (i.e. RIV1). If the data block encoding has a high cost for
   // seeks, then we can use a modified reverse scanning algorithm to reduce seeks from the beginning
@@ -519,13 +522,14 @@ public class StoreFileScanner implements KeyValueScanner {
   }
 
   /**
-   * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires one seek
-   * and one reseek. This method maintains state in {@link StoreFileScanner#previousRow} which only
-   * makes sense in the context of a sequential row-by-row reverse scan.
+   * This variant of the {@link StoreFileScanner#seekToPreviousRow(ExtendedCell)} method requires
+   * one seek and one reseek. This method maintains state in {@link StoreFileScanner#previousRow}
+   * which only makes sense in the context of a sequential row-by-row reverse scan.
    * {@link StoreFileScanner#previousRow} should be reset if that is not the case. The reasoning for
-   * why this method is faster than {@link StoreFileScanner#seekToPreviousRowStateless(Cell)} is
-   * that seeks are slower as they need to start from the beginning of the file, while reseeks go
-   * forward from the current position.
+   * why this method is faster than
+   * {@link StoreFileScanner#seekToPreviousRowStateless(ExtendedCell)} is that seeks are slower as
+   * they need to start from the beginning of the file, while reseeks go forward from the current
+   * position.
    */
   private boolean seekToPreviousRowWithHint() throws IOException {
     do {
@@ -555,12 +559,12 @@ public class StoreFileScanner implements KeyValueScanner {
   }
 
   /**
-   * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires two seeks
-   * and one reseek. The extra expense/seek is with the intent of speeding up subsequent calls by
-   * using the {@link StoreFileScanner#seekToPreviousRowWithHint} which this method seeds the state
-   * for by setting {@link StoreFileScanner#previousRow}
+   * This variant of the {@link StoreFileScanner#seekToPreviousRow(ExtendedCell)} method requires
+   * two seeks and one reseek. The extra expense/seek is with the intent of speeding up subsequent
+   * calls by using the {@link StoreFileScanner#seekToPreviousRowWithHint} which this method seeds
+   * the state for by setting {@link StoreFileScanner#previousRow}
    */
-  private boolean seekToPreviousRowWithoutHint(Cell originalKey) throws IOException {
+  private boolean seekToPreviousRowWithoutHint(ExtendedCell originalKey) throws IOException {
     // Rewind to the cell before the beginning of this row
     ExtendedCell keyAtBeginningOfRow = PrivateCellUtil.createFirstOnRow(originalKey);
     if (!seekBefore(keyAtBeginningOfRow)) {
@@ -568,7 +572,7 @@ public class StoreFileScanner implements KeyValueScanner {
     }
 
     // Rewind before this row and save what we find as a seek hint
-    ExtendedCell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(hfs.getCell());
+    ExtendedCell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(hfs.getKey());
     seekBeforeAndSaveKeyToPreviousRow(firstKeyOfPreviousRow);
 
     // Seek back to the start of the previous row
@@ -596,9 +600,9 @@ public class StoreFileScanner implements KeyValueScanner {
   }
 
   /**
-   * This variant of the {@link StoreFileScanner#seekToPreviousRow(Cell)} method requires two seeks.
-   * It should be used if the cost for seeking is lower i.e. when using a fast seeking data block
-   * encoding like RIV1.
+   * This variant of the {@link StoreFileScanner#seekToPreviousRow(ExtendedCell)} method requires
+   * two seeks. It should be used if the cost for seeking is lower i.e. when using a fast seeking
+   * data block encoding like RIV1.
    */
   private boolean seekToPreviousRowStateless(ExtendedCell originalKey) throws IOException {
     ExtendedCell key = originalKey;
@@ -608,7 +612,7 @@ public class StoreFileScanner implements KeyValueScanner {
         return false;
       }
 
-      ExtendedCell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(hfs.getCell());
+      ExtendedCell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(hfs.getKey());
       if (!seekAtOrAfter(firstKeyOfPreviousRow)) {
         return false;
       }
@@ -651,7 +655,7 @@ public class StoreFileScanner implements KeyValueScanner {
       hfs.seekTo();
       this.previousRow = null;
     } else {
-      this.previousRow = hfs.getCell();
+      this.previousRow = hfs.getKey();
     }
   }

(hbase) 01/01: HBASE-29103 Avoid excessive allocations during reverse scanning when seeking to next row (#6643)

Reply via email to