This is an automated email from the ASF dual-hosted git repository.
virajjasani pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new 1814f60ea41 HBASE-29974: Persist filter hints across scan circuit
breaks (#8213) (#7882)
1814f60ea41 is described below
commit 1814f60ea4128387643d2b302367be25d116cd6f
Author: Shubham Roy <[email protected]>
AuthorDate: Sun May 10 23:44:15 2026 +0530
HBASE-29974: Persist filter hints across scan circuit breaks (#8213) (#7882)
Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
Generated-by: Claude Opus 4.6 (1M context) <[email protected]>
Signed-off-by: Viraj Jasani <[email protected]>
---
.../org/apache/hadoop/hbase/filter/Filter.java | 83 +++
.../org/apache/hadoop/hbase/filter/FilterBase.java | 22 +
.../apache/hadoop/hbase/filter/FilterWrapper.java | 20 +
.../hbase/regionserver/RegionScannerImpl.java | 102 +++-
.../querymatcher/UserScanQueryMatcher.java | 93 ++-
.../hbase/filter/TestFilterHintForRejectedRow.java | 646 +++++++++++++++++++++
.../querymatcher/TestUserScanQueryMatcher.java | 224 +++++++
7 files changed, 1187 insertions(+), 3 deletions(-)
diff --git
a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java
b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java
index 145d532fa7f..7b958ac8b5d 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/Filter.java
@@ -30,6 +30,10 @@ import org.apache.yetus.audience.InterfaceAudience;
* <li>{@link #reset()} : reset the filter state before filtering a new
row.</li>
* <li>{@link #filterAllRemaining()}: true means row scan is over; false means
keep going.</li>
* <li>{@link #filterRowKey(Cell)}: true means drop this row; false means
include.</li>
+ * <li>{@link #getHintForRejectedRow(Cell)}: if {@code filterRowKey} returned
true, optionally
+ * provide a seek hint to skip past the rejected row efficiently.</li>
+ * <li>{@link #getSkipHint(Cell)}: when a cell is structurally skipped
(time-range, column, or
+ * version gate) before {@code filterCell} is reached, optionally provide a
seek hint.</li>
* <li>{@link #filterCell(Cell)}: decides whether to include or exclude this
Cell. See
* {@link ReturnCode}.</li>
* <li>{@link #transformCell(Cell)}: if the Cell is included, let the filter
transform the Cell.
@@ -224,6 +228,85 @@ public abstract class Filter {
*/
abstract public Cell getNextCellHint(final Cell currentCell) throws
IOException;
+ /**
+ * Provides a seek hint to bypass row-by-row scanning after {@link
#filterRowKey(Cell)} rejects a
+ * row. When {@code filterRowKey} returns {@code true} the scan pipeline
would normally iterate
+ * through every remaining cell in the rejected row one-by-one (via {@code
nextRow()}) before
+ * moving on. If the filter can determine a better forward position — for
example, the next range
+ * boundary in a {@code MultiRowRangeFilter} — it should return that target
cell here, allowing
+ * the scanner to seek directly past the unwanted rows.
+ * <p>
+ * Contract:
+ * <ul>
+ * <li>Only called after {@link #filterRowKey(Cell)} has returned {@code
true} for the same
+ * {@code firstRowCell}.</li>
+ * <li>Implementations may use state that was set during {@link
#filterRowKey(Cell)} (e.g. an
+ * updated range pointer), but <strong>must not</strong> invoke {@link
#filterCell(Cell)} logic —
+ * the caller guarantees that {@code filterCell} has not been called for
this row.</li>
+ * <li>The returned {@link Cell}, if non-null, must be an
+ * {@link org.apache.hadoop.hbase.ExtendedCell} because filters are
evaluated on the server
+ * side.</li>
+ * <li>Returning {@code null} (the default) falls through to the existing
{@code nextRow()}
+ * behaviour, preserving full backward compatibility.</li>
+ * <li>For reversed scans ({@link
org.apache.hadoop.hbase.client.Scan#isReversed()}), the hint
+ * must point to a <em>smaller</em> row key (earlier in reverse-scan
direction). The scanner
+ * validates hint direction and falls back to {@code nextRow()} if the hint
does not advance in
+ * the scan direction.</li>
+ * <li><strong>Composite filter limitation:</strong> {@code FilterList},
{@code SkipFilter}, and
+ * {@code WhileMatchFilter} do not currently delegate this method to wrapped
sub-filters. Hints
+ * from filters used inside these wrappers will be silently ignored.</li>
+ * </ul>
+ * @param firstRowCell the first cell encountered in the rejected row;
contains the row key that
+ * was passed to {@code filterRowKey}
+ * @return a {@link Cell} representing the earliest position the scanner
should seek to, or
+ * {@code null} if this filter cannot provide a better position than
a sequential skip
+ * @throws IOException in case an I/O or filter-specific failure needs to be
signaled
+ * @see #filterRowKey(Cell)
+ */
+ public Cell getHintForRejectedRow(final Cell firstRowCell) throws
IOException {
+ return null;
+ }
+
+ /**
+ * Provides a seek hint for cells that are structurally skipped by the scan
pipeline
+ * <em>before</em> {@link #filterCell(Cell)} is ever reached. The pipeline
short-circuits on
+ * several criteria — time-range mismatch, column-set exclusion, and
version-limit exhaustion —
+ * and in each case the filter is bypassed entirely. When an implementation
can compute a
+ * meaningful forward position purely from the cell's coordinates (without
needing the
+ * {@code filterCell} call sequence), it should return that position here so
the scanner can seek
+ * ahead instead of advancing one cell at a time.
+ * <p>
+ * Contract:
+ * <ul>
+ * <li>May be called for cells that have <strong>never</strong> been passed
to
+ * {@link #filterCell(Cell)}.</li>
+ * <li>Implementations <strong>must not</strong> modify any filter state;
this method is treated
+ * as logically stateless. Only filters whose hint computation is based
solely on immutable
+ * configuration (e.g. a fixed column range or a fuzzy-row pattern) should
override this.</li>
+ * <li>The returned {@link Cell}, if non-null, must be an
+ * {@link org.apache.hadoop.hbase.ExtendedCell} because filters are
evaluated on the server
+ * side.</li>
+ * <li>Returning {@code null} (the default) falls through to the existing
structural skip/seek
+ * behaviour, preserving full backward compatibility.</li>
+ * <li>For reversed scans, the returned cell must have a <em>smaller</em>
row key (i.e., earlier
+ * in reverse-scan direction) than the {@code skippedCell}. Hints that do
not advance in the scan
+ * direction are silently ignored.</li>
+ * <li><strong>Composite filter limitation:</strong> {@code FilterList},
{@code SkipFilter}, and
+ * {@code WhileMatchFilter} do not currently delegate this method to wrapped
sub-filters. Hints
+ * from filters used inside these wrappers will be silently ignored.</li>
+ * </ul>
+ * @param skippedCell the cell that was rejected by the time-range, column,
or version gate before
+ * {@code filterCell} could be consulted
+ * @return a {@link Cell} representing the earliest position the scanner
should seek to, or
+ * {@code null} if this filter cannot provide a better position than
the structural hint
+ * @throws IOException in case an I/O or filter-specific failure needs to be
signaled
+ * @see #filterCell(Cell)
+ * @see #getNextCellHint(Cell)
+ */
+ public Cell getSkipHint(final Cell skippedCell) throws IOException {
+ return null;
+ }
+
/**
* Check that given column family is essential for filter to check row. Most
filters always return
* true here. But some could have more sophisticated logic which could
significantly reduce
diff --git
a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java
b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java
index 8c330d6aa9b..e2ca7444bd5 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/filter/FilterBase.java
@@ -112,6 +112,28 @@ public abstract class FilterBase extends Filter {
return null;
}
+ /**
+ * Filters that cannot provide a seek hint after row-key rejection can
inherit this no-op
+ * implementation. Subclasses whose row-key logic (e.g. a range pointer
advanced inside
+ * {@link #filterRowKey(Cell)}) makes a better seek target available should
override this.
+ * {@inheritDoc}
+ */
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException {
+ return null;
+ }
+
+ /**
+ * Filters that cannot provide a structural-skip seek hint can inherit this
no-op implementation.
+ * Subclasses with purely configuration-driven, stateless hint computation
(e.g. a fixed column
+ * range or fuzzy-row pattern) may override this to avoid cell-by-cell
advancement when the
+ * time-range, column, or version gate fires. {@inheritDoc}
+ */
+ @Override
+ public Cell getSkipHint(Cell skippedCell) throws IOException {
+ return null;
+ }
+
/**
* By default, we require all scan's column families to be present. Our
subclasses may be more
* precise. {@inheritDoc}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java
index c31fc25ec82..77494b6cd2e 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/filter/FilterWrapper.java
@@ -93,6 +93,26 @@ final public class FilterWrapper extends Filter {
return this.filter.getNextCellHint(currentCell);
}
+ /**
+ * Delegates to the wrapped filter's {@link
Filter#getHintForRejectedRow(Cell)} so that the scan
+ * pipeline can seek directly past a rejected row rather than iterating
cell-by-cell via
+ * {@code nextRow()}. {@inheritDoc}
+ */
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) throws IOException {
+ return this.filter.getHintForRejectedRow(firstRowCell);
+ }
+
+ /**
+ * Delegates to the wrapped filter's {@link Filter#getSkipHint(Cell)} so
that the scan pipeline
+ * can seek ahead when a cell is structurally skipped before {@code
filterCell} is reached.
+ * {@inheritDoc}
+ */
+ @Override
+ public Cell getSkipHint(Cell skippedCell) throws IOException {
+ return this.filter.getSkipHint(skippedCell);
+ }
+
@Override
public boolean filterRowKey(byte[] buffer, int offset, int length) throws
IOException {
// No call to this.
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java
index 0705b454de1..ac3cfbe514e 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RegionScannerImpl.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.ExtendedCell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.KeyValue;
@@ -86,6 +87,7 @@ public class RegionScannerImpl implements RegionScanner,
Shipper, RpcCallback {
protected final byte[] stopRow;
protected final boolean includeStopRow;
+ protected final boolean reversed;
protected final HRegion region;
protected final CellComparator comparator;
@@ -130,6 +132,7 @@ public class RegionScannerImpl implements RegionScanner,
Shipper, RpcCallback {
defaultScannerContext =
ScannerContext.newBuilder().setBatchLimit(scan.getBatch()).build();
this.stopRow = scan.getStopRow();
this.includeStopRow = scan.includeStopRow();
+ this.reversed = scan.isReversed();
this.operationId = scan.getId();
// synchronize on scannerReadPoints so that nobody calculates
@@ -503,11 +506,18 @@ public class RegionScannerImpl implements RegionScanner,
Shipper, RpcCallback {
if (isFilterDoneInternal()) {
return
scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
+ // HBASE-29974: ask the filter for a seek hint so we can jump
directly past the rejected
+ // row instead of iterating through its cells one-by-one via
nextRow().
+ ExtendedCell rowHint = getHintForRejectedRow(current);
// Typically the count of rows scanned is incremented inside
#populateResult. However,
// here we are filtering a row based purely on its row key,
preventing us from calling
- // #populateResult. Thus, perform the necessary increment here to
rows scanned metric
+ // #populateResult. Thus, perform the necessary increment here to
rows scanned metric.
+ // Placed after getHintForRejectedRow so that a buggy filter
throwing DNRIOE doesn't
+ // leave the metric incremented for a row that was never actually
processed.
incrementCountOfRowsScannedMetric(scannerContext);
- boolean moreRows = nextRow(scannerContext, current);
+ boolean moreRows = (rowHint != null)
+ ? nextRowViaHint(scannerContext, current, rowHint)
+ : nextRow(scannerContext, current);
if (!moreRows) {
return
scannerContext.setScannerState(NextState.NO_MORE_VALUES).hasMoreValues();
}
@@ -749,6 +759,94 @@ public class RegionScannerImpl implements RegionScanner,
Shipper, RpcCallback {
|| this.region.getCoprocessorHost().postScannerFilterRow(this,
curRowCell);
}
+ /**
+ * Fast-path alternative to {@link #nextRow} used when the filter has
provided a seek hint via
+ * {@link
org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}. Instead of
iterating
+ * through every cell in the rejected row one-by-one, this method issues a
single seek to jump
+ * directly to the filter's suggested position ({@code requestSeek} for
forward scans,
+ * {@code backwardSeek} for reversed scans).
+ * <p>
+ * The skipping-row mode flag is set around the seek so that block-level
size tracking continues
+ * to function (consistent with {@link #nextRow}), and the filter state is
reset afterwards so the
+ * next row starts with a clean filter context.
+ * <p>
+ * <strong>Stop-row invariant:</strong> This method does not validate that
{@code hint} falls
+ * within the scan's stop row. If the hint overshoots, the next iteration's
+ * {@link #shouldStop(Cell)} check catches it and returns NO_MORE_VALUES.
One wasted seek may
+ * occur, but correctness is maintained.
+ * <p>
+ * <strong>Metrics note:</strong> The rows-scanned metric is incremented
once by the caller for
+ * the rejected row. Rows physically skipped by the seek are not
individually counted — this
+ * reflects the fact that no per-row work was done for those rows.
+ * <p>
+ * <strong>Coprocessor note:</strong> {@code postScannerFilterRow} is
invoked once with
+ * {@code curRowCell}, not once per skipped row. Coprocessors counting
filtered rows should be
+ * aware of this semantic when the hint path is used.
+ * @param scannerContext scanner context used for limit tracking
+ * @param curRowCell the first cell of the row that was rejected by
{@code filterRowKey};
+ * passed to the coprocessor hook for observability
+ * @param hint the validated {@link ExtendedCell} returned by the
filter; the scanner
+ * will seek to this position
+ * @return {@code true} if scanning should continue, {@code false} if a
coprocessor requests an
+ * early stop (mirrors the contract of {@link #nextRow})
+ * @throws IOException if the seek or the coprocessor hook signals a failure
+ */
+ private boolean nextRowViaHint(ScannerContext scannerContext, Cell
curRowCell, ExtendedCell hint)
+ throws IOException {
+ assert this.joinedContinuationRow == null : "Trying to go to next row
during joinedHeap read.";
+
+ int difference = comparator.compareRows(hint, curRowCell);
+ if ((!reversed && difference > 0) || (reversed && difference < 0)) {
+ scannerContext.setSkippingRow(true);
+ if (reversed) {
+ // ReversedKeyValueHeap does not support requestSeek; use backwardSeek
+ // to position at-or-before the hint within the target row.
+ // seekToPreviousRow would skip past the hint row entirely.
+ this.storeHeap.backwardSeek(hint);
+ } else {
+ this.storeHeap.requestSeek(hint, true, true);
+ }
+ scannerContext.setSkippingRow(false);
+
+ resetFilters();
+
+ return this.region.getCoprocessorHost() == null
+ || this.region.getCoprocessorHost().postScannerFilterRow(this,
curRowCell);
+ }
+
+ return nextRow(scannerContext, curRowCell);
+ }
+
+ /**
+ * Asks the current {@link org.apache.hadoop.hbase.filter.FilterWrapper} for
a seek hint to use
+ * after a row has been rejected by {@link #filterRowKey}. If the wrapped
filter overrides
+ * {@link
org.apache.hadoop.hbase.filter.Filter#getHintForRejectedRow(Cell)}, this
returns its
+ * answer as an {@link ExtendedCell}; otherwise returns {@code null}.
+ * <p>
+ * The returned cell is validated to be an {@link ExtendedCell} because
filters run on the server
+ * side and the scanner infrastructure requires {@code ExtendedCell}
references.
+ * @param rowCell the first cell of the rejected row (same cell passed to
{@code filterRowKey})
+ * @return a validated {@link ExtendedCell} seek target, or {@code null} if
the filter provides no
+ * hint
+ * @throws DoNotRetryIOException if the filter returns a non-{@link
ExtendedCell} instance
+ * @throws IOException if the filter signals an I/O failure
+ */
+ private ExtendedCell getHintForRejectedRow(Cell rowCell) throws IOException {
+ if (filter == null) {
+ return null;
+ }
+ Cell hint = filter.getHintForRejectedRow(rowCell);
+ if (hint == null) {
+ return null;
+ }
+ if (!(hint instanceof ExtendedCell)) {
+ throw new DoNotRetryIOException(
+ "Incorrect filter implementation: the Cell returned by
getHintForRejectedRow "
+ + "is not an ExtendedCell. Filter class: " +
filter.getClass().getName());
+ }
+ return (ExtendedCell) hint;
+ }
+
protected boolean shouldStop(Cell currentRowCell) {
if (currentRowCell == null) {
return true;
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java
index c07b91b77e6..0c7c902afb9 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/UserScanQueryMatcher.java
@@ -61,6 +61,18 @@ public abstract class UserScanQueryMatcher extends
ScanQueryMatcher {
private ExtendedCell curColCell = null;
+ /**
+ * Holds a seek-hint produced by {@link
org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)}
+ * at one of the structural short-circuit points in {@link #matchColumn}.
When non-null this is
+ * returned by {@link #getNextKeyHint} instead of delegating to
+ * {@link org.apache.hadoop.hbase.filter.Filter#getNextCellHint}, because
the hint was computed
+ * for a cell that never reached {@code filterCell}. Cleared on every {@link
#getNextKeyHint} call
+ * so it cannot leak across multiple seek-hint cycles.
+ */
+ private ExtendedCell pendingSkipHint = null;
+
+ private final boolean reversed;
+
private static ExtendedCell createStartKey(Scan scan, ScanInfo scanInfo) {
if (scan.includeStartRow()) {
return createStartKeyFromRow(scan.getStartRow(), scanInfo);
@@ -82,6 +94,7 @@ public abstract class UserScanQueryMatcher extends
ScanQueryMatcher {
this.versionsAfterFilter = 0;
}
this.stopRow = scan.getStopRow();
+ this.reversed = scan.isReversed();
TimeRange timeRange =
scan.getColumnFamilyTimeRange().get(scanInfo.getFamily());
if (timeRange == null) {
this.tr = scan.getTimeRange();
@@ -107,6 +120,11 @@ public abstract class UserScanQueryMatcher extends
ScanQueryMatcher {
@Override
public ExtendedCell getNextKeyHint(ExtendedCell cell) throws IOException {
+ if (pendingSkipHint != null) {
+ ExtendedCell hint = pendingSkipHint;
+ pendingSkipHint = null;
+ return hint;
+ }
if (filter == null) {
return null;
} else {
@@ -118,7 +136,6 @@ public abstract class UserScanQueryMatcher extends
ScanQueryMatcher {
+ "the Cell returned by getNextKeyHint is not an ExtendedCell.
Filter class: "
+ filter.getClass().getName());
}
-
}
}
@@ -128,20 +145,47 @@ public abstract class UserScanQueryMatcher extends
ScanQueryMatcher {
if (curColCell != null) {
this.curColCell = KeyValueUtil.toNewKeyCell(this.curColCell);
}
+ if (pendingSkipHint != null) {
+ this.pendingSkipHint = KeyValueUtil.toNewKeyCell(this.pendingSkipHint);
+ }
}
+ @Override
+ public void setToNewRow(ExtendedCell currentRow) {
+ pendingSkipHint = null;
+ super.setToNewRow(currentRow);
+ }
+
+ @Override
+ public void clearCurrentRow() {
+ pendingSkipHint = null;
+ super.clearCurrentRow();
+ }
+
+ // At each structural short-circuit below (time-range, column-exclusion,
version-exhaustion),
+ // the filter is consulted via resolveSkipHint() before falling back to the
default skip/seek
+ // code. This lets filters provide a forward seek target even when
filterCell is never called.
protected final MatchCode matchColumn(ExtendedCell cell, long timestamp,
byte typeByte)
throws IOException {
int tsCmp = tr.compare(timestamp);
if (tsCmp > 0) {
+ if (resolveSkipHint(cell)) {
+ return MatchCode.SEEK_NEXT_USING_HINT;
+ }
return MatchCode.SKIP;
}
if (tsCmp < 0) {
+ if (resolveSkipHint(cell)) {
+ return MatchCode.SEEK_NEXT_USING_HINT;
+ }
return columns.getNextRowOrNextColumn(cell);
}
// STEP 1: Check if the column is part of the requested columns
MatchCode matchCode = columns.checkColumn(cell, typeByte);
if (matchCode != MatchCode.INCLUDE) {
+ if (resolveSkipHint(cell)) {
+ return MatchCode.SEEK_NEXT_USING_HINT;
+ }
return matchCode;
}
/*
@@ -151,8 +195,14 @@ public abstract class UserScanQueryMatcher extends
ScanQueryMatcher {
matchCode = columns.checkVersions(cell, timestamp, typeByte, false);
switch (matchCode) {
case SKIP:
+ if (resolveSkipHint(cell)) {
+ return MatchCode.SEEK_NEXT_USING_HINT;
+ }
return MatchCode.SKIP;
case SEEK_NEXT_COL:
+ if (resolveSkipHint(cell)) {
+ return MatchCode.SEEK_NEXT_USING_HINT;
+ }
return MatchCode.SEEK_NEXT_COL;
default:
// It means it is INCLUDE, INCLUDE_AND_SEEK_NEXT_COL or
INCLUDE_AND_SEEK_NEXT_ROW.
@@ -166,6 +216,47 @@ public abstract class UserScanQueryMatcher extends
ScanQueryMatcher {
: mergeFilterResponse(cell, matchCode, filter.filterCell(cell));
}
+ /**
+ * Asks the current filter for a seek hint via
+ * {@link org.apache.hadoop.hbase.filter.Filter#getSkipHint(Cell)},
validates the returned cell
+ * type, and if non-null stores it in {@link #pendingSkipHint} so that
{@link #getNextKeyHint} can
+ * return it when the scan pipeline asks for the seek target after receiving
+ * {@link ScanQueryMatcher.MatchCode#SEEK_NEXT_USING_HINT}.
+ * <p>
+ * This is only called from the structural short-circuit branches of {@link
#matchColumn}, where
+ * {@code filterCell} has <em>not</em> been called, in accordance with the
stateless contract of
+ * {@code Filter#getSkipHint}. The filter-null guard is included here so
call-sites need no
+ * boilerplate.
+ * @param cell the cell that triggered the structural short-circuit
+ * @return {@code true} if the filter returned a valid hint (stored in
{@link #pendingSkipHint}),
+ * {@code false} if no filter is set or the filter returned {@code
null}
+ * @throws DoNotRetryIOException if the filter returns a non-{@link
ExtendedCell} instance
+ * @throws IOException if the filter signals an I/O failure
+ */
+ private boolean resolveSkipHint(ExtendedCell cell) throws IOException {
+ if (filter == null) {
+ return false;
+ }
+ Cell raw = filter.getSkipHint(cell);
+ if (raw == null) {
+ return false;
+ }
+ if (!(raw instanceof ExtendedCell)) {
+ throw new DoNotRetryIOException(
+ "Incorrect filter implementation: the Cell returned by getSkipHint "
+ + "is not an ExtendedCell. Filter class: " +
filter.getClass().getName());
+ }
+ ExtendedCell hint = (ExtendedCell) raw;
+ // Full-key compare is intentional: skip hints can advance within the same
row
+ // (e.g., to a later column), not just across rows.
+ int cmp = rowComparator.compare(hint, cell);
+ if ((!reversed && cmp <= 0) || (reversed && cmp >= 0)) {
+ return false;
+ }
+ pendingSkipHint = hint;
+ return true;
+ }
+
/**
* Call this when scan has filter. Decide the desired behavior by
checkVersions's MatchCode and
* filterCell's ReturnCode. Cell may be skipped by filter, so the column
versions in result may be
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java
new file mode 100644
index 00000000000..ab9c87865ff
--- /dev/null
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/filter/TestFilterHintForRejectedRow.java
@@ -0,0 +1,646 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.filter;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import org.apache.hadoop.hbase.Cell;
+import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.HBaseTestingUtility;
+import org.apache.hadoop.hbase.PrivateCellUtil;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
+import org.apache.hadoop.hbase.client.Durability;
+import org.apache.hadoop.hbase.client.Put;
+import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.client.RegionInfoBuilder;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.TableDescriptor;
+import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
+import org.apache.hadoop.hbase.regionserver.HRegion;
+import org.apache.hadoop.hbase.regionserver.RegionScanner;
+import org.apache.hadoop.hbase.testclassification.FilterTests;
+import org.apache.hadoop.hbase.testclassification.MediumTests;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Tag;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
+
+@Tag(FilterTests.TAG)
+@Tag(MediumTests.TAG)
+public class TestFilterHintForRejectedRow {
+
+ private static final HBaseTestingUtility TEST_UTIL = new
HBaseTestingUtility();
+
+ private static final byte[] FAMILY = Bytes.toBytes("f");
+ private static final byte[] FAMILY2 = Bytes.toBytes("g");
+ private static final int CELLS_PER_ROW = 10;
+ private static final byte[] VALUE = Bytes.toBytes("value");
+
+ private HRegion region;
+
+ @BeforeEach
+ public void setUp(TestInfo testInfo) throws Exception {
+ TableDescriptor tableDescriptor =
+
TableDescriptorBuilder.newBuilder(TableName.valueOf(testInfo.getTestMethod().get().getName()))
+ .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY))
+ .setColumnFamily(ColumnFamilyDescriptorBuilder.of(FAMILY2)).build();
+ RegionInfo info =
RegionInfoBuilder.newBuilder(tableDescriptor.getTableName()).build();
+ this.region = HBaseTestingUtility.createRegionAndWAL(info,
TEST_UTIL.getDataTestDir(),
+ TEST_UTIL.getConfiguration(), tableDescriptor);
+ }
+
+ @AfterEach
+ public void tearDown() throws Exception {
+ HBaseTestingUtility.closeRegionAndWAL(this.region);
+ }
+
+ private void writeRows(String prefix, int count) throws IOException {
+ for (int i = 0; i < count; i++) {
+ byte[] row = Bytes.toBytes(String.format("%s-%02d", prefix, i));
+ Put p = new Put(row);
+ p.setDurability(Durability.SKIP_WAL);
+ for (int j = 0; j < CELLS_PER_ROW; j++) {
+ p.addColumn(FAMILY, Bytes.toBytes(String.format("q-%02d", j)), VALUE);
+ }
+ this.region.put(p);
+ }
+ this.region.flush(true);
+ }
+
+ private void writeRowsMultiFamily(String prefix, int count) throws
IOException {
+ for (int i = 0; i < count; i++) {
+ byte[] row = Bytes.toBytes(String.format("%s-%02d", prefix, i));
+ Put p = new Put(row);
+ p.setDurability(Durability.SKIP_WAL);
+ for (int j = 0; j < CELLS_PER_ROW; j++) {
+ byte[] qual = Bytes.toBytes(String.format("q-%02d", j));
+ p.addColumn(FAMILY, qual, VALUE);
+ p.addColumn(FAMILY2, qual, VALUE);
+ }
+ this.region.put(p);
+ }
+ this.region.flush(true);
+ }
+
+ private List<Cell> scanAll(Scan scan) throws IOException {
+ List<Cell> results = new ArrayList<>();
+ try (RegionScanner scanner = region.getScanner(scan)) {
+ List<Cell> rowCells = new ArrayList<>();
+ boolean hasMore;
+ do {
+ hasMore = scanner.next(rowCells);
+ results.addAll(rowCells);
+ rowCells.clear();
+ } while (hasMore);
+ }
+ return results;
+ }
+
+ @Test
+ public void testHintAndNoHintReturnSameCellsOnSameData() throws IOException {
+ final String prefix = "row";
+ final int rejectedCount = 5;
+ final int acceptedCount = 5;
+ writeRows(prefix, rejectedCount + acceptedCount);
+
+ final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d",
prefix, rejectedCount));
+ final AtomicInteger hintCallCount = new AtomicInteger(0);
+
+ FilterBase hintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ hintCallCount.incrementAndGet();
+ return PrivateCellUtil.createFirstOnRow(acceptedStartRow);
+ }
+ };
+
+ FilterBase noHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+ };
+
+ List<Cell> hintResults = scanAll(new
Scan().addFamily(FAMILY).setFilter(hintFilter));
+ List<Cell> noHintResults = scanAll(new
Scan().addFamily(FAMILY).setFilter(noHintFilter));
+
+ assertEquals(noHintResults.size(), hintResults.size(),
+ "Both paths must return the same number of cells");
+ for (int i = 0; i < hintResults.size(); i++) {
+ assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)),
+ "Cell mismatch at index " + i);
+ }
+ assertEquals(acceptedCount * CELLS_PER_ROW, hintResults.size());
+ assertEquals(1, hintCallCount.get(),
+ "getHintForRejectedRow must be called exactly once: the hint skips all
rejected rows");
+ }
+
+ @Test
+ public void testHintBeyondLastRowTerminatesScanGracefully() throws
IOException {
+ final String prefix = "hint-beyond";
+ writeRows(prefix, 5);
+
+ final byte[] beyondAllRows = Bytes.toBytes("zzz-beyond-table-end");
+
+ FilterBase beyondHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return true;
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ return PrivateCellUtil.createFirstOnRow(beyondAllRows);
+ }
+ };
+
+ List<Cell> results = scanAll(new
Scan().addFamily(FAMILY).setFilter(beyondHintFilter));
+ assertTrue(results.isEmpty(),
+ "When the hint is past the last row, no cells should be returned");
+ }
+
+ @Test
+ public void testPerRowHintCalledOncePerRejectedRow() throws IOException {
+ final String prefix = "hint-perrow";
+ final int rejectedCount = 4;
+ final int acceptedCount = 2;
+ writeRows(prefix, rejectedCount + acceptedCount);
+
+ final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d",
prefix, rejectedCount));
+ final AtomicInteger hintCallCount = new AtomicInteger(0);
+
+ FilterBase perRowHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ hintCallCount.incrementAndGet();
+ return PrivateCellUtil.createFirstOnNextRow(firstRowCell);
+ }
+ };
+
+ List<Cell> results = scanAll(new
Scan().addFamily(FAMILY).setFilter(perRowHintFilter));
+
+ assertEquals(acceptedCount * CELLS_PER_ROW, results.size());
+ for (Cell c : results) {
+ assertTrue(
+ Bytes.compareTo(c.getRowArray(), c.getRowOffset(), c.getRowLength(),
acceptedStartRow, 0,
+ acceptedStartRow.length) >= 0,
+ "Every returned cell must belong to the accepted row range");
+ }
+ assertEquals(rejectedCount, hintCallCount.get(),
+ "getHintForRejectedRow must be called once per rejected row in the
per-row hint strategy");
+ }
+
+ @Test
+ public void testReversedScanWithHint() throws IOException {
+ final String prefix = "row";
+ final int totalRows = 10;
+ writeRows(prefix, totalRows);
+
+ // Accept rows 00-04 (lower half), reject rows 05-09 (upper half).
+ // In a reversed scan, the scanner starts at row-09 and moves backward.
+ final byte[] rejectThreshold = Bytes.toBytes(String.format("%s-%02d",
prefix, 5));
+ final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix,
4));
+ final AtomicInteger hintCallCount = new AtomicInteger(0);
+
+ FilterBase hintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ rejectThreshold, 0, rejectThreshold.length) >= 0;
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ hintCallCount.incrementAndGet();
+ // In reversed scan, hint must point backward (to a smaller row key).
+ return PrivateCellUtil.createFirstOnRow(hintTarget);
+ }
+ };
+
+ FilterBase noHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ rejectThreshold, 0, rejectThreshold.length) >= 0;
+ }
+ };
+
+ Scan reversedHintScan = new
Scan().addFamily(FAMILY).setReversed(true).setFilter(hintFilter);
+ Scan reversedNoHintScan =
+ new Scan().addFamily(FAMILY).setReversed(true).setFilter(noHintFilter);
+
+ List<Cell> hintResults = scanAll(reversedHintScan);
+ List<Cell> noHintResults = scanAll(reversedNoHintScan);
+
+ assertEquals(noHintResults.size(), hintResults.size(),
+ "Both paths must return the same number of cells");
+ for (int i = 0; i < hintResults.size(); i++) {
+ assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)),
+ "Cell mismatch at index " + i);
+ }
+ assertEquals(5 * CELLS_PER_ROW, hintResults.size());
+ assertEquals(1, hintCallCount.get(),
+ "getHintForRejectedRow must be called exactly once for reversed scan");
+ }
+
+ @Test
+ public void testGetSkipHintWithTimeRangeIntegration() throws IOException {
+ final long insideTs = 2000;
+ final long outsideTs = 500;
+ final int rowCount = 5;
+
+ for (int i = 0; i < rowCount; i++) {
+ byte[] row = Bytes.toBytes(String.format("skiprow-%02d", i));
+ Put p = new Put(row);
+ p.setDurability(Durability.SKIP_WAL);
+ p.addColumn(FAMILY, Bytes.toBytes("q"), insideTs, VALUE);
+ p.addColumn(FAMILY, Bytes.toBytes("q"), outsideTs, VALUE);
+ region.put(p);
+ }
+ region.flush(true);
+
+ final AtomicInteger skipHintCalls = new AtomicInteger(0);
+
+ FilterBase skipHintFilter = new FilterBase() {
+ @Override
+ public Cell getSkipHint(Cell skippedCell) {
+ skipHintCalls.incrementAndGet();
+ return PrivateCellUtil.createFirstOnNextRow(skippedCell);
+ }
+ };
+
+ Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000,
3000).setFilter(skipHintFilter);
+ Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000, 3000);
+
+ List<Cell> hintResults = scanAll(hintScan);
+ List<Cell> noHintResults = scanAll(noHintScan);
+
+ assertEquals(noHintResults.size(), hintResults.size(),
+ "Both paths must return the same number of cells");
+ for (int i = 0; i < hintResults.size(); i++) {
+ assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)),
+ "Cell mismatch at index " + i);
+ }
+ assertEquals(rowCount, hintResults.size());
+ }
+
+ @Test
+ public void testBackwardHintFallsBackToNextRow() throws IOException {
+ final String prefix = "row";
+ writeRows(prefix, 5);
+
+ final byte[] row00 = Bytes.toBytes(String.format("%s-%02d", prefix, 0));
+ final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d",
prefix, 2));
+
+ FilterBase backwardHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ return PrivateCellUtil.createFirstOnRow(row00);
+ }
+ };
+
+ FilterBase noHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+ };
+
+ List<Cell> hintResults = scanAll(new
Scan().addFamily(FAMILY).setFilter(backwardHintFilter));
+ List<Cell> noHintResults = scanAll(new
Scan().addFamily(FAMILY).setFilter(noHintFilter));
+
+ assertEquals(noHintResults.size(), hintResults.size(),
+ "Backward hint must produce same results as no-hint path");
+ for (int i = 0; i < hintResults.size(); i++) {
+ assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)),
+ "Cell mismatch at index " + i);
+ }
+ }
+
+ @Test
+ public void testSameRowHintFallsBackToNextRow() throws IOException {
+ final String prefix = "row";
+ writeRows(prefix, 5);
+
+ final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d",
prefix, 2));
+ final AtomicInteger hintCallCount = new AtomicInteger(0);
+
+ FilterBase sameRowHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ hintCallCount.incrementAndGet();
+ return
PrivateCellUtil.createFirstOnRow(CellUtil.cloneRow(firstRowCell));
+ }
+ };
+
+ FilterBase noHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+ };
+
+ List<Cell> hintResults = scanAll(new
Scan().addFamily(FAMILY).setFilter(sameRowHintFilter));
+ List<Cell> noHintResults = scanAll(new
Scan().addFamily(FAMILY).setFilter(noHintFilter));
+
+ assertEquals(noHintResults.size(), hintResults.size(),
+ "Same-row hint must produce same results as no-hint path");
+ for (int i = 0; i < hintResults.size(); i++) {
+ assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)),
+ "Cell mismatch at index " + i);
+ }
+ assertEquals(3 * CELLS_PER_ROW, hintResults.size());
+ }
+
+ @Test
+ public void testCoprocessorHookCalledOncePerHintedRejection() throws
IOException {
+ final String prefix = "row";
+ final int rejectedCount = 3;
+ final int acceptedCount = 3;
+ writeRows(prefix, rejectedCount + acceptedCount);
+
+ final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d",
prefix, rejectedCount));
+ final AtomicInteger hintCallCount = new AtomicInteger(0);
+
+ FilterBase hintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ hintCallCount.incrementAndGet();
+ return PrivateCellUtil.createFirstOnRow(acceptedStartRow);
+ }
+ };
+
+ List<Cell> results = scanAll(new
Scan().addFamily(FAMILY).setFilter(hintFilter));
+
+ assertEquals(1, hintCallCount.get());
+ assertEquals(acceptedCount * CELLS_PER_ROW, results.size());
+ }
+
+ @Test
+ public void testMultiFamilyScanWithHint() throws IOException {
+ final String prefix = "row";
+ final int rejectedCount = 3;
+ final int acceptedCount = 3;
+ writeRowsMultiFamily(prefix, rejectedCount + acceptedCount);
+
+ final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d",
prefix, rejectedCount));
+ final AtomicInteger hintCallCount = new AtomicInteger(0);
+
+ FilterBase hintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ hintCallCount.incrementAndGet();
+ return PrivateCellUtil.createFirstOnRow(acceptedStartRow);
+ }
+ };
+
+ FilterBase noHintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+ };
+
+ // Scan both families.
+ Scan hintScan = new
Scan().addFamily(FAMILY).addFamily(FAMILY2).setFilter(hintFilter);
+ Scan noHintScan = new
Scan().addFamily(FAMILY).addFamily(FAMILY2).setFilter(noHintFilter);
+
+ List<Cell> hintResults = scanAll(hintScan);
+ List<Cell> noHintResults = scanAll(noHintScan);
+
+ assertEquals(noHintResults.size(), hintResults.size(),
+ "Both paths must return the same number of cells");
+ for (int i = 0; i < hintResults.size(); i++) {
+ assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)),
+ "Cell mismatch at index " + i);
+ }
+ assertEquals(2 * CELLS_PER_ROW * acceptedCount, hintResults.size());
+ assertEquals(1, hintCallCount.get());
+ }
+
+ @Test
+ public void testDataCorrectnessVsUnfilteredScan() throws IOException {
+ final String prefix = "row";
+ final int totalRows = 8;
+ writeRows(prefix, totalRows);
+
+ final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d",
prefix, 3));
+
+ FilterBase hintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ return PrivateCellUtil.createFirstOnRow(acceptedStartRow);
+ }
+ };
+
+ List<Cell> filteredResults = scanAll(new
Scan().addFamily(FAMILY).setFilter(hintFilter));
+
+ // Unfiltered scan starting at acceptedStartRow — should return the exact
same cells.
+ Scan unfilteredScan = new
Scan().addFamily(FAMILY).withStartRow(acceptedStartRow);
+ List<Cell> unfilteredResults = scanAll(unfilteredScan);
+
+ assertEquals(unfilteredResults.size(), filteredResults.size(),
+ "Filtered and unfiltered scans must return same cell count");
+ for (int i = 0; i < filteredResults.size(); i++) {
+ assertTrue(CellUtil.equals(filteredResults.get(i),
unfilteredResults.get(i)),
+ "Cell mismatch at index " + i);
+ }
+ }
+
+ @Test
+ public void testHintOvershootingStopRowTerminatesGracefully() throws
IOException {
+ final String prefix = "row";
+ final int totalRows = 10;
+ writeRows(prefix, totalRows);
+
+ // Scan rows 00-06 (stopRow=row-07, exclusive). Reject rows 00-02, hint to
row-09.
+ // The hint overshoots the stop row, so the scan should terminate with no
results
+ // from the rejected range and only include rows 03-06 from the
non-rejected range.
+ final byte[] stopRow = Bytes.toBytes(String.format("%s-%02d", prefix, 7));
+ final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d",
prefix, 3));
+ final byte[] hintTarget = Bytes.toBytes(String.format("%s-%02d", prefix,
9));
+
+ FilterBase hintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ // Hint past the stop row — scanner should terminate gracefully.
+ return PrivateCellUtil.createFirstOnRow(hintTarget);
+ }
+ };
+
+ Scan scan = new
Scan().addFamily(FAMILY).withStopRow(stopRow).setFilter(hintFilter);
+ List<Cell> results = scanAll(scan);
+
+ // The hint jumps past stopRow, so no cells should be returned.
+ assertTrue(results.isEmpty(), "Hint past stop row must terminate scan with
no results");
+ }
+
+ @Test
+ public void testHintWithinStopRowReturnsCorrectResults() throws IOException {
+ final String prefix = "row";
+ final int totalRows = 10;
+ writeRows(prefix, totalRows);
+
+ // Scan rows 00-06 (stopRow=row-07, exclusive). Reject rows 00-02, hint to
row-03.
+ final byte[] stopRow = Bytes.toBytes(String.format("%s-%02d", prefix, 7));
+ final byte[] acceptedStartRow = Bytes.toBytes(String.format("%s-%02d",
prefix, 3));
+
+ FilterBase hintFilter = new FilterBase() {
+ @Override
+ public boolean filterRowKey(Cell cell) {
+ return Bytes.compareTo(cell.getRowArray(), cell.getRowOffset(),
cell.getRowLength(),
+ acceptedStartRow, 0, acceptedStartRow.length) < 0;
+ }
+
+ @Override
+ public Cell getHintForRejectedRow(Cell firstRowCell) {
+ return PrivateCellUtil.createFirstOnRow(acceptedStartRow);
+ }
+ };
+
+ Scan hintScan = new
Scan().addFamily(FAMILY).withStopRow(stopRow).setFilter(hintFilter);
+ List<Cell> hintResults = scanAll(hintScan);
+
+ // Should get rows 03-06 (4 rows).
+ Scan baselineScan =
+ new
Scan().addFamily(FAMILY).withStartRow(acceptedStartRow).withStopRow(stopRow);
+ List<Cell> baselineResults = scanAll(baselineScan);
+
+ assertEquals(baselineResults.size(), hintResults.size(),
+ "Hint within stop row must return same results as baseline");
+ for (int i = 0; i < hintResults.size(); i++) {
+ assertTrue(CellUtil.equals(hintResults.get(i), baselineResults.get(i)),
+ "Cell mismatch at index " + i);
+ }
+ assertEquals(4 * CELLS_PER_ROW, hintResults.size());
+ }
+
+ @Test
+ public void testGetSkipHintCalledForReversedScan() throws IOException {
+ final long insideTs = 1500;
+ final long tooNewTs = 5000;
+ final int rowCount = 5;
+ final byte[] qInside = Bytes.toBytes("q-inside");
+ final byte[] qNew = Bytes.toBytes("q-new");
+
+ // Write two qualifiers per row: "q-inside" at ts=1500 (within range) and
"q-new" at
+ // ts=5000 (above the range upper bound). The time-range gate fires for
the too-new cell
+ // before version tracking can absorb it, ensuring getSkipHint is
consulted.
+ for (int i = 0; i < rowCount; i++) {
+ byte[] row = Bytes.toBytes(String.format("skiprev-%02d", i));
+ Put p = new Put(row);
+ p.setDurability(Durability.SKIP_WAL);
+ p.addColumn(FAMILY, qInside, insideTs, VALUE);
+ p.addColumn(FAMILY, qNew, tooNewTs, VALUE);
+ region.put(p);
+ }
+ region.flush(true);
+
+ final AtomicInteger skipHintCalls = new AtomicInteger(0);
+
+ // Return null to verify the code path is reached without altering scan
semantics.
+ // The unit test
TestUserScanQueryMatcher#testSkipHintConsultedForReversedScan validates
+ // that a non-null hint is correctly accepted by the reversed-scan guard.
+ FilterBase skipHintFilter = new FilterBase() {
+ @Override
+ public Cell getSkipHint(Cell skippedCell) {
+ skipHintCalls.incrementAndGet();
+ return null;
+ }
+ };
+
+ // Reversed scan with time range [1000, 3000): insideTs cells pass,
tooNewTs cells are
+ // structurally skipped (tsCmp > 0). The skip hint should be consulted.
+ Scan hintScan = new Scan().addFamily(FAMILY).setTimeRange(1000,
3000).setReversed(true)
+ .setFilter(skipHintFilter);
+ Scan noHintScan = new Scan().addFamily(FAMILY).setTimeRange(1000,
3000).setReversed(true);
+
+ List<Cell> hintResults = scanAll(hintScan);
+ List<Cell> noHintResults = scanAll(noHintScan);
+
+ assertEquals(noHintResults.size(), hintResults.size(),
+ "Both paths must return the same number of cells");
+ for (int i = 0; i < hintResults.size(); i++) {
+ assertTrue(CellUtil.equals(hintResults.get(i), noHintResults.get(i)),
+ "Cell mismatch at index " + i);
+ }
+ assertEquals(rowCount, hintResults.size());
+ assertTrue(skipHintCalls.get() > 0,
+ "getSkipHint must be called at least once for reversed scan");
+ }
+}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java
index fafbfb8f66e..0ee3a065cfa 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestUserScanQueryMatcher.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.regionserver.querymatcher;
import static org.junit.jupiter.api.Assertions.assertArrayEquals;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
import java.io.IOException;
import java.util.ArrayList;
@@ -291,6 +292,19 @@ public class TestUserScanQueryMatcher extends
AbstractTestScanQueryMatcher {
}
}
+ private static class FixedSkipHintFilter extends FilterBase {
+ final ExtendedCell hint;
+
+ FixedSkipHintFilter(ExtendedCell hint) {
+ this.hint = hint;
+ }
+
+ @Override
+ public Cell getSkipHint(Cell skippedCell) throws IOException {
+ return hint;
+ }
+ }
+
/**
* Here is the unit test for UserScanQueryMatcher#mergeFilterResponse, when
the number of cells
* exceed the versions requested in scan, we should return SEEK_NEXT_COL,
but if current match
@@ -394,6 +408,216 @@ public class TestUserScanQueryMatcher extends
AbstractTestScanQueryMatcher {
assertArrayEquals(nextCell.getQualifierArray(), col4);
}
+ /** Verify that getSkipHint is consulted when a cell is newer than the
time-range upper bound. */
+ @Test
+ public void testSkipHintConsultedForCellNewerThanTimeRange() throws
IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+ long minTs = now - 2000;
+ long maxTs = now - 1000;
+
+ KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 1500, data);
+ Scan timeRangeScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs)
+ .setFilter(new FixedSkipHintFilter(hintCell));
+
+ long now2 = EnvironmentEdgeManager.currentTime();
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
HConstants.DEFAULT_BLOCKSIZE,
+ 0, rowComparator, false),
+ null, 0, now2, null);
+
+ // ts=now2 >= maxTs, so tsCmp > 0: time-range gate fires before filterCell.
+ KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data);
+ qm.setToNewRow(tooNew);
+
+ assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooNew));
+ assertEquals(hintCell, qm.getNextKeyHint(tooNew));
+ assertNull(qm.getNextKeyHint(tooNew));
+ }
+
+ /** Verify that getSkipHint is consulted when a cell is older than the
time-range lower bound. */
+ @Test
+ public void testSkipHintConsultedForCellOlderThanTimeRange() throws
IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+ long minTs = now - 500;
+ long maxTs = now;
+
+ KeyValue hintCell = new KeyValue(row2, fam2, col1, now - 100, data);
+ Scan timeRangeScan = new Scan().addFamily(fam2).setTimeRange(minTs, maxTs)
+ .setFilter(new FixedSkipHintFilter(hintCell));
+
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
HConstants.DEFAULT_BLOCKSIZE,
+ 0, rowComparator, false),
+ null, 0, now, null);
+
+ // ts = now-1000 < minTs (now-500), so tsCmp < 0: time-range gate fires.
+ KeyValue tooOld = new KeyValue(row1, fam2, col1, now - 1000, data);
+ qm.setToNewRow(tooOld);
+
+ assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooOld));
+ assertEquals(hintCell, qm.getNextKeyHint(tooOld));
+ assertNull(qm.getNextKeyHint(tooOld));
+ }
+
+ /** Verify that a null getSkipHint falls back to the original SKIP match
code. */
+ @Test
+ public void testNullSkipHintFallsThroughToOriginalCodeOnTimeRangeGate()
throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+ long minTs = now - 2000;
+ long maxTs = now - 1000;
+
+ Scan timeRangeScan =
+ new Scan().addFamily(fam2).setTimeRange(minTs, maxTs).setFilter(new
AlwaysIncludeFilter());
+
+ long now2 = EnvironmentEdgeManager.currentTime();
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(timeRangeScan,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
HConstants.DEFAULT_BLOCKSIZE,
+ 0, rowComparator, false),
+ null, 0, now2, null);
+
+ KeyValue tooNew = new KeyValue(row1, fam2, col1, now2, data);
+ qm.setToNewRow(tooNew);
+
+ assertEquals(MatchCode.SKIP, qm.match(tooNew));
+ }
+
+ /** Verify that getSkipHint is consulted when a column is excluded by the
scan's column set. */
+ @Test
+ public void testSkipHintConsultedForExcludedColumn() throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+
+ // get.getFamilyMap().get(fam2) contains col2, col4, col5; presenting col1
triggers exclusion.
+ KeyValue hintCell = new KeyValue(row1, fam2, col2, 1, data);
+ Scan scanWithFilter = new Scan(scan).setFilter(new
FixedSkipHintFilter(hintCell));
+
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(
+ scanWithFilter, new ScanInfo(this.conf, fam2, 0, 1, ttl,
KeepDeletedCells.FALSE,
+ HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false),
+ get.getFamilyMap().get(fam2), now - ttl, now, null);
+
+ // col1 is not in {col2, col4, col5}: checkColumn returns a non-INCLUDE
code.
+ KeyValue excludedCol = new KeyValue(row1, fam2, col1, 1, data);
+ qm.setToNewRow(excludedCol);
+
+ assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(excludedCol));
+ assertEquals(hintCell, qm.getNextKeyHint(excludedCol));
+ assertNull(qm.getNextKeyHint(excludedCol));
+ }
+
+ /** Verify that getSkipHint is consulted when the version limit is
exhausted. */
+ @Test
+ public void testSkipHintConsultedOnVersionExhaustion() throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+
+ KeyValue hintCell = new KeyValue(row2, fam2, col1, now, data);
+ Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(new
FixedSkipHintFilter(hintCell));
+
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
HConstants.DEFAULT_BLOCKSIZE,
+ 0, rowComparator, false),
+ null, now - ttl, now, null);
+
+ KeyValue version1 = new KeyValue(row1, fam2, col1, now - 10, data);
+ qm.setToNewRow(version1);
+ assertEquals(MatchCode.INCLUDE, qm.match(version1));
+
+ // Second version: version limit exceeded, checkVersions returns
SEEK_NEXT_COL.
+ KeyValue version2 = new KeyValue(row1, fam2, col1, now - 20, data);
+ assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2));
+ assertEquals(hintCell, qm.getNextKeyHint(version2));
+ assertNull(qm.getNextKeyHint(version2));
+ }
+
+ /** Verify that pendingSkipHint is cleared when a row transition occurs via
setToNewRow. */
+ @Test
+ public void testPendingSkipHintClearedOnRowTransition() throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+
+ KeyValue hintCell = new KeyValue(row2, fam2, col1, now, data);
+ Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(new
FixedSkipHintFilter(hintCell));
+
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
HConstants.DEFAULT_BLOCKSIZE,
+ 0, rowComparator, false),
+ null, now - ttl, now, null);
+
+ // Trigger a structural skip that stores pendingSkipHint.
+ KeyValue version1 = new KeyValue(row1, fam2, col1, now - 10, data);
+ qm.setToNewRow(version1);
+ assertEquals(MatchCode.INCLUDE, qm.match(version1));
+ KeyValue version2 = new KeyValue(row1, fam2, col1, now - 20, data);
+ assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(version2));
+
+ // Do NOT consume the hint via getNextKeyHint. Instead, simulate a row
transition.
+ KeyValue newRowCell = new KeyValue(row2, fam2, col1, now - 10, data);
+ qm.setToNewRow(newRowCell);
+
+ // After row transition, pendingSkipHint must be null — getNextKeyHint
should delegate
+ // to the filter's getNextCellHint (which returns null for
FixedSkipHintFilter).
+ assertNull(qm.getNextKeyHint(newRowCell));
+ }
+
+ /** Verify that the normal filterCell/getNextCellHint path is unaffected by
pendingSkipHint. */
+ @Test
+ public void testNormalFilterCellHintPathUnaffectedBySkipHintChange() throws
IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+
+ final KeyValue filterHintCell = new KeyValue(row2, fam2, col1, now, data);
+
+ FilterBase seekFilter = new FilterBase() {
+ @Override
+ public ReturnCode filterCell(Cell c) {
+ return ReturnCode.SEEK_NEXT_USING_HINT;
+ }
+
+ @Override
+ public Cell getNextCellHint(Cell currentCell) {
+ return filterHintCell;
+ }
+ };
+
+ Scan scanWithFilter = new Scan().addFamily(fam2).setFilter(seekFilter);
+
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(scanWithFilter,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
HConstants.DEFAULT_BLOCKSIZE,
+ 0, rowComparator, false),
+ null, now - ttl, now, null);
+
+ KeyValue cell = new KeyValue(row1, fam2, col1, now - 10, data);
+ qm.setToNewRow(cell);
+
+ assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(cell));
+ assertEquals(filterHintCell, qm.getNextKeyHint(cell));
+ }
+
+ /** Verify that getSkipHint works correctly for reversed scans (hint must be
smaller). */
+ @Test
+ public void testSkipHintConsultedForReversedScan() throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+ long minTs = now - 2000;
+ long maxTs = now - 1000;
+
+ // For reversed scan, the hint must point backward (smaller key).
+ KeyValue hintCell = new KeyValue(row1, fam2, col1, now - 1500, data);
+ Scan reversedScan = new Scan().addFamily(fam2).setTimeRange(minTs,
maxTs).setReversed(true)
+ .setFilter(new FixedSkipHintFilter(hintCell));
+
+ long now2 = EnvironmentEdgeManager.currentTime();
+ UserScanQueryMatcher qm = UserScanQueryMatcher.create(reversedScan,
+ new ScanInfo(this.conf, fam2, 0, 1, ttl, KeepDeletedCells.FALSE,
HConstants.DEFAULT_BLOCKSIZE,
+ 0, rowComparator, false),
+ null, 0, now2, null);
+
+ // ts=now2 >= maxTs so tsCmp > 0: time-range gate fires.
+ // row2 > row1 in natural order, so for reversed scan hint (row1) is
"forward" (smaller).
+ KeyValue tooNew = new KeyValue(row2, fam2, col1, now2, data);
+ qm.setToNewRow(tooNew);
+
+ assertEquals(MatchCode.SEEK_NEXT_USING_HINT, qm.match(tooNew));
+ assertEquals(hintCell, qm.getNextKeyHint(tooNew));
+ assertNull(qm.getNextKeyHint(tooNew));
+ }
+
/**
* After enough consecutive range delete markers, the matcher should switch
from SKIP to
* SEEK_NEXT_COL. Point deletes and KEEP_DELETED_CELLS always SKIP.