This is an automated email from the ASF dual-hosted git repository.
junegunn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new 7b27d0941df HBASE-30036 Skip redundant delete markers during flush and
minor compaction (#7993)
7b27d0941df is described below
commit 7b27d0941df5e33ea9cfd9daf622b4ac4ed9c818
Author: Junegunn Choi <[email protected]>
AuthorDate: Wed Apr 8 11:19:54 2026 +0900
HBASE-30036 Skip redundant delete markers during flush and minor compaction
(#7993)
Add DeleteTracker.isRedundantDelete() to detect when a delete marker is
already covered by a previously tracked delete of equal or broader scope.
ScanDeleteTracker implements this for all four delete types:
- DeleteFamily/DeleteFamilyVersion: covered by a tracked DeleteFamily
- DeleteColumn/Delete: covered by a tracked DeleteFamily or DeleteColumn
MinorCompactionScanQueryMatcher calls this check before including a
delete marker, returning SEEK_NEXT_COL to skip past all remaining cells
covered by the previously tracked delete.
Compatible with KEEP_DELETED_CELLS. When set to TRUE, trackDelete() does
not populate the delete tracker, so isRedundantDelete() always returns
false and all markers are retained.
Signed-off-by: Charles Connell <[email protected]>
---
.../regionserver/querymatcher/DeleteTracker.java | 14 +++
.../MinorCompactionScanQueryMatcher.java | 14 +++
.../querymatcher/ScanDeleteTracker.java | 22 ++++
.../hbase/regionserver/TestStoreFileWriter.java | 11 +-
.../TestCompactionScanQueryMatcher.java | 116 +++++++++++++++++++++
5 files changed, 175 insertions(+), 2 deletions(-)
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/DeleteTracker.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/DeleteTracker.java
index 53816e9f3f3..1a66b99571a 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/DeleteTracker.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/DeleteTracker.java
@@ -83,6 +83,20 @@ public interface DeleteTracker extends ShipperListener {
// deleted in strong semantics of versions(See MvccTracker)
}
+ /**
+ * Check if the given delete marker is redundant, i.e., it is already
covered by a previously
+ * tracked delete of equal or broader scope. A DeleteFamily is redundant if
a DeleteFamily with a
+ * higher timestamp was already seen. A DeleteColumn is redundant if a
DeleteColumn for the same
+ * qualifier with a higher timestamp, or a DeleteFamily with a higher
timestamp, was already seen.
+ * <p>
+ * This is a read-only check with no side effects on tracker state.
+ * @param cell the delete marker cell to check
+ * @return true if the delete marker is redundant and can be skipped
+ */
+ default boolean isRedundantDelete(ExtendedCell cell) {
+ return false;
+ }
+
/**
* Return the comparator passed to this delete tracker
* @return the cell comparator
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/MinorCompactionScanQueryMatcher.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/MinorCompactionScanQueryMatcher.java
index 847eff44b31..6fbb138197e 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/MinorCompactionScanQueryMatcher.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/MinorCompactionScanQueryMatcher.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.regionserver.querymatcher;
import java.io.IOException;
import org.apache.hadoop.hbase.ExtendedCell;
+import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.regionserver.ScanInfo;
import org.apache.yetus.audience.InterfaceAudience;
@@ -47,6 +48,19 @@ public class MinorCompactionScanQueryMatcher extends
CompactionScanQueryMatcher
// we should not use this delete marker to mask any cell yet.
return MatchCode.INCLUDE;
}
+ // Check before tracking: an older DeleteColumn or DeleteFamily is
redundant if a newer
+ // one of equal or broader scope was already seen. Must check before
trackDelete() since
+ // that overwrites tracker state. Seek past remaining cells for this
column/row since
+ // they are all covered by the previously tracked delete.
+ if (deletes.isRedundantDelete(cell)) {
+ // Skip seeking for deletes with empty qualifier, not to skip a
subsequent
+ // DeleteFamily marker that covers other qualifiers. DeleteFamily
itself can seek
+ // safely because all remaining empty-qualifier cells are redundant
under it.
+ if (cell.getQualifierLength() == 0 && typeByte !=
KeyValue.Type.DeleteFamily.getCode()) {
+ return MatchCode.SKIP;
+ }
+ return columns.getNextRowOrNextColumn(cell);
+ }
trackDelete(cell);
return MatchCode.INCLUDE;
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanDeleteTracker.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanDeleteTracker.java
index efe09cce722..65b6f2d2187 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanDeleteTracker.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/querymatcher/ScanDeleteTracker.java
@@ -142,6 +142,28 @@ public class ScanDeleteTracker implements DeleteTracker {
return DeleteResult.NOT_DELETED;
}
+ @Override
+ public boolean isRedundantDelete(ExtendedCell cell) {
+ byte type = cell.getTypeByte();
+ boolean coveredByFamily = hasFamilyStamp && cell.getTimestamp() <=
familyStamp;
+
+ if (
+ type == KeyValue.Type.DeleteFamily.getCode()
+ || type == KeyValue.Type.DeleteFamilyVersion.getCode()
+ ) {
+ return coveredByFamily;
+ }
+
+ boolean coveredByColumn =
+ deleteCell != null && deleteType == KeyValue.Type.DeleteColumn.getCode()
+ && CellUtil.matchingQualifier(cell, deleteCell) && cell.getTimestamp()
<= deleteTimestamp;
+
+ if (type == KeyValue.Type.DeleteColumn.getCode() || type ==
KeyValue.Type.Delete.getCode()) {
+ return coveredByFamily || coveredByColumn;
+ }
+ return false;
+ }
+
@Override
public boolean isEmpty() {
return deleteCell == null && !hasFamilyStamp &&
familyVersionStamps.isEmpty();
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java
index 6c27bc4082b..de84d4daa5e 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestStoreFileWriter.java
@@ -177,8 +177,15 @@ public class TestStoreFileWriter {
stores[0].getStorefilesCount());
regions[1].compact(false);
- assertEquals(flushCount - stores[1].getCompactedFiles().size() + 2,
- stores[1].getStorefilesCount());
+ // HBASE-30036 skips redundant delete markers during minor compaction, so
the historical
+ // file may end up empty and not be created. The count can be +1 or +2.
+ int minorCompactedCount = stores[1].getStorefilesCount();
+ int expectedMin = flushCount - stores[1].getCompactedFiles().size() + 1;
+ int expectedMax = flushCount - stores[1].getCompactedFiles().size() + 2;
+ assertTrue(
+ "Expected store file count between " + expectedMin + " and " +
expectedMax + " but was "
+ + minorCompactedCount,
+ minorCompactedCount >= expectedMin && minorCompactedCount <=
expectedMax);
verifyCells();
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestCompactionScanQueryMatcher.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestCompactionScanQueryMatcher.java
index 400c2fd2b60..17681d2f014 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestCompactionScanQueryMatcher.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/querymatcher/TestCompactionScanQueryMatcher.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hbase.regionserver.querymatcher;
import static
org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode.INCLUDE;
+import static
org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode.SEEK_NEXT_COL;
import static
org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode.SKIP;
import static org.junit.Assert.assertEquals;
@@ -75,6 +76,121 @@ public class TestCompactionScanQueryMatcher extends
AbstractTestScanQueryMatcher
testDropDeletes(row2, row3, new byte[][] { row1, row1 }, INCLUDE, INCLUDE);
}
+ /**
+ * Test redundant delete marker handling with COMPACT_RETAIN_DELETES. Cells
are auto-generated
+ * from the given types with decrementing timestamps.
+ */
+ @Test
+ public void testSkipsRedundantDeleteMarkers() throws IOException {
+ // Interleaved DeleteColumn + Put. First DC included, put triggers
SEEK_NEXT_COL.
+ assertRetainDeletes(new Type[] { Type.DeleteColumn, Type.Put,
Type.DeleteColumn }, INCLUDE,
+ SEEK_NEXT_COL);
+
+ // Contiguous DeleteColumn. First included, rest redundant.
+ assertRetainDeletes(new Type[] { Type.DeleteColumn, Type.DeleteColumn,
Type.DeleteColumn },
+ INCLUDE, SEEK_NEXT_COL, SEEK_NEXT_COL);
+
+ // Contiguous DeleteFamily. First included, rest redundant.
+ assertRetainDeletes(new Type[] { Type.DeleteFamily, Type.DeleteFamily,
Type.DeleteFamily },
+ INCLUDE, SEEK_NEXT_COL, SEEK_NEXT_COL);
+
+ // DF + DFV interleaved. DF included, DFV redundant (SKIP because empty
qualifier),
+ // older DF redundant (SEEK_NEXT_COL), older DFV redundant (SKIP).
+ assertRetainDeletes(new Type[] { Type.DeleteFamily,
Type.DeleteFamilyVersion, Type.DeleteFamily,
+ Type.DeleteFamilyVersion }, INCLUDE, SKIP, SEEK_NEXT_COL, SKIP);
+
+ // Delete (version) covered by DeleteColumn.
+ assertRetainDeletes(new Type[] { Type.DeleteColumn, Type.Delete,
Type.Delete, Type.Delete },
+ INCLUDE, SEEK_NEXT_COL, SEEK_NEXT_COL, SEEK_NEXT_COL);
+
+ // KEEP_DELETED_CELLS=TRUE: all markers retained.
+ assertRetainDeletes(KeepDeletedCells.TRUE,
+ new Type[] { Type.DeleteColumn, Type.DeleteColumn, Type.DeleteColumn },
INCLUDE, INCLUDE,
+ INCLUDE);
+ }
+
+ /**
+ * Redundant column-level deletes with empty qualifier must not seek past a
subsequent
+ * DeleteFamily. getKeyForNextColumn treats empty qualifier as "no column"
and returns
+ * SEEK_NEXT_ROW, which would skip the DF and all remaining cells in the row.
+ */
+ @Test
+ public void testEmptyQualifierDeleteDoesNotSkipDeleteFamily() throws
IOException {
+ byte[] emptyQualifier = HConstants.EMPTY_BYTE_ARRAY;
+
+ // DC(empty) + DC(empty) redundant + DF must still be reachable.
+ assertRetainDeletes(emptyQualifier,
+ new Type[] { Type.DeleteColumn, Type.DeleteColumn, Type.DeleteFamily },
INCLUDE, SKIP,
+ INCLUDE);
+
+ // DC(empty) + Delete(empty) redundant + DF must still be reachable.
+ assertRetainDeletes(emptyQualifier,
+ new Type[] { Type.DeleteColumn, Type.Delete, Type.DeleteFamily },
INCLUDE, SKIP, INCLUDE);
+ }
+
+ private void assertRetainDeletes(Type[] types, MatchCode... expected) throws
IOException {
+ assertRetainDeletes(KeepDeletedCells.FALSE, types, expected);
+ }
+
+ private void assertRetainDeletes(byte[] qualifier, Type[] types,
MatchCode... expected)
+ throws IOException {
+ assertRetainDeletes(KeepDeletedCells.FALSE, qualifier, types, expected);
+ }
+
+ /**
+ * Build cells from the given types with decrementing timestamps (same ts
for adjacent
+ * family-level and column-level types at the same position). Family-level
types (DeleteFamily,
+ * DeleteFamilyVersion) use empty qualifier; others use col1.
+ */
+ private void assertRetainDeletes(KeepDeletedCells keepDeletedCells, Type[]
types,
+ MatchCode... expected) throws IOException {
+ assertRetainDeletes(keepDeletedCells, null, types, expected);
+ }
+
+ /**
+ * Build cells from the given types with decrementing timestamps. If
qualifier is null,
+ * family-level types use empty qualifier and others use col1. If qualifier
is specified, all
+ * types use that qualifier.
+ */
+ private void assertRetainDeletes(KeepDeletedCells keepDeletedCells, byte[]
qualifier,
+ Type[] types, MatchCode... expected) throws IOException {
+ long now = EnvironmentEdgeManager.currentTime();
+ ScanInfo scanInfo = new ScanInfo(this.conf, fam1, 0, 1, ttl,
keepDeletedCells,
+ HConstants.DEFAULT_BLOCKSIZE, 0, rowComparator, false);
+ CompactionScanQueryMatcher qm = CompactionScanQueryMatcher.create(scanInfo,
+ ScanType.COMPACT_RETAIN_DELETES, 0L, PrivateConstants.OLDEST_TIMESTAMP,
+ PrivateConstants.OLDEST_TIMESTAMP, now, null, null, null);
+ qm.setToNewRow(KeyValueUtil.createFirstOnRow(row1));
+
+ long ts = now;
+ List<MatchCode> actual = new ArrayList<>(expected.length);
+ for (int i = 0; i < types.length; i++) {
+ byte[] qual;
+ if (qualifier != null) {
+ qual = qualifier;
+ } else {
+ boolean familyLevel = types[i] == Type.DeleteFamily || types[i] ==
Type.DeleteFamilyVersion;
+ qual = familyLevel ? HConstants.EMPTY_BYTE_ARRAY : col1;
+ }
+ KeyValue kv = types[i] == Type.Put
+ ? new KeyValue(row1, fam1, qual, ts, types[i], data)
+ : new KeyValue(row1, fam1, qual, ts, types[i]);
+ actual.add(qm.match(kv));
+ if (actual.size() >= expected.length) {
+ break;
+ }
+ // Decrement ts for next cell, but keep same ts when the next type has
lower type code
+ // at the same logical position (e.g. DF then DFV at the same timestamp).
+ if (i + 1 < types.length && types[i + 1].getCode() < types[i].getCode())
{
+ continue;
+ }
+ ts--;
+ }
+ for (int i = 0; i < expected.length; i++) {
+ assertEquals("Mismatch at index " + i, expected[i], actual.get(i));
+ }
+ }
+
private void testDropDeletes(byte[] from, byte[] to, byte[][] rows,
MatchCode... expected)
throws IOException {
long now = EnvironmentEdgeManager.currentTime();