This is an automated email from the ASF dual-hosted git repository. stack pushed a commit to branch branch-2 in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push: new 8f16e34 HBASE-26122: Implement an optional maximum size for Gets, after which a partial result is returned (#3532) 8f16e34 is described below commit 8f16e34eb277097678de73aab1d242c959511ef7 Author: Bryan Beaudreault <bbeaudrea...@hubspot.com> AuthorDate: Tue Aug 10 23:38:06 2021 -0400 HBASE-26122: Implement an optional maximum size for Gets, after which a partial result is returned (#3532) Signed-off-by: stack <st...@apache.org> --- .../java/org/apache/hadoop/hbase/client/Get.java | 23 ++++++ .../apache/hadoop/hbase/protobuf/ProtobufUtil.java | 2 +- .../hadoop/hbase/shaded/protobuf/ProtobufUtil.java | 9 ++- .../src/main/protobuf/Client.proto | 2 + .../apache/hadoop/hbase/regionserver/HRegion.java | 36 ++++++--- .../hadoop/hbase/regionserver/RSRpcServices.java | 10 ++- .../hbase/TestPartialResultsFromClientSide.java | 48 ++++++++++++ .../hadoop/hbase/regionserver/TestHRegion.java | 85 ++++++++++++++++++++++ 8 files changed, 202 insertions(+), 13 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java index a671b9f..53b7154 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java @@ -76,6 +76,7 @@ public class Get extends Query implements Row { private boolean checkExistenceOnly = false; private boolean closestRowBefore = false; private Map<byte [], NavigableSet<byte []>> familyMap = new TreeMap<>(Bytes.BYTES_COMPARATOR); + private long maxResultSize = -1; /** * Create a Get operation for the specified row. @@ -339,6 +340,21 @@ public class Get extends Query implements Row { return this; } + /** + * Set the maximum result size. The default is -1; this means that no specific + * maximum result size will be set for this Get. + * + * If set to a value greater than zero, the server may respond with a Result where + * {@link Result#mayHaveMoreCellsInRow()} is true. The user is required to handle + * this case. + * + * @param maxResultSize The maximum result size in bytes + */ + public Get setMaxResultSize(long maxResultSize) { + this.maxResultSize = maxResultSize; + return this; + } + /* Accessors */ /** @@ -459,6 +475,13 @@ public class Get extends Query implements Row { } /** + * @return the maximum result size in bytes. See {@link #setMaxResultSize(long)} + */ + public long getMaxResultSize() { + return maxResultSize; + } + + /** * Compile the details beyond the scope of getFingerprint (row, columns, * timestamps, etc.) into a Map along with the fingerprinted information. * Useful for debugging, logging, and administration tools. diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java index 5a01af9..1c17866 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java @@ -1382,7 +1382,7 @@ public final class ProtobufUtil { return (cells == null || cells.isEmpty()) ? (proto.getStale() ? EMPTY_RESULT_STALE : EMPTY_RESULT) - : Result.create(cells, null, proto.getStale()); + : Result.create(cells, null, proto.getStale(), proto.getPartial()); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java index c2544f6..d6c7811 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java @@ -592,6 +592,9 @@ public final class ProtobufUtil { if (proto.hasLoadColumnFamiliesOnDemand()) { get.setLoadColumnFamiliesOnDemand(proto.getLoadColumnFamiliesOnDemand()); } + if (proto.hasMaxResultSize()) { + get.setMaxResultSize(proto.getMaxResultSize()); + } return get; } @@ -1256,6 +1259,9 @@ public final class ProtobufUtil { if (loadColumnFamiliesOnDemand != null) { builder.setLoadColumnFamiliesOnDemand(loadColumnFamiliesOnDemand); } + if (get.getMaxResultSize() > 0) { + builder.setMaxResultSize(get.getMaxResultSize()); + } return builder.build(); } @@ -1457,6 +1463,7 @@ public final class ProtobufUtil { ClientProtos.Result.Builder builder = ClientProtos.Result.newBuilder(); builder.setAssociatedCellCount(size); builder.setStale(result.isStale()); + builder.setPartial(result.mayHaveMoreCellsInRow()); return builder.build(); } @@ -1547,7 +1554,7 @@ public final class ProtobufUtil { return (cells == null || cells.isEmpty()) ? (proto.getStale() ? EMPTY_RESULT_STALE : EMPTY_RESULT) - : Result.create(cells, null, proto.getStale()); + : Result.create(cells, null, proto.getStale(), proto.getPartial()); } diff --git a/hbase-protocol-shaded/src/main/protobuf/Client.proto b/hbase-protocol-shaded/src/main/protobuf/Client.proto index 13917b6..7081d50 100644 --- a/hbase-protocol-shaded/src/main/protobuf/Client.proto +++ b/hbase-protocol-shaded/src/main/protobuf/Client.proto @@ -90,6 +90,8 @@ message Get { optional Consistency consistency = 12 [default = STRONG]; repeated ColumnFamilyTimeRange cf_time_range = 13; optional bool load_column_families_on_demand = 14; /* DO NOT add defaults to load_column_families_on_demand. */ + + optional uint64 max_result_size = 15; } message Result { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 6628328..9751db8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -146,6 +146,7 @@ import org.apache.hadoop.hbase.quotas.RegionServerSpaceQuotaManager; import org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry; import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext; import org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker; +import org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope; import org.apache.hadoop.hbase.regionserver.throttle.CompactionThroughputControllerFactory; import org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController; import org.apache.hadoop.hbase.regionserver.throttle.StoreHotnessProtector; @@ -3864,8 +3865,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi Result result; if (returnResults) { // convert duplicate increment/append to get - List<Cell> results = region.get(toGet(mutation), false, nonceGroup, nonce); - result = Result.create(results); + result = region.get(toGet(mutation), false, nonceGroup, nonce); } else { result = Result.EMPTY_RESULT; } @@ -7497,9 +7497,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi @Override public Result get(final Get get) throws IOException { prepareGet(get); - List<Cell> results = get(get, true); - boolean stale = this.getRegionInfo().getReplicaId() != 0; - return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale); + return get(get, true, HConstants.NO_NONCE, HConstants.NO_NONCE); } void prepareGet(final Get get) throws IOException { @@ -7518,11 +7516,31 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi @Override public List<Cell> get(Get get, boolean withCoprocessor) throws IOException { - return get(get, withCoprocessor, HConstants.NO_NONCE, HConstants.NO_NONCE); + return getInternal(get, null, withCoprocessor, HConstants.NO_NONCE, HConstants.NO_NONCE); } - private List<Cell> get(Get get, boolean withCoprocessor, long nonceGroup, long nonce) - throws IOException { + private Result get(Get get, boolean withCoprocessor, long nonceGroup, long nonce) + throws IOException { + ScannerContext scannerContext = get.getMaxResultSize() > 0 + ? ScannerContext.newBuilder() + .setSizeLimit(LimitScope.BETWEEN_CELLS, get.getMaxResultSize(), get.getMaxResultSize()) + .build() + : null; + + List<Cell> result = getInternal(get, scannerContext, withCoprocessor, nonceGroup, nonce); + boolean stale = this.getRegionInfo().getReplicaId() != 0; + boolean mayHaveMoreCellsInRow = + scannerContext != null && scannerContext.mayHaveMoreCellsInRow(); + + return Result.create( + result, + get.isCheckExistenceOnly() ? !result.isEmpty() : null, + stale, + mayHaveMoreCellsInRow); + } + + private List<Cell> getInternal(Get get, ScannerContext scannerContext, boolean withCoprocessor, + long nonceGroup, long nonce) throws IOException { List<Cell> results = new ArrayList<>(); long before = EnvironmentEdgeManager.currentTime(); @@ -7539,7 +7557,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } try (RegionScanner scanner = getScanner(scan, null, nonceGroup, nonce)) { List<Cell> tmp = new ArrayList<>(); - scanner.next(tmp); + scanner.next(tmp, scannerContext); // Copy EC to heap, then close the scanner. // This can be an EXPENSIVE call. It may make an extra copy from offheap to onheap buffers. // See more details in HBASE-26036. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index 06e7ccf..b715c09 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -2668,10 +2668,15 @@ public class RSRpcServices implements HBaseRPCErrorHandler, if (scan.getLoadColumnFamiliesOnDemandValue() == null) { scan.setLoadColumnFamiliesOnDemand(region.isLoadingCfsOnDemandDefault()); } + + ScannerContext scannerContext = ScannerContext.newBuilder() + .setSizeLimit(LimitScope.BETWEEN_CELLS, get.getMaxResultSize(), get.getMaxResultSize()) + .build(); + RegionScannerImpl scanner = null; try { scanner = region.getScanner(scan); - scanner.next(results); + scanner.next(results, scannerContext); } finally { if (scanner != null) { if (closeCallBack == null) { @@ -2696,7 +2701,8 @@ public class RSRpcServices implements HBaseRPCErrorHandler, } region.metricsUpdateForGet(results, before); - return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale); + return Result.create(results, get.isCheckExistenceOnly() ? !results.isEmpty() : null, stale, + scannerContext.mayHaveMoreCellsInRow()); } private void checkBatchSizeAndLogLargeSize(MultiRequest request) throws ServiceException { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java index 4e2d133..19fb996 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java @@ -31,18 +31,26 @@ import java.util.List; import java.util.Set; import org.apache.hadoop.hbase.client.ClientScanner; import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.filter.BinaryComparator; +import org.apache.hadoop.hbase.filter.ByteArrayComparable; import org.apache.hadoop.hbase.filter.ColumnPrefixFilter; import org.apache.hadoop.hbase.filter.ColumnRangeFilter; +import org.apache.hadoop.hbase.filter.FamilyFilter; import org.apache.hadoop.hbase.filter.Filter; +import org.apache.hadoop.hbase.filter.FilterList; +import org.apache.hadoop.hbase.filter.FilterListWithAND; import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter; import org.apache.hadoop.hbase.filter.FirstKeyValueMatchingQualifiersFilter; +import org.apache.hadoop.hbase.filter.PageFilter; import org.apache.hadoop.hbase.filter.RandomRowFilter; +import org.apache.hadoop.hbase.protobuf.generated.FilterProtos; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.ClassSize; @@ -136,6 +144,46 @@ public class TestPartialResultsFromClientSide { TEST_UTIL.shutdownMiniCluster(); } + @Test + public void testGetPartialResults() throws Exception { + byte[] row = ROWS[0]; + + Result result; + int cf = 0; + int qf = 0; + int total = 0; + + do { + // this will ensure we always return only 1 result + Get get = new Get(row) + .setMaxResultSize(1); + + // we want to page through the entire row, this will ensure we always get the next + if (total > 0) { + get.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL, + new ColumnRangeFilter(QUALIFIERS[qf], true, null, false), + new FamilyFilter(CompareOperator.GREATER_OR_EQUAL, new BinaryComparator(FAMILIES[cf])))); + } + + // all values are the same, but there should be a value + result = TABLE.get(get); + assertTrue(String.format("Value for family %s (# %s) and qualifier %s (# %s)", + Bytes.toStringBinary(FAMILIES[cf]), cf, Bytes.toStringBinary(QUALIFIERS[qf]), qf), + Bytes.equals(VALUE, result.getValue(FAMILIES[cf], QUALIFIERS[qf]))); + + total++; + if (++qf >= NUM_QUALIFIERS) { + cf++; + qf = 0; + } + } while (result.mayHaveMoreCellsInRow()); + + // ensure we iterated all cells in row + assertEquals(NUM_COLS, total); + assertEquals(NUM_FAMILIES, cf); + assertEquals(0, qf); + } + /** * Ensure that the expected key values appear in a result returned from a scanner that is * combining partial results into complete results diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java index 9763841..3d00eb8 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java @@ -7861,4 +7861,89 @@ public class TestHRegion { assertFalse("Region lock holder should not have been interrupted", holderInterrupted.get()); } + @Test + public void testOversizedGetsReturnPartialResult() throws IOException { + HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1); + + Put p = new Put(row) + .addColumn(fam1, qual1, value1) + .addColumn(fam1, qual2, value2); + + region.put(p); + + Get get = new Get(row) + .addColumn(fam1, qual1) + .addColumn(fam1, qual2) + .setMaxResultSize(1); // 0 doesn't count as a limit, according to HBase + + Result r = region.get(get); + + assertTrue("Expected partial result, but result was not marked as partial", r.mayHaveMoreCellsInRow()); + } + + @Test + public void testGetsWithoutResultSizeLimitAreNotPartial() throws IOException { + HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1); + + Put p = new Put(row) + .addColumn(fam1, qual1, value1) + .addColumn(fam1, qual2, value2); + + region.put(p); + + Get get = new Get(row) + .addColumn(fam1, qual1) + .addColumn(fam1, qual2); + + Result r = region.get(get); + + assertFalse("Expected full result, but it was marked as partial", r.mayHaveMoreCellsInRow()); + assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1))); + assertTrue(Bytes.equals(value2, r.getValue(fam1, qual2))); + } + + @Test + public void testGetsWithinResultSizeLimitAreNotPartial() throws IOException { + HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1); + + Put p = new Put(row) + .addColumn(fam1, qual1, value1) + .addColumn(fam1, qual2, value2); + + region.put(p); + + Get get = new Get(row) + .addColumn(fam1, qual1) + .addColumn(fam1, qual2) + .setMaxResultSize(Long.MAX_VALUE); + + Result r = region.get(get); + + assertFalse("Expected full result, but it was marked as partial", r.mayHaveMoreCellsInRow()); + assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1))); + assertTrue(Bytes.equals(value2, r.getValue(fam1, qual2))); + } + + @Test + public void testGetsWithResultSizeLimitReturnPartialResults() throws IOException { + HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1); + + Put p = new Put(row) + .addColumn(fam1, qual1, value1) + .addColumn(fam1, qual2, value2); + + region.put(p); + + Get get = new Get(row) + .addColumn(fam1, qual1) + .addColumn(fam1, qual2) + .setMaxResultSize(10); + + Result r = region.get(get); + + assertTrue("Expected partial result, but it was marked as complete", r.mayHaveMoreCellsInRow()); + assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1))); + assertEquals("Got more results than expected", 1, r.size()); + } + }