This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2 by this push:
     new 8f16e34  HBASE-26122: Implement an optional maximum size for Gets, 
after which a partial result is returned (#3532)
8f16e34 is described below

commit 8f16e34eb277097678de73aab1d242c959511ef7
Author: Bryan Beaudreault <bbeaudrea...@hubspot.com>
AuthorDate: Tue Aug 10 23:38:06 2021 -0400

    HBASE-26122: Implement an optional maximum size for Gets, after which a 
partial result is returned (#3532)
    
    
    Signed-off-by: stack <st...@apache.org>
---
 .../java/org/apache/hadoop/hbase/client/Get.java   | 23 ++++++
 .../apache/hadoop/hbase/protobuf/ProtobufUtil.java |  2 +-
 .../hadoop/hbase/shaded/protobuf/ProtobufUtil.java |  9 ++-
 .../src/main/protobuf/Client.proto                 |  2 +
 .../apache/hadoop/hbase/regionserver/HRegion.java  | 36 ++++++---
 .../hadoop/hbase/regionserver/RSRpcServices.java   | 10 ++-
 .../hbase/TestPartialResultsFromClientSide.java    | 48 ++++++++++++
 .../hadoop/hbase/regionserver/TestHRegion.java     | 85 ++++++++++++++++++++++
 8 files changed, 202 insertions(+), 13 deletions(-)

diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java
index a671b9f..53b7154 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Get.java
@@ -76,6 +76,7 @@ public class Get extends Query implements Row {
   private boolean checkExistenceOnly = false;
   private boolean closestRowBefore = false;
   private Map<byte [], NavigableSet<byte []>> familyMap = new 
TreeMap<>(Bytes.BYTES_COMPARATOR);
+  private long maxResultSize = -1;
 
   /**
    * Create a Get operation for the specified row.
@@ -339,6 +340,21 @@ public class Get extends Query implements Row {
     return this;
   }
 
+  /**
+   * Set the maximum result size. The default is -1; this means that no 
specific
+   * maximum result size will be set for this Get.
+   *
+   * If set to a value greater than zero, the server may respond with a Result 
where
+   * {@link Result#mayHaveMoreCellsInRow()} is true. The user is required to 
handle
+   * this case.
+   *
+   * @param maxResultSize The maximum result size in bytes
+   */
+  public Get setMaxResultSize(long maxResultSize) {
+    this.maxResultSize = maxResultSize;
+    return this;
+  }
+
   /* Accessors */
 
   /**
@@ -459,6 +475,13 @@ public class Get extends Query implements Row {
   }
 
   /**
+   * @return the maximum result size in bytes. See {@link 
#setMaxResultSize(long)}
+   */
+  public long getMaxResultSize() {
+    return maxResultSize;
+  }
+
+  /**
    * Compile the details beyond the scope of getFingerprint (row, columns,
    * timestamps, etc.) into a Map along with the fingerprinted information.
    * Useful for debugging, logging, and administration tools.
diff --git 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java
index 5a01af9..1c17866 100644
--- 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java
+++ 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java
@@ -1382,7 +1382,7 @@ public final class ProtobufUtil {
 
     return (cells == null || cells.isEmpty())
         ? (proto.getStale() ? EMPTY_RESULT_STALE : EMPTY_RESULT)
-        : Result.create(cells, null, proto.getStale());
+        : Result.create(cells, null, proto.getStale(), proto.getPartial());
   }
 
 
diff --git 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
index c2544f6..d6c7811 100644
--- 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
+++ 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/shaded/protobuf/ProtobufUtil.java
@@ -592,6 +592,9 @@ public final class ProtobufUtil {
     if (proto.hasLoadColumnFamiliesOnDemand()) {
       get.setLoadColumnFamiliesOnDemand(proto.getLoadColumnFamiliesOnDemand());
     }
+    if (proto.hasMaxResultSize()) {
+      get.setMaxResultSize(proto.getMaxResultSize());
+    }
     return get;
   }
 
@@ -1256,6 +1259,9 @@ public final class ProtobufUtil {
     if (loadColumnFamiliesOnDemand != null) {
       builder.setLoadColumnFamiliesOnDemand(loadColumnFamiliesOnDemand);
     }
+    if (get.getMaxResultSize() > 0) {
+      builder.setMaxResultSize(get.getMaxResultSize());
+    }
     return builder.build();
   }
 
@@ -1457,6 +1463,7 @@ public final class ProtobufUtil {
     ClientProtos.Result.Builder builder = ClientProtos.Result.newBuilder();
     builder.setAssociatedCellCount(size);
     builder.setStale(result.isStale());
+    builder.setPartial(result.mayHaveMoreCellsInRow());
     return builder.build();
   }
 
@@ -1547,7 +1554,7 @@ public final class ProtobufUtil {
 
     return (cells == null || cells.isEmpty())
         ? (proto.getStale() ? EMPTY_RESULT_STALE : EMPTY_RESULT)
-        : Result.create(cells, null, proto.getStale());
+        : Result.create(cells, null, proto.getStale(), proto.getPartial());
   }
 
 
diff --git a/hbase-protocol-shaded/src/main/protobuf/Client.proto 
b/hbase-protocol-shaded/src/main/protobuf/Client.proto
index 13917b6..7081d50 100644
--- a/hbase-protocol-shaded/src/main/protobuf/Client.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/Client.proto
@@ -90,6 +90,8 @@ message Get {
   optional Consistency consistency = 12 [default = STRONG];
   repeated ColumnFamilyTimeRange cf_time_range = 13;
   optional bool load_column_families_on_demand = 14; /* DO NOT add defaults to 
load_column_families_on_demand. */
+
+  optional uint64 max_result_size = 15;
 }
 
 message Result {
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
index 6628328..9751db8 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
@@ -146,6 +146,7 @@ import 
org.apache.hadoop.hbase.quotas.RegionServerSpaceQuotaManager;
 import 
org.apache.hadoop.hbase.regionserver.MultiVersionConcurrencyControl.WriteEntry;
 import org.apache.hadoop.hbase.regionserver.compactions.CompactionContext;
 import 
org.apache.hadoop.hbase.regionserver.compactions.CompactionLifeCycleTracker;
+import org.apache.hadoop.hbase.regionserver.ScannerContext.LimitScope;
 import 
org.apache.hadoop.hbase.regionserver.throttle.CompactionThroughputControllerFactory;
 import 
org.apache.hadoop.hbase.regionserver.throttle.NoLimitThroughputController;
 import org.apache.hadoop.hbase.regionserver.throttle.StoreHotnessProtector;
@@ -3864,8 +3865,7 @@ public class HRegion implements HeapSize, 
PropagatingConfigurationObserver, Regi
             Result result;
             if (returnResults) {
               // convert duplicate increment/append to get
-              List<Cell> results = region.get(toGet(mutation), false, 
nonceGroup, nonce);
-              result = Result.create(results);
+              result = region.get(toGet(mutation), false, nonceGroup, nonce);
             } else {
               result = Result.EMPTY_RESULT;
             }
@@ -7497,9 +7497,7 @@ public class HRegion implements HeapSize, 
PropagatingConfigurationObserver, Regi
   @Override
   public Result get(final Get get) throws IOException {
     prepareGet(get);
-    List<Cell> results = get(get, true);
-    boolean stale = this.getRegionInfo().getReplicaId() != 0;
-    return Result.create(results, get.isCheckExistenceOnly() ? 
!results.isEmpty() : null, stale);
+    return get(get, true, HConstants.NO_NONCE, HConstants.NO_NONCE);
   }
 
   void prepareGet(final Get get) throws IOException {
@@ -7518,11 +7516,31 @@ public class HRegion implements HeapSize, 
PropagatingConfigurationObserver, Regi
 
   @Override
   public List<Cell> get(Get get, boolean withCoprocessor) throws IOException {
-    return get(get, withCoprocessor, HConstants.NO_NONCE, HConstants.NO_NONCE);
+    return getInternal(get, null, withCoprocessor, HConstants.NO_NONCE, 
HConstants.NO_NONCE);
   }
 
-  private List<Cell> get(Get get, boolean withCoprocessor, long nonceGroup, 
long nonce)
-      throws IOException {
+  private Result get(Get get, boolean withCoprocessor, long nonceGroup, long 
nonce)
+    throws IOException {
+    ScannerContext scannerContext = get.getMaxResultSize() > 0
+      ? ScannerContext.newBuilder()
+      .setSizeLimit(LimitScope.BETWEEN_CELLS, get.getMaxResultSize(), 
get.getMaxResultSize())
+      .build()
+      : null;
+
+    List<Cell> result = getInternal(get, scannerContext, withCoprocessor, 
nonceGroup, nonce);
+    boolean stale = this.getRegionInfo().getReplicaId() != 0;
+    boolean mayHaveMoreCellsInRow =
+      scannerContext != null && scannerContext.mayHaveMoreCellsInRow();
+
+    return Result.create(
+      result,
+      get.isCheckExistenceOnly() ? !result.isEmpty() : null,
+      stale,
+      mayHaveMoreCellsInRow);
+  }
+
+  private List<Cell> getInternal(Get get, ScannerContext scannerContext, 
boolean withCoprocessor,
+    long nonceGroup, long nonce) throws IOException {
     List<Cell> results = new ArrayList<>();
     long before =  EnvironmentEdgeManager.currentTime();
 
@@ -7539,7 +7557,7 @@ public class HRegion implements HeapSize, 
PropagatingConfigurationObserver, Regi
     }
     try (RegionScanner scanner = getScanner(scan, null, nonceGroup, nonce)) {
       List<Cell> tmp = new ArrayList<>();
-      scanner.next(tmp);
+      scanner.next(tmp, scannerContext);
       // Copy EC to heap, then close the scanner.
       // This can be an EXPENSIVE call. It may make an extra copy from offheap 
to onheap buffers.
       // See more details in HBASE-26036.
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
index 06e7ccf..b715c09 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java
@@ -2668,10 +2668,15 @@ public class RSRpcServices implements 
HBaseRPCErrorHandler,
     if (scan.getLoadColumnFamiliesOnDemandValue() == null) {
       scan.setLoadColumnFamiliesOnDemand(region.isLoadingCfsOnDemandDefault());
     }
+
+    ScannerContext scannerContext = ScannerContext.newBuilder()
+      .setSizeLimit(LimitScope.BETWEEN_CELLS, get.getMaxResultSize(), 
get.getMaxResultSize())
+      .build();
+
     RegionScannerImpl scanner = null;
     try {
       scanner = region.getScanner(scan);
-      scanner.next(results);
+      scanner.next(results, scannerContext);
     } finally {
       if (scanner != null) {
         if (closeCallBack == null) {
@@ -2696,7 +2701,8 @@ public class RSRpcServices implements 
HBaseRPCErrorHandler,
     }
     region.metricsUpdateForGet(results, before);
 
-    return Result.create(results, get.isCheckExistenceOnly() ? 
!results.isEmpty() : null, stale);
+    return Result.create(results, get.isCheckExistenceOnly() ? 
!results.isEmpty() : null, stale,
+      scannerContext.mayHaveMoreCellsInRow());
   }
 
   private void checkBatchSizeAndLogLargeSize(MultiRequest request) throws 
ServiceException {
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java
index 4e2d133..19fb996 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestPartialResultsFromClientSide.java
@@ -31,18 +31,26 @@ import java.util.List;
 import java.util.Set;
 import org.apache.hadoop.hbase.client.ClientScanner;
 import org.apache.hadoop.hbase.client.Delete;
+import org.apache.hadoop.hbase.client.Get;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.client.ResultScanner;
 import org.apache.hadoop.hbase.client.Scan;
 import org.apache.hadoop.hbase.client.Table;
+import org.apache.hadoop.hbase.filter.BinaryComparator;
+import org.apache.hadoop.hbase.filter.ByteArrayComparable;
 import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
 import org.apache.hadoop.hbase.filter.ColumnRangeFilter;
+import org.apache.hadoop.hbase.filter.FamilyFilter;
 import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.filter.FilterList;
+import org.apache.hadoop.hbase.filter.FilterListWithAND;
 import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
 import org.apache.hadoop.hbase.filter.FirstKeyValueMatchingQualifiersFilter;
+import org.apache.hadoop.hbase.filter.PageFilter;
 import org.apache.hadoop.hbase.filter.RandomRowFilter;
+import org.apache.hadoop.hbase.protobuf.generated.FilterProtos;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.ClassSize;
@@ -136,6 +144,46 @@ public class TestPartialResultsFromClientSide {
     TEST_UTIL.shutdownMiniCluster();
   }
 
+  @Test
+  public void testGetPartialResults() throws Exception {
+    byte[] row = ROWS[0];
+
+    Result result;
+    int cf = 0;
+    int qf = 0;
+    int total = 0;
+
+    do {
+      // this will ensure we always return only 1 result
+      Get get = new Get(row)
+        .setMaxResultSize(1);
+
+      // we want to page through the entire row, this will ensure we always 
get the next
+      if (total > 0) {
+        get.setFilter(new FilterList(FilterList.Operator.MUST_PASS_ALL,
+          new ColumnRangeFilter(QUALIFIERS[qf], true, null, false),
+          new FamilyFilter(CompareOperator.GREATER_OR_EQUAL, new 
BinaryComparator(FAMILIES[cf]))));
+      }
+
+      // all values are the same, but there should be a value
+      result = TABLE.get(get);
+      assertTrue(String.format("Value for family %s (# %s) and qualifier %s (# 
%s)",
+        Bytes.toStringBinary(FAMILIES[cf]), cf, 
Bytes.toStringBinary(QUALIFIERS[qf]), qf),
+        Bytes.equals(VALUE, result.getValue(FAMILIES[cf], QUALIFIERS[qf])));
+
+      total++;
+      if (++qf >= NUM_QUALIFIERS) {
+        cf++;
+        qf = 0;
+      }
+    } while (result.mayHaveMoreCellsInRow());
+
+    // ensure we iterated all cells in row
+    assertEquals(NUM_COLS, total);
+    assertEquals(NUM_FAMILIES, cf);
+    assertEquals(0, qf);
+  }
+
   /**
    * Ensure that the expected key values appear in a result returned from a 
scanner that is
    * combining partial results into complete results
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
index 9763841..3d00eb8 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHRegion.java
@@ -7861,4 +7861,89 @@ public class TestHRegion {
     assertFalse("Region lock holder should not have been interrupted", 
holderInterrupted.get());
   }
 
+  @Test
+  public void testOversizedGetsReturnPartialResult() throws IOException {
+    HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1);
+
+    Put p = new Put(row)
+      .addColumn(fam1, qual1, value1)
+      .addColumn(fam1, qual2, value2);
+
+    region.put(p);
+
+    Get get = new Get(row)
+      .addColumn(fam1, qual1)
+      .addColumn(fam1, qual2)
+      .setMaxResultSize(1); // 0 doesn't count as a limit, according to HBase
+
+    Result r = region.get(get);
+
+    assertTrue("Expected partial result, but result was not marked as 
partial", r.mayHaveMoreCellsInRow());
+  }
+
+  @Test
+  public void testGetsWithoutResultSizeLimitAreNotPartial() throws IOException 
{
+    HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1);
+
+    Put p = new Put(row)
+      .addColumn(fam1, qual1, value1)
+      .addColumn(fam1, qual2, value2);
+
+    region.put(p);
+
+    Get get = new Get(row)
+      .addColumn(fam1, qual1)
+      .addColumn(fam1, qual2);
+
+    Result r = region.get(get);
+
+    assertFalse("Expected full result, but it was marked as partial", 
r.mayHaveMoreCellsInRow());
+    assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1)));
+    assertTrue(Bytes.equals(value2, r.getValue(fam1, qual2)));
+  }
+
+  @Test
+  public void testGetsWithinResultSizeLimitAreNotPartial() throws IOException {
+    HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1);
+
+    Put p = new Put(row)
+      .addColumn(fam1, qual1, value1)
+      .addColumn(fam1, qual2, value2);
+
+    region.put(p);
+
+    Get get = new Get(row)
+      .addColumn(fam1, qual1)
+      .addColumn(fam1, qual2)
+      .setMaxResultSize(Long.MAX_VALUE);
+
+    Result r = region.get(get);
+
+    assertFalse("Expected full result, but it was marked as partial", 
r.mayHaveMoreCellsInRow());
+    assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1)));
+    assertTrue(Bytes.equals(value2, r.getValue(fam1, qual2)));
+  }
+
+  @Test
+  public void testGetsWithResultSizeLimitReturnPartialResults() throws 
IOException {
+    HRegion region = initHRegion(tableName, name.getMethodName(), CONF, fam1);
+
+    Put p = new Put(row)
+      .addColumn(fam1, qual1, value1)
+      .addColumn(fam1, qual2, value2);
+
+    region.put(p);
+
+    Get get = new Get(row)
+      .addColumn(fam1, qual1)
+      .addColumn(fam1, qual2)
+      .setMaxResultSize(10);
+
+    Result r = region.get(get);
+
+    assertTrue("Expected partial result, but it was marked as complete", 
r.mayHaveMoreCellsInRow());
+    assertTrue(Bytes.equals(value1, r.getValue(fam1, qual1)));
+    assertEquals("Got more results than expected", 1, r.size());
+  }
+
 }

Reply via email to