HBASE-17557 HRegionServer#reportRegionSizesForQuotas() should respond to UnsupportedOperationException


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/7fb0ac26
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/7fb0ac26
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/7fb0ac26

Branch: refs/heads/master
Commit: 7fb0ac26e3a85a01b285078c198578216e3e8524
Parents: 6b334cd
Author: tedyu <yuzhih...@gmail.com>
Authored: Mon Jan 30 07:47:40 2017 -0800
Committer: Josh Elser <els...@apache.org>
Committed: Mon May 22 13:41:35 2017 -0400

----------------------------------------------------------------------
 .../quotas/FileSystemUtilizationChore.java      | 20 +++++++++++++---
 .../hbase/regionserver/HRegionServer.java       | 24 ++++++++++++++++----
 2 files changed, 36 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/7fb0ac26/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/FileSystemUtilizationChore.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/FileSystemUtilizationChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/FileSystemUtilizationChore.java
index 01540eb..efc17ff 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/FileSystemUtilizationChore.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/FileSystemUtilizationChore.java
@@ -53,6 +53,9 @@ public class FileSystemUtilizationChore extends ScheduledChore {
   static final String FS_UTILIZATION_MAX_ITERATION_DURATION_KEY = 
"hbase.regionserver.quotas.fs.utilization.chore.max.iteration.millis";
   static final long FS_UTILIZATION_MAX_ITERATION_DURATION_DEFAULT = 5000L;
 
+  private int numberOfCyclesToSkip = 0, prevNumberOfCyclesToSkip = 0;
+  private static final int CYCLE_UPPER_BOUND = 32;
+
   private final HRegionServer rs;
   private final long maxIterationMillis;
   private Iterator<Region> leftoverRegions;
@@ -67,6 +70,10 @@ public class FileSystemUtilizationChore extends ScheduledChore {
 
   @Override
   protected void chore() {
+    if (numberOfCyclesToSkip > 0) {
+      numberOfCyclesToSkip--;
+      return;
+    }
     final Map<HRegionInfo,Long> onlineRegionSizes = new HashMap<>();
     final Set<Region> onlineRegions = new HashSet<>(rs.getOnlineRegions());
     // Process the regions from the last run if we have any. If we are somehow 
having difficulty
@@ -126,7 +133,14 @@ public class FileSystemUtilizationChore extends ScheduledChore {
           + skippedSplitParents + " regions due to being the parent of a 
split, and"
           + skippedRegionReplicas + " regions due to being region replicas.");
     }
-    reportRegionSizesToMaster(onlineRegionSizes);
+    if (!reportRegionSizesToMaster(onlineRegionSizes)) {
+      // backoff reporting
+      numberOfCyclesToSkip = prevNumberOfCyclesToSkip > 0 ? 2 * 
prevNumberOfCyclesToSkip : 1;
+      if (numberOfCyclesToSkip > CYCLE_UPPER_BOUND) {
+        numberOfCyclesToSkip = CYCLE_UPPER_BOUND;
+      }
+      prevNumberOfCyclesToSkip = numberOfCyclesToSkip;
+    }
   }
 
   /**
@@ -166,8 +180,8 @@ public class FileSystemUtilizationChore extends ScheduledChore {
    *
    * @param onlineRegionSizes The computed region sizes to report.
    */
-  void reportRegionSizesToMaster(Map<HRegionInfo,Long> onlineRegionSizes) {
-    this.rs.reportRegionSizesForQuotas(onlineRegionSizes);
+  boolean reportRegionSizesToMaster(Map<HRegionInfo,Long> onlineRegionSizes) {
+    return this.rs.reportRegionSizesForQuotas(onlineRegionSizes);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/7fb0ac26/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 8130312..2b3e8f5 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -66,6 +66,7 @@ import org.apache.hadoop.hbase.ChoreService;
 import org.apache.hadoop.hbase.ClockOutOfSyncException;
 import org.apache.hadoop.hbase.CoordinatedStateManager;
 import org.apache.hadoop.hbase.CoordinatedStateManagerFactory;
+import org.apache.hadoop.hbase.DoNotRetryIOException;
 import org.apache.hadoop.hbase.HBaseConfiguration;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HConstants;
@@ -1248,13 +1249,14 @@ public class HRegionServer extends HasThread implements
    * Reports the given map of Regions and their size on the filesystem to the 
active Master.
    *
    * @param onlineRegionSizes A map of region info to size in bytes
+   * @return false if FileSystemUtilizationChore should pause reporting to 
master. true otherwise
    */
-  public void reportRegionSizesForQuotas(final Map<HRegionInfo, Long> 
onlineRegionSizes) {
+  public boolean reportRegionSizesForQuotas(final Map<HRegionInfo, Long> 
onlineRegionSizes) {
     RegionServerStatusService.BlockingInterface rss = rssStub;
     if (rss == null) {
       // the current server could be stopping.
       LOG.trace("Skipping Region size report to HMaster as stub is null");
-      return;
+      return true;
     }
     try {
       RegionSpaceUseReportRequest request = buildRegionSpaceUseReportRequest(
@@ -1263,16 +1265,28 @@ public class HRegionServer extends HasThread implements
     } catch (ServiceException se) {
       IOException ioe = ProtobufUtil.getRemoteException(se);
       if (ioe instanceof PleaseHoldException) {
-        LOG.trace("Failed to report region sizes to Master because it is 
initializing. This will be retried.", ioe);
+        LOG.trace("Failed to report region sizes to Master because it is 
initializing."
+            + " This will be retried.", ioe);
         // The Master is coming up. Will retry the report later. Avoid 
re-creating the stub.
-        return;
+        return true;
       }
-      LOG.debug("Failed to report region sizes to Master. This will be 
retried.", ioe);
       if (rssStub == rss) {
         rssStub = null;
       }
       createRegionServerStatusStub(true);
+      if (ioe instanceof DoNotRetryIOException) {
+        DoNotRetryIOException doNotRetryEx = (DoNotRetryIOException) ioe;
+        if (doNotRetryEx.getCause() != null) {
+          Throwable t = doNotRetryEx.getCause();
+          if (t instanceof UnsupportedOperationException) {
+            LOG.debug("master doesn't support ReportRegionSpaceUse, pause 
before retrying");
+            return false;
+          }
+        }
+      }
+      LOG.debug("Failed to report region sizes to Master. This will be 
retried.", ioe);
     }
+    return true;
   }
 
   /**

Reply via email to