HBASE-17557 HRegionServer#reportRegionSizesForQuotas() should respond to UnsupportedOperationException
Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/7fb0ac26 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/7fb0ac26 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/7fb0ac26 Branch: refs/heads/master Commit: 7fb0ac26e3a85a01b285078c198578216e3e8524 Parents: 6b334cd Author: tedyu <yuzhih...@gmail.com> Authored: Mon Jan 30 07:47:40 2017 -0800 Committer: Josh Elser <els...@apache.org> Committed: Mon May 22 13:41:35 2017 -0400 ---------------------------------------------------------------------- .../quotas/FileSystemUtilizationChore.java | 20 +++++++++++++--- .../hbase/regionserver/HRegionServer.java | 24 ++++++++++++++++---- 2 files changed, 36 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/7fb0ac26/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/FileSystemUtilizationChore.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/FileSystemUtilizationChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/FileSystemUtilizationChore.java index 01540eb..efc17ff 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/FileSystemUtilizationChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/quotas/FileSystemUtilizationChore.java @@ -53,6 +53,9 @@ public class FileSystemUtilizationChore extends ScheduledChore { static final String FS_UTILIZATION_MAX_ITERATION_DURATION_KEY = "hbase.regionserver.quotas.fs.utilization.chore.max.iteration.millis"; static final long FS_UTILIZATION_MAX_ITERATION_DURATION_DEFAULT = 5000L; + private int numberOfCyclesToSkip = 0, prevNumberOfCyclesToSkip = 0; + private static final int CYCLE_UPPER_BOUND = 32; + private final HRegionServer rs; private final long maxIterationMillis; private Iterator<Region> leftoverRegions; @@ -67,6 +70,10 @@ public class FileSystemUtilizationChore extends ScheduledChore { @Override protected void chore() { + if (numberOfCyclesToSkip > 0) { + numberOfCyclesToSkip--; + return; + } final Map<HRegionInfo,Long> onlineRegionSizes = new HashMap<>(); final Set<Region> onlineRegions = new HashSet<>(rs.getOnlineRegions()); // Process the regions from the last run if we have any. If we are somehow having difficulty @@ -126,7 +133,14 @@ public class FileSystemUtilizationChore extends ScheduledChore { + skippedSplitParents + " regions due to being the parent of a split, and" + skippedRegionReplicas + " regions due to being region replicas."); } - reportRegionSizesToMaster(onlineRegionSizes); + if (!reportRegionSizesToMaster(onlineRegionSizes)) { + // backoff reporting + numberOfCyclesToSkip = prevNumberOfCyclesToSkip > 0 ? 2 * prevNumberOfCyclesToSkip : 1; + if (numberOfCyclesToSkip > CYCLE_UPPER_BOUND) { + numberOfCyclesToSkip = CYCLE_UPPER_BOUND; + } + prevNumberOfCyclesToSkip = numberOfCyclesToSkip; + } } /** @@ -166,8 +180,8 @@ public class FileSystemUtilizationChore extends ScheduledChore { * * @param onlineRegionSizes The computed region sizes to report. */ - void reportRegionSizesToMaster(Map<HRegionInfo,Long> onlineRegionSizes) { - this.rs.reportRegionSizesForQuotas(onlineRegionSizes); + boolean reportRegionSizesToMaster(Map<HRegionInfo,Long> onlineRegionSizes) { + return this.rs.reportRegionSizesForQuotas(onlineRegionSizes); } /** http://git-wip-us.apache.org/repos/asf/hbase/blob/7fb0ac26/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 8130312..2b3e8f5 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -66,6 +66,7 @@ import org.apache.hadoop.hbase.ChoreService; import org.apache.hadoop.hbase.ClockOutOfSyncException; import org.apache.hadoop.hbase.CoordinatedStateManager; import org.apache.hadoop.hbase.CoordinatedStateManagerFactory; +import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HBaseInterfaceAudience; import org.apache.hadoop.hbase.HConstants; @@ -1248,13 +1249,14 @@ public class HRegionServer extends HasThread implements * Reports the given map of Regions and their size on the filesystem to the active Master. * * @param onlineRegionSizes A map of region info to size in bytes + * @return false if FileSystemUtilizationChore should pause reporting to master. true otherwise */ - public void reportRegionSizesForQuotas(final Map<HRegionInfo, Long> onlineRegionSizes) { + public boolean reportRegionSizesForQuotas(final Map<HRegionInfo, Long> onlineRegionSizes) { RegionServerStatusService.BlockingInterface rss = rssStub; if (rss == null) { // the current server could be stopping. LOG.trace("Skipping Region size report to HMaster as stub is null"); - return; + return true; } try { RegionSpaceUseReportRequest request = buildRegionSpaceUseReportRequest( @@ -1263,16 +1265,28 @@ public class HRegionServer extends HasThread implements } catch (ServiceException se) { IOException ioe = ProtobufUtil.getRemoteException(se); if (ioe instanceof PleaseHoldException) { - LOG.trace("Failed to report region sizes to Master because it is initializing. This will be retried.", ioe); + LOG.trace("Failed to report region sizes to Master because it is initializing." + + " This will be retried.", ioe); // The Master is coming up. Will retry the report later. Avoid re-creating the stub. - return; + return true; } - LOG.debug("Failed to report region sizes to Master. This will be retried.", ioe); if (rssStub == rss) { rssStub = null; } createRegionServerStatusStub(true); + if (ioe instanceof DoNotRetryIOException) { + DoNotRetryIOException doNotRetryEx = (DoNotRetryIOException) ioe; + if (doNotRetryEx.getCause() != null) { + Throwable t = doNotRetryEx.getCause(); + if (t instanceof UnsupportedOperationException) { + LOG.debug("master doesn't support ReportRegionSpaceUse, pause before retrying"); + return false; + } + } + } + LOG.debug("Failed to report region sizes to Master. This will be retried.", ioe); } + return true; } /**