Repository: hbase
Updated Branches:
  refs/heads/master 2b1cbeb28 -> fe4438361


HBASE-16209 provide an ExponentialBackOffPolicy sleep between failed region open requests

Signed-off-by: Elliott Clark <ecl...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/fe443836
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/fe443836
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/fe443836

Branch: refs/heads/master
Commit: fe44383619d0d11396382f2bfa493ad93c8b0e5f
Parents: 2b1cbeb
Author: Joseph Hwang <j...@fb.com>
Authored: Tue Jul 26 11:13:32 2016 -0700
Committer: Elliott Clark <ecl...@apache.org>
Committed: Fri Jul 29 09:47:00 2016 -0700

----------------------------------------------------------------------
 .../master/AssignmentManagerStatusTmpl.jamon    | 20 ++++++-
 .../hadoop/hbase/master/AssignmentManager.java  | 56 ++++++++++++++++++--
 .../hadoop/hbase/master/RegionStates.java       |  8 ++-
 .../hbase/master/TestMasterStatusServlet.java   |  8 ++-
 4 files changed, 84 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/fe443836/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
 
b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
index e2ae09d..06b320f 100644
--- 
a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
+++ 
b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/master/AssignmentManagerStatusTmpl.jamon
@@ -25,6 +25,8 @@ org.apache.hadoop.hbase.HBaseConfiguration;
 org.apache.hadoop.hbase.HConstants;
 java.util.HashSet;
 java.util.SortedSet;
+java.util.Map;
+java.util.concurrent.atomic.AtomicInteger;
 </%import>
 <%args>
 AssignmentManager assignmentManager;
@@ -32,7 +34,9 @@ int limit = 100;
 </%args>
 
 <%java SortedSet<RegionState> rit = assignmentManager
-  .getRegionStates().getRegionsInTransitionOrderedByTimestamp(); %>
+  .getRegionStates().getRegionsInTransitionOrderedByTimestamp();
+  Map<String, AtomicInteger> failedRegionTracker = 
assignmentManager.getFailedOpenTracker();
+   %>
 
 <%if !rit.isEmpty() %>
 <%java>
@@ -83,7 +87,7 @@ int numOfPages = (int) Math.ceil(numOfRITs * 1.0 / 
ritsPerPage);
              <div class="tab-pane" id="tab_rits<% (recordItr / ritsPerPage) + 
1 %>">
                  </%if>
                  <table class="table table-striped" 
style="margin-bottom:0px;"><tr><th>Region</th>
-                     <th>State</th><th>RIT time (ms)</th></tr>
+                     <th>State</th><th>RIT time (ms)</th> <th>Retries 
</th></tr>
              </%if>
 
              <%if ritsOverThreshold.contains(rs.getRegion().getEncodedName()) 
%>
@@ -93,9 +97,21 @@ int numOfPages = (int) Math.ceil(numOfRITs * 1.0 / 
ritsPerPage);
             <%else>
                     <tr>
             </%if>
+                        <%java>
+                          String retryStatus = "0";
+                          String name = rs.getRegion().getEncodedName();
+                          RegionState state = assignmentManager.getState(name);
+                          AtomicInteger numOpenRetries = 
failedRegionTracker.get(name);
+                          if (numOpenRetries != null ) {
+                            retryStatus = 
Integer.toString(numOpenRetries.get());
+                          } else if (state.getState() ==  
RegionState.State.FAILED_OPEN) {
+                             retryStatus = "Failed";
+                          }
+                        </%java>
                         <td><% rs.getRegion().getEncodedName() %></td><td>
                         <% 
HRegionInfo.getDescriptiveNameFromRegionStateForDisplay(rs, conf) %></td>
                         <td><% (currentTime - rs.getStamp()) %> </td>
+                        <td> <% retryStatus %> </td>
                      </tr>
                      <%java recordItr++; %>
              <%if (recordItr % ritsPerPage) == 0 %>

http://git-wip-us.apache.org/repos/asf/hbase/blob/fe443836/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
index 2ffe466..ffdbac8 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
@@ -36,6 +36,7 @@ import java.util.TreeMap;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.CopyOnWriteArrayList;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -87,6 +88,7 @@ import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.KeyLocker;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.PairOfSameType;
+import org.apache.hadoop.hbase.util.RetryCounter;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
@@ -149,8 +151,8 @@ public class AssignmentManager {
 
   private final ExecutorService executorService;
 
-  // Thread pool executor service. TODO, consolidate with executorService?
   private java.util.concurrent.ExecutorService threadPoolExecutorService;
+  private ScheduledThreadPoolExecutor scheduledThreadPoolExecutor;
 
   private final RegionStates regionStates;
 
@@ -202,6 +204,8 @@ public class AssignmentManager {
 
   private RegionStateListener regionStateListener;
 
+  private RetryCounter.BackoffPolicy backoffPolicy;
+  private RetryCounter.RetryConfig retryConfig;
   /**
    * Constructs a new assignment manager.
    *
@@ -240,8 +244,13 @@ public class AssignmentManager {
         "hbase.meta.assignment.retry.sleeptime", 1000l);
     this.balancer = balancer;
     int maxThreads = conf.getInt("hbase.assignment.threads.max", 30);
+
     this.threadPoolExecutorService = Threads.getBoundedCachedThreadPool(
-      maxThreads, 60L, TimeUnit.SECONDS, 
Threads.newDaemonThreadFactory("AM."));
+        maxThreads, 60L, TimeUnit.SECONDS, 
Threads.newDaemonThreadFactory("AM."));
+
+    this.scheduledThreadPoolExecutor = new ScheduledThreadPoolExecutor(1,
+        Threads.newDaemonThreadFactory("AM.Scheduler"));
+
     this.regionStates = new RegionStates(
       server, tableStateManager, serverManager, regionStateStore);
 
@@ -254,6 +263,23 @@ public class AssignmentManager {
 
     this.metricsAssignmentManager = new MetricsAssignmentManager();
     this.tableLockManager = tableLockManager;
+
+    // Configurations for retrying opening a region on receiving a FAILED_OPEN
+    this.retryConfig = new RetryCounter.RetryConfig();
+    
this.retryConfig.setSleepInterval(conf.getLong("hbase.assignment.retry.sleep.initial",
 0l));
+    // Set the max time limit to the initial sleep interval so we use a constant time sleep strategy
+    // if the user does not set a max sleep limit
+    
this.retryConfig.setMaxSleepTime(conf.getLong("hbase.assignment.retry.sleep.max",
+        retryConfig.getSleepInterval()));
+    this.backoffPolicy = getBackoffPolicy();
+  }
+
+  /**
+   * Returns the backoff policy used for Failed Region Open retries
+   * @return the backoff policy used for Failed Region Open retries
+   */
+  RetryCounter.BackoffPolicy getBackoffPolicy() {
+    return new RetryCounter.ExponentialBackoffPolicyWithLimit();
   }
 
   MetricsAssignmentManager getAssignmentManagerMetrics() {
@@ -2028,6 +2054,11 @@ public class AssignmentManager {
     threadPoolExecutorService.submit(new AssignCallable(this, regionInfo));
   }
 
+  void invokeAssignLater(HRegionInfo regionInfo, long sleepMillis) {
+    scheduledThreadPoolExecutor.schedule(new DelayedAssignCallable(
+        new AssignCallable(this, regionInfo)), sleepMillis, 
TimeUnit.MILLISECONDS);
+  }
+
   void invokeUnAssign(HRegionInfo regionInfo) {
     threadPoolExecutorService.submit(new UnAssignCallable(this, regionInfo));
   }
@@ -2233,7 +2264,10 @@ public class AssignmentManager {
         } catch (HBaseIOException e) {
           LOG.warn("Failed to get region plan", e);
         }
-        invokeAssign(hri);
+        // Schedule the retry after a backoff delay rather than making the current thread sleep
+        long sleepTime = backoffPolicy.getBackoffTime(retryConfig,
+            failedOpenTracker.get(encodedName).get());
+        invokeAssignLater(hri, sleepTime);
       }
     }
     // Null means no error
@@ -2732,6 +2766,10 @@ public class AssignmentManager {
     return replicasToClose;
   }
 
+  public Map<String, AtomicInteger> getFailedOpenTracker() {return 
failedOpenTracker;}
+
+  public RegionState getState(String encodedName) {return 
regionStates.getRegionState(encodedName);}
+
   /**
    * A region is offline.  The new state should be the specified one,
    * if not null.  If the specified state is null, the new state is Offline.
@@ -2934,4 +2972,16 @@ public class AssignmentManager {
   void setRegionStateListener(RegionStateListener listener) {
     this.regionStateListener = listener;
   }
+
+  private class DelayedAssignCallable implements Runnable {
+    Callable callable;
+    public DelayedAssignCallable(Callable callable) {
+      this.callable = callable;
+    }
+
+    @Override
+    public void run() {
+      threadPoolExecutorService.submit(callable);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/hbase/blob/fe443836/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
index b95b894..ba08a05 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java
@@ -67,10 +67,16 @@ public class RegionStates {
 
   public final static RegionStateStampComparator REGION_STATE_COMPARATOR =
     new RegionStateStampComparator();
+
+  // This comparator sorts the RegionStates by time stamp then Region name.
+  // Comparing by timestamp alone can lead us to discard different RegionStates that happen
+  // to share a timestamp.
   private static class RegionStateStampComparator implements 
Comparator<RegionState> {
     @Override
     public int compare(RegionState l, RegionState r) {
-      return Long.compare(l.getStamp(), r.getStamp());
+      return Long.compare(l.getStamp(), r.getStamp()) == 0 ?
+          Bytes.compareTo(l.getRegion().getRegionName(), 
r.getRegion().getRegionName()) :
+          Long.compare(l.getStamp(), r.getStamp());
     }
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/fe443836/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterStatusServlet.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterStatusServlet.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterStatusServlet.java
index 02f01c4..f975d9d 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterStatusServlet.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterStatusServlet.java
@@ -19,6 +19,7 @@
 package org.apache.hadoop.hbase.master;
 
 import static org.junit.Assert.*;
+import static org.mockito.Matchers.any;
 
 import java.io.IOException;
 import java.io.StringWriter;
@@ -26,8 +27,6 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseConfiguration;
@@ -178,6 +177,11 @@ public class TestMasterStatusServlet {
     Mockito.doReturn(rs).when(am).getRegionStates();
     Mockito.doReturn(regionsInTransition).when(rs).getRegionsInTransition();
     
Mockito.doReturn(regionsInTransition).when(rs).getRegionsInTransitionOrderedByTimestamp();
+    Mockito.when(
+        am.getState(
+            any(String.class)
+        )
+    ).thenReturn(new RegionState(null, null));
 
     // Render to a string
     StringWriter sw = new StringWriter();

Reply via email to