Repository: hbase
Updated Branches:
  refs/heads/HBASE-19064 33d0606a2 -> 0ac769246 (forced update)


HBASE-19998 Flakey TestVisibilityLabelsWithDefaultVisLabelService

Only call server.checkIfShouldMoveSystemRegionAsync if a node has been
added. Do not call it if only one regionserver in cluster. Make it
so ServerCrashProcedure runs before it. Add logging if
server.checkIfShouldMoveSystemRegionAsync was responsible for
MOVE (Previous was a mystery when it cut in).

Previous we'd call it when there was a nodeChildrenChanged. These
happen before nodeDeleted. If a server crashed,
checkIfShouldMoveSystemRegionAsync could run first, find the
server that had not yet registered as crashed, find system
tables on it and then try to move them. It would fail because
server would not respond to RPC. The region move would then
be waiting on the servercrashprocedure to wake it up when
done processing but this move had locked the region so
SCP couldn't run....


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/50c705da
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/50c705da
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/50c705da

Branch: refs/heads/HBASE-19064
Commit: 50c705dad9825d3095352b997be35a2568bd6190
Parents: 816d860
Author: Michael Stack <st...@apache.org>
Authored: Wed Feb 14 22:32:29 2018 -0800
Committer: Michael Stack <st...@apache.org>
Committed: Thu Feb 15 06:08:55 2018 -0800

----------------------------------------------------------------------
 .../hadoop/hbase/master/MasterServices.java     |  5 +++++
 .../hbase/master/RegionServerTracker.java       | 16 +++++++++++---
 .../master/assignment/AssignmentManager.java    | 23 ++++++++++++++++++--
 .../master/assignment/MoveRegionProcedure.java  |  1 -
 4 files changed, 39 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/50c705da/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
index 9d371bd..0e552d6 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java
@@ -486,6 +486,11 @@ public interface MasterServices extends Server {
 
   public String getRegionServerVersion(final ServerName sn);
 
+  /**
+   * Called when a new RegionServer is added to the cluster.
+   * Checks if new server has a newer version than any existing server and 
will move system tables
+   * there if so.
+   */
   public void checkIfShouldMoveSystemRegionAsync();
 
   /**

http://git-wip-us.apache.org/repos/asf/hbase/blob/50c705da/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
index 29218e2..81fc589 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionServerTracker.java
@@ -104,9 +104,6 @@ public class RegionServerTracker extends ZKListener {
         }
       }
     }
-    if (server.isInitialized()) {
-      server.checkIfShouldMoveSystemRegionAsync();
-    }
   }
 
   private void remove(final ServerName sn) {
@@ -116,6 +113,19 @@ public class RegionServerTracker extends ZKListener {
   }
 
   @Override
+  public void nodeCreated(String path) {
+    if (path.startsWith(watcher.znodePaths.rsZNode)) {
+      String serverName = ZKUtil.getNodeName(path);
+      LOG.info("RegionServer ephemeral node created, adding [" + serverName + 
"]");
+      if (server.isInitialized()) {
+        // Only call the check to move servers if a RegionServer was added to 
the cluster; in this
+        // case it could be a server with a new version so it makes sense to 
run the check.
+        server.checkIfShouldMoveSystemRegionAsync();
+      }
+    }
+  }
+
+  @Override
   public void nodeDeleted(String path) {
     if (path.startsWith(watcher.znodePaths.rsZNode)) {
       String serverName = ZKUtil.getNodeName(path);

http://git-wip-us.apache.org/repos/asf/hbase/blob/50c705da/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 8b3dc61..97d8258 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -455,12 +455,27 @@ public class AssignmentManager implements ServerListener {
    * If so, move all system table regions to RS with the highest version to 
keep compatibility.
    * The reason is, RS in new version may not be able to access RS in old 
version when there are
    * some incompatible changes.
+   * <p>This method is called when a new RegionServer is added to cluster 
only.</p>
    */
   public void checkIfShouldMoveSystemRegionAsync() {
+    // TODO: Fix this thread. If a server is killed and a new one started, 
this thread thinks that
+    // it should 'move' the system tables from the old server to the new 
server but
+    // ServerCrashProcedure is on it; and it will take care of the assign 
without dataloss.
+    if (this.master.getServerManager().countOfRegionServers() <= 1) {
+      return;
+    }
+    // This thread used to run whenever there was a change in the cluster. The 
ZooKeeper
+    // childrenChanged notification came in before the nodeDeleted message and 
so this method
+    // cold run before a ServerCrashProcedure could run. That meant that this 
thread could see
+    // a Crashed Server before ServerCrashProcedure and it could find system 
regions on the
+    // crashed server and go move them before ServerCrashProcedure had a 
chance; could be
+    // dataloss too if WALs were not recovered.
     new Thread(() -> {
       try {
         synchronized (checkIfShouldMoveSystemRegionLock) {
           List<RegionPlan> plans = new ArrayList<>();
+          // TODO: I don't think this code does a good job if all servers in 
cluster have same
+          // version. It looks like it will schedule unnecessary moves.
           for (ServerName server : getExcludedServersForSystemTable()) {
             if (master.getServerManager().isServerDead(server)) {
               // TODO: See HBASE-18494 and HBASE-18495. Though 
getExcludedServersForSystemTable()
@@ -471,13 +486,15 @@ public class AssignmentManager implements ServerListener {
               // handling.
               continue;
             }
-            List<RegionInfo> regionsShouldMove = 
getCarryingSystemTables(server);
+            List<RegionInfo> regionsShouldMove = getSystemTables(server);
             if (!regionsShouldMove.isEmpty()) {
               for (RegionInfo regionInfo : regionsShouldMove) {
                 // null value for dest forces destination server to be 
selected by balancer
                 RegionPlan plan = new RegionPlan(regionInfo, server, null);
                 if (regionInfo.isMetaRegion()) {
                   // Must move meta region first.
+                  LOG.info("Async MOVE of {} to newer Server={}",
+                      regionInfo.getEncodedName(), server);
                   moveAsync(plan);
                 } else {
                   plans.add(plan);
@@ -485,6 +502,8 @@ public class AssignmentManager implements ServerListener {
               }
             }
             for (RegionPlan plan : plans) {
+              LOG.info("Async MOVE of {} to newer Server={}",
+                  plan.getRegionInfo().getEncodedName(), server);
               moveAsync(plan);
             }
           }
@@ -495,7 +514,7 @@ public class AssignmentManager implements ServerListener {
     }).start();
   }
 
-  private List<RegionInfo> getCarryingSystemTables(ServerName serverName) {
+  private List<RegionInfo> getSystemTables(ServerName serverName) {
     Set<RegionStateNode> regions = 
this.getRegionStates().getServerNode(serverName).getRegions();
     if (regions == null) {
       return new ArrayList<>();

http://git-wip-us.apache.org/repos/asf/hbase/blob/50c705da/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
index 4e7cde6..a29bfee 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/MoveRegionProcedure.java
@@ -54,7 +54,6 @@ public class MoveRegionProcedure extends 
AbstractStateMachineRegionProcedure<Mov
   public MoveRegionProcedure(final MasterProcedureEnv env, final RegionPlan 
plan) {
     super(env, plan.getRegionInfo());
     this.plan = plan;
-    LOG.info("REMOVE", new Throwable("REMOVE: Just to see who is calling 
Move!!!"));
   }
 
   @Override

Reply via email to