Repository: hbase
Updated Branches:
  refs/heads/branch-2 1efa050eb -> 77607e496


HBASE-19840 Flakey TestMetaWithReplicas

Fix two issues:

 # Meta Replicas can all be assigned to the same server. This
 will call the test to hang when we do our kill of the server
 hosting meta because there'll be no replicas to read from
 as test intends. Check is to look for this condition on
 startup and adjust if we come across it. Replicas cross-cut
 assignment. They need work.
 # Other issue was shutdown. The master started toward the
 end of the test may not have come up fully by the time
 shutdown is called. We could be stuck assigning the
 meta replicas. Have shutdown shutdown the procedure
 executor engine.

There is other cleanup and notes in the below.

M HMaster
 Remove the silly stops in startup now we have real
 means of shutting down Master during init.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
 This replica stuff was doing stuff it shouldn't be doing
 like setting core Master state flags. It may have made
 sense once but now meta is assigned by a Pv2 Procedure
 so the flag setting in here is meddlesome. Clear out
 methods no longer needed.

M 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
 Remove unused methods.
 Changes local variable names so they align w/ our naming elsewhere in
 code base.

M 
hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
 Check for all replicas on the one server.


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/77607e49
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/77607e49
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/77607e49

Branch: refs/heads/branch-2
Commit: 77607e496108ff9aac3d5719fca0c12449df1637
Parents: 1efa050
Author: Michael Stack <st...@apache.org>
Authored: Wed Jan 24 21:36:57 2018 -0800
Committer: Michael Stack <st...@apache.org>
Committed: Mon Jan 29 09:26:41 2018 -0800

----------------------------------------------------------------------
 .../org/apache/hadoop/hbase/util/HasThread.java |  2 +
 .../org/apache/hadoop/hbase/master/HMaster.java | 31 +++----
 .../hbase/master/MasterMetaBootstrap.java       | 83 ++++++++---------
 .../hadoop/hbase/master/ServerManager.java      |  7 +-
 .../master/assignment/AssignmentManager.java    | 94 +++++++++-----------
 .../hbase/client/TestMetaWithReplicas.java      | 63 ++++++++-----
 .../hbase/master/TestMasterNoCluster.java       |  5 +-
 7 files changed, 142 insertions(+), 143 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/77607e49/hbase-common/src/main/java/org/apache/hadoop/hbase/util/HasThread.java
----------------------------------------------------------------------
diff --git 
a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/HasThread.java 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/HasThread.java
index 76a90f5..29d5cda 100644
--- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/HasThread.java
+++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/HasThread.java
@@ -35,10 +35,12 @@ public abstract class HasThread implements Runnable {
   
   public HasThread() {
     this.thread = new Thread(this);
+    this.thread.setDaemon(true);
   }
 
   public HasThread(String name) {
     this.thread = new Thread(this, name);
+    this.thread.setDaemon(true);
   }
   
   public Thread getThread() {

http://git-wip-us.apache.org/repos/asf/hbase/blob/77607e49/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index bd52bd1..7bbf91f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1,5 +1,4 @@
 /*
- *
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -814,13 +813,13 @@ public class HMaster extends HRegionServer implements 
MasterServices {
 
     this.serverManager = createServerManager(this);
 
+    // This manager is started AFTER hbase:meta is confirmed on line.
+    // See inside metaBootstrap.recoverMeta(); below. Shouldn't be so cryptic!
     this.tableStateManager = new TableStateManager(this);
 
     status.setStatus("Initializing ZK system trackers");
     initializeZKBasedSystemTrackers();
-
-    // Set Master as active now after we've setup zk with stuff like whether 
cluster is up or not.
-    // RegionServers won't come up if the cluster status is not up.
+    // Set ourselves as active Master now our claim has succeeded up in zk.
     this.activeMaster = true;
 
     // This is for backwards compatibility
@@ -863,10 +862,6 @@ public class HMaster extends HRegionServer implements 
MasterServices {
     this.balancer.setClusterMetrics(getClusterMetricsWithoutCoprocessor());
     this.balancer.initialize();
 
-    // Check if master is shutting down because of some issue
-    // in initializing the regionserver or the balancer.
-    if (isStopped()) return;
-
     // Make sure meta assigned before proceeding.
     status.setStatus("Recovering  Meta Region");
 
@@ -875,9 +870,6 @@ public class HMaster extends HRegionServer implements 
MasterServices {
     MasterMetaBootstrap metaBootstrap = createMetaBootstrap(this, status);
     metaBootstrap.recoverMeta();
 
-    // check if master is shutting down because above assignMeta could return 
even hbase:meta isn't
-    // assigned when master is shutting down
-    if (isStopped()) return;
 
     //Initialize after meta as it scans meta
     if (favoredNodesManager != null) {
@@ -935,15 +927,11 @@ public class HMaster extends HRegionServer implements 
MasterServices {
     configurationManager.registerObserver(this.balancer);
     configurationManager.registerObserver(this.hfileCleaner);
     configurationManager.registerObserver(this.logCleaner);
-
     // Set master as 'initialized'.
     setInitialized(true);
-
     assignmentManager.checkIfShouldMoveSystemRegionAsync();
-
     status.setStatus("Assign meta replicas");
     metaBootstrap.assignMetaReplicas();
-
     status.setStatus("Starting quota manager");
     initQuotaManager();
     if (QuotaUtil.isQuotaEnabled(conf)) {
@@ -2686,18 +2674,25 @@ public class HMaster extends HRegionServer implements 
MasterServices {
     if (cpHost != null) {
       cpHost.preShutdown();
     }
-    // Tell the servermanager cluster is down.
+    // Tell the servermanager cluster shutdown has been called. This makes it 
so when Master is
+    // last running server, it'll stop itself. Next, we broadcast the cluster 
shutdown by setting
+    // the cluster status as down. RegionServers will notice this change in 
state and will start
+    // shutting themselves down. When last has exited, Master can go down.
     if (this.serverManager != null) {
       this.serverManager.shutdownCluster();
     }
-    // Set the cluster down flag; broadcast across the cluster.
-    if (this.clusterStatusTracker != null){
+    if (this.clusterStatusTracker != null) {
       try {
         this.clusterStatusTracker.setClusterDown();
       } catch (KeeperException e) {
         LOG.error("ZooKeeper exception trying to set cluster as down in ZK", 
e);
       }
     }
+    // Stop the procedure executor. Will stop any ongoing assign, unassign, 
server crash etc.,
+    // processing so we can go down.
+    if (this.procedureExecutor != null) {
+      this.procedureExecutor.stop();
+    }
     // Shutdown our cluster connection. This will kill any hosted RPCs that 
might be going on;
     // this is what we want especially if the Master is in startup phase doing 
call outs to
     // hbase:meta, etc. when cluster is down. Without ths connection close, 
we'd have to wait on

http://git-wip-us.apache.org/repos/asf/hbase/blob/77607e49/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
index 78e7e6e..e841d27 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterMetaBootstrap.java
@@ -20,15 +20,18 @@ package org.apache.hadoop.hbase.master;
 
 import java.io.IOException;
 import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.MetaTableAccessor;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.client.RegionInfoBuilder;
 import org.apache.hadoop.hbase.client.RegionReplicaUtil;
 import org.apache.hadoop.hbase.client.TableState;
+import org.apache.hadoop.hbase.master.assignment.AssignProcedure;
 import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
 import org.apache.hadoop.hbase.monitoring.MonitoredTask;
 import org.apache.hadoop.hbase.zookeeper.MetaTableLocator;
@@ -74,12 +77,45 @@ public class MasterMetaBootstrap {
     }
   }
 
-  public void assignMetaReplicas()
+  /**
+   * For assigning hbase:meta replicas only.
+   * TODO: The way this assign runs, nothing but chance to stop all replicas 
showing up on same
+   * server as the hbase:meta region.
+   */
+  protected void assignMetaReplicas()
       throws IOException, InterruptedException, KeeperException {
     int numReplicas = 
master.getConfiguration().getInt(HConstants.META_REPLICAS_NUM,
            HConstants.DEFAULT_META_REPLICA_NUM);
+    if (numReplicas <= 1) {
+      // No replicaas to assign. Return.
+      return;
+    }
+    final AssignmentManager assignmentManager = master.getAssignmentManager();
+    if (!assignmentManager.isMetaInitialized()) {
+      throw new IllegalStateException("hbase:meta must be initialized first 
before we can " +
+          "assign out its replicas");
+    }
+    ServerName metaServername =
+        
this.master.getMetaTableLocator().getMetaRegionLocation(this.master.getZooKeeper());
     for (int i = 1; i < numReplicas; i++) {
-      assignMeta(i);
+      // Get current meta state for replica from zk.
+      RegionState metaState = 
MetaTableLocator.getMetaRegionState(master.getZooKeeper(), i);
+      RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
+          RegionInfoBuilder.FIRST_META_REGIONINFO, i);
+      LOG.debug(hri.getRegionNameAsString() + " replica region state from 
zookeeper=" + metaState);
+      if (metaServername.equals(metaState.getServerName())) {
+        metaState = null;
+        LOG.info(hri.getRegionNameAsString() +
+          " old location is same as current hbase:meta location; setting 
location as null...");
+      }
+      // These assigns run inline. All is blocked till they complete. Only 
interrupt is shutting
+      // down hosting server which calls AM#stop.
+      if (metaState != null && metaState.getServerName() != null) {
+        // Try to retain old assignment.
+        assignmentManager.assign(hri, metaState.getServerName());
+      } else {
+        assignmentManager.assign(hri);
+      }
     }
     unassignExcessMetaReplica(numReplicas);
   }
@@ -108,49 +144,6 @@ public class MasterMetaBootstrap {
     }
   }
 
-  /**
-   * Check <code>hbase:meta</code> is assigned. If not, assign it.
-   */
-  protected void assignMeta(int replicaId)
-      throws InterruptedException, IOException, KeeperException {
-    final AssignmentManager assignmentManager = master.getAssignmentManager();
-
-    // Work on meta region
-    // TODO: Unimplemented
-    // long timeout =
-    //   
master.getConfiguration().getLong("hbase.catalog.verification.timeout", 1000);
-    if (replicaId == RegionInfo.DEFAULT_REPLICA_ID) {
-      status.setStatus("Assigning hbase:meta region");
-    } else {
-      status.setStatus("Assigning hbase:meta region, replicaId " + replicaId);
-    }
-
-    // Get current meta state from zk.
-    RegionState metaState = 
MetaTableLocator.getMetaRegionState(master.getZooKeeper(), replicaId);
-    LOG.debug("meta state from zookeeper: " + metaState);
-    RegionInfo hri = RegionReplicaUtil.getRegionInfoForReplica(
-        RegionInfoBuilder.FIRST_META_REGIONINFO, replicaId);
-    assignmentManager.assignMeta(hri, metaState.getServerName());
-
-    if (replicaId == RegionInfo.DEFAULT_REPLICA_ID) {
-      // TODO: should we prevent from using state manager before meta was 
initialized?
-      // tableStateManager.start();
-      master.getTableStateManager()
-        .setTableState(TableName.META_TABLE_NAME, TableState.State.ENABLED);
-    }
-
-    master.getTableStateManager().start();
-
-    // Make sure a hbase:meta location is set. We need to enable SSH here since
-    // if the meta region server is died at this time, we need it to be 
re-assigned
-    // by SSH so that system tables can be assigned.
-    // No need to wait for meta is assigned = 0 when meta is just verified.
-    if (replicaId == RegionInfo.DEFAULT_REPLICA_ID) 
enableCrashedServerProcessing(false);
-    LOG.info("hbase:meta with replicaId " + replicaId + ", location="
-      + 
master.getMetaTableLocator().getMetaRegionLocation(master.getZooKeeper(), 
replicaId));
-    status.setStatus("META assigned.");
-  }
-
   private void enableCrashedServerProcessing(final boolean waitForMeta)
       throws InterruptedException {
     // If crashed server processing is disabled, we enable it and expire those 
dead but not expired

http://git-wip-us.apache.org/repos/asf/hbase/blob/77607e49/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
index 3c84bfb..aa75091 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java
@@ -92,9 +92,6 @@ import 
org.apache.hadoop.hbase.shaded.protobuf.generated.RegionServerStatusProto
  * A server is fully processed only after the handler is fully enabled
  * and has completed the handling.
  */
-/**
- *
- */
 @InterfaceAudience.Private
 public class ServerManager {
   public static final String WAIT_ON_REGIONSERVERS_MAXTOSTART =
@@ -587,7 +584,7 @@ public class ServerManager {
 
     // If cluster is going down, yes, servers are going to be expiring; don't
     // process as a dead server
-    if (this.clusterShutdown.get()) {
+    if (isClusterShutdown()) {
       LOG.info("Cluster shutdown set; " + serverName +
         " expired; onlineServers=" + this.onlineServers.size());
       if (this.onlineServers.isEmpty()) {
@@ -961,7 +958,7 @@ public class ServerManager {
     this.clusterShutdown.set(true);
   }
 
-  public boolean isClusterShutdown() {
+  boolean isClusterShutdown() {
     return this.clusterShutdown.get();
   }
 

http://git-wip-us.apache.org/repos/asf/hbase/blob/77607e49/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
index 1b8e757..d6b73f4 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/assignment/AssignmentManager.java
@@ -449,27 +449,6 @@ public class AssignmentManager implements ServerListener {
     return metaLoadEvent.isReady();
   }
 
-  // 
============================================================================================
-  //  TODO: Sync helpers
-  // 
============================================================================================
-  public void assignMeta(final RegionInfo metaRegionInfo) throws IOException {
-    assignMeta(metaRegionInfo, null);
-  }
-
-  public void assignMeta(final RegionInfo metaRegionInfo, final ServerName 
serverName)
-      throws IOException {
-    assert isMetaRegion(metaRegionInfo) : "unexpected non-meta region " + 
metaRegionInfo;
-    AssignProcedure proc;
-    if (serverName != null) {
-      LOG.debug("Try assigning Meta " + metaRegionInfo + " to " + serverName);
-      proc = createAssignProcedure(metaRegionInfo, serverName);
-    } else {
-      LOG.debug("Assigning " + metaRegionInfo.getRegionNameAsString());
-      proc = createAssignProcedure(metaRegionInfo);
-    }
-    
ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), 
proc);
-  }
-
   /**
    * Start a new thread to check if there are region servers whose versions 
are higher than others.
    * If so, move all system table regions to RS with the highest version to 
keep compatibility.
@@ -526,6 +505,11 @@ public class AssignmentManager implements ServerListener {
         .collect(Collectors.toList());
   }
 
+  public void assign(final RegionInfo regionInfo, ServerName sn) throws 
IOException {
+    AssignProcedure proc = createAssignProcedure(regionInfo, sn);
+    
ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), 
proc);
+  }
+
   public void assign(final RegionInfo regionInfo) throws IOException {
     AssignProcedure proc = createAssignProcedure(regionInfo);
     
ProcedureSyncWait.submitAndWaitProcedure(master.getMasterProcedureExecutor(), 
proc);
@@ -1649,6 +1633,7 @@ public class AssignmentManager implements ServerListener {
         pendingAssignQueue.clear();
       }
     };
+    assignThread.setDaemon(true);
     assignThread.start();
   }
 
@@ -1712,17 +1697,16 @@ public class AssignmentManager implements 
ServerListener {
 
     // TODO: Optimize balancer. pass a RegionPlan?
     final HashMap<RegionInfo, ServerName> retainMap = new HashMap<>();
-    final List<RegionInfo> userRRList = new ArrayList<>();
-    // regions for system tables requiring reassignment
-    final List<RegionInfo> sysRRList = new ArrayList<>();
-    for (RegionStateNode regionNode : regions.values()) {
-      boolean sysTable = regionNode.isSystemTable();
-      final List<RegionInfo> rrList = sysTable ? sysRRList : userRRList;
-
-      if (regionNode.getRegionLocation() != null) {
-        retainMap.put(regionNode.getRegionInfo(), 
regionNode.getRegionLocation());
+    final List<RegionInfo> userHRIs = new ArrayList<>(regions.size());
+    // Regions for system tables requiring reassignment
+    final List<RegionInfo> systemHRIs = new ArrayList<>();
+    for (RegionStateNode regionStateNode: regions.values()) {
+      boolean sysTable = regionStateNode.isSystemTable();
+      final List<RegionInfo> hris = sysTable? systemHRIs: userHRIs;
+      if (regionStateNode.getRegionLocation() != null) {
+        retainMap.put(regionStateNode.getRegionInfo(), 
regionStateNode.getRegionLocation());
       } else {
-        rrList.add(regionNode.getRegionInfo());
+        hris.add(regionStateNode.getRegionInfo());
       }
     }
 
@@ -1731,45 +1715,44 @@ public class AssignmentManager implements 
ServerListener {
     // TODO use events
     List<ServerName> servers = 
master.getServerManager().createDestinationServersList();
     for (int i = 0; servers.size() < 1; ++i) {
+      // Report every fourth time around this loop; try not to flood log.
       if (i % 4 == 0) {
-        LOG.warn("no server available, unable to find a location for " + 
regions.size() +
-            " unassigned regions. waiting");
+        LOG.warn("No servers available; cannot place " + regions.size() + " 
unassigned regions.");
       }
 
-      // the was AM killed
       if (!isRunning()) {
-        LOG.debug("aborting assignment-queue with " + regions.size() + " not 
assigned");
+        LOG.debug("Stopped! Dropping assign of " + regions.size() + " queued 
regions.");
         return;
       }
-
       Threads.sleep(250);
       servers = master.getServerManager().createDestinationServersList();
     }
 
-    if (!sysRRList.isEmpty()) {
-      // system table regions requiring reassignment are present, get region 
servers
+    if (!systemHRIs.isEmpty()) {
+      // System table regions requiring reassignment are present, get region 
servers
       // not available for system table regions
       final List<ServerName> excludeServers = 
getExcludedServersForSystemTable();
       List<ServerName> serversForSysTables = servers.stream()
           .filter(s -> 
!excludeServers.contains(s)).collect(Collectors.toList());
       if (serversForSysTables.isEmpty()) {
-        LOG.warn("No servers available for system table regions, considering 
all servers!");
+        LOG.warn("Filtering old server versions and the excluded produced an 
empty set; " +
+            "instead considering all candidate servers!");
       }
-      LOG.debug("Processing assignment plans for System tables 
sysServersCount=" +
-          serversForSysTables.size() + ", allServersCount=" + servers.size());
-      processAssignmentPlans(regions, null, sysRRList,
-          serversForSysTables.isEmpty() ? servers : serversForSysTables);
+      LOG.debug("Processing assignQueue; systemServersCount=" + 
serversForSysTables.size() +
+          ", allServersCount=" + servers.size());
+      processAssignmentPlans(regions, null, systemHRIs,
+          serversForSysTables.isEmpty()? servers: serversForSysTables);
     }
 
-    processAssignmentPlans(regions, retainMap, userRRList, servers);
+    processAssignmentPlans(regions, retainMap, userHRIs, servers);
   }
 
   private void processAssignmentPlans(final HashMap<RegionInfo, 
RegionStateNode> regions,
-      final HashMap<RegionInfo, ServerName> retainMap, final List<RegionInfo> 
rrList,
+      final HashMap<RegionInfo, ServerName> retainMap, final List<RegionInfo> 
hris,
       final List<ServerName> servers) {
     boolean isTraceEnabled = LOG.isTraceEnabled();
     if (isTraceEnabled) {
-      LOG.trace("available servers count=" + servers.size() + ": " + servers);
+      LOG.trace("Available servers count=" + servers.size() + ": " + servers);
     }
 
     final LoadBalancer balancer = getBalancer();
@@ -1788,16 +1771,16 @@ public class AssignmentManager implements 
ServerListener {
 
     // TODO: Do we need to split retain and round-robin?
     // the retain seems to fallback to round-robin/random if the region is not 
in the map.
-    if (!rrList.isEmpty()) {
-      Collections.sort(rrList, RegionInfo.COMPARATOR);
+    if (!hris.isEmpty()) {
+      Collections.sort(hris, RegionInfo.COMPARATOR);
       if (isTraceEnabled) {
-        LOG.trace("round robin regions=" + rrList);
+        LOG.trace("round robin regions=" + hris);
       }
       try {
-        acceptPlan(regions, balancer.roundRobinAssignment(rrList, servers));
+        acceptPlan(regions, balancer.roundRobinAssignment(hris, servers));
       } catch (HBaseIOException e) {
         LOG.warn("unable to round-robin assignment", e);
-        addToPendingAssignment(regions, rrList);
+        addToPendingAssignment(regions, hris);
       }
     }
   }
@@ -1844,16 +1827,19 @@ public class AssignmentManager implements 
ServerListener {
   }
 
   /**
-   * Get a list of servers that this region can not assign to.
-   * For system table, we must assign them to a server with highest version.
+   * Get a list of servers that this region cannot be assigned to.
+   * For system tables, we must assign them to a server with highest version.
    */
   public List<ServerName> getExcludedServersForSystemTable() {
+    // TODO: This should be a cached list kept by the ServerManager rather 
than calculated on each
+    // move or system region assign. The RegionServerTracker keeps list of 
online Servers with
+    // RegionServerInfo that includes Version.
     List<Pair<ServerName, String>> serverList = 
master.getServerManager().getOnlineServersList()
         .stream()
         .map((s)->new Pair<>(s, master.getRegionServerVersion(s)))
         .collect(Collectors.toList());
     if (serverList.isEmpty()) {
-      return new ArrayList<>();
+      return Collections.EMPTY_LIST;
     }
     String highestVersion = Collections.max(serverList,
         (o1, o2) -> VersionInfo.compareVersion(o1.getSecond(), 
o2.getSecond())).getSecond();

http://git-wip-us.apache.org/repos/asf/hbase/blob/77607e49/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
index 7099408..08e2922 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestMetaWithReplicas.java
@@ -28,7 +28,9 @@ import java.io.IOException;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.EnumSet;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.atomic.AtomicBoolean;
 
@@ -46,6 +48,7 @@ import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.TableNotFoundException;
 import org.apache.hadoop.hbase.Waiter;
 import org.apache.hadoop.hbase.master.NoSuchProcedureException;
+import org.apache.hadoop.hbase.master.assignment.AssignmentManager;
 import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
 import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
 import org.apache.hadoop.hbase.testclassification.LargeTests;
@@ -81,6 +84,7 @@ public class TestMetaWithReplicas {
       build();
   private static final Logger LOG = 
LoggerFactory.getLogger(TestMetaWithReplicas.class);
   private final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
+  private static final int REGIONSERVERS_COUNT = 3;
 
   @Rule
   public TestName name = new TestName();
@@ -91,30 +95,47 @@ public class TestMetaWithReplicas {
     TEST_UTIL.getConfiguration().setInt(HConstants.META_REPLICAS_NUM, 3);
     TEST_UTIL.getConfiguration().setInt(
         StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, 1000);
-    TEST_UTIL.startMiniCluster(3);
-    // disable the balancer
-    LoadBalancerTracker l = new 
LoadBalancerTracker(TEST_UTIL.getZooKeeperWatcher(),
-        new Abortable() {
-      AtomicBoolean aborted = new AtomicBoolean(false);
-      @Override
-      public boolean isAborted() {
-        return aborted.get();
-      }
-      @Override
-      public void abort(String why, Throwable e) {
-        aborted.set(true);
-      }
-    });
-    l.setBalancerOn(false);
-    for (int replicaId = 1; replicaId < 3; replicaId ++) {
-      RegionInfo h = 
RegionReplicaUtil.getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO,
-        replicaId);
+    TEST_UTIL.startMiniCluster(REGIONSERVERS_COUNT);
+    AssignmentManager am = 
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager();
+    Set<ServerName> sns = new HashSet<ServerName>();
+    for (int replicaId = 0; replicaId < 3; replicaId ++) {
+      RegionInfo h =
+          
RegionReplicaUtil.getRegionInfoForReplica(RegionInfoBuilder.FIRST_META_REGIONINFO,
+              replicaId);
       try {
-        
TEST_UTIL.getMiniHBaseCluster().getMaster().getAssignmentManager().waitForAssignment(h);
+        am.waitForAssignment(h);
+        ServerName sn = am.getRegionStates().getRegionServerOfRegion(h);
+        LOG.info(h.getRegionNameAsString() + " on " + sn);
+        sns.add(sn);
       } catch (NoSuchProcedureException e) {
         LOG.info("Presume the procedure has been cleaned up so just proceed: " 
+ e.toString());
       }
     }
+    // Fun. All meta region replicas have ended up on the one server. This 
will cause this test
+    // to fail ... sometimes.
+    if (sns.size() == 1) {
+      LOG.warn("All hbase:meta replicas are on the one server; moving 
hbase:meta");
+      int metaServerIndex = TEST_UTIL.getHBaseCluster().getServerWithMeta();
+      int newServerIndex = (metaServerIndex + 1) % REGIONSERVERS_COUNT;
+      ServerName destinationServerName =
+          
TEST_UTIL.getHBaseCluster().getRegionServer(newServerIndex).getServerName();
+      
TEST_UTIL.getAdmin().move(RegionInfoBuilder.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
+          Bytes.toBytes(destinationServerName.toString()));
+    }
+    // Disable the balancer
+    LoadBalancerTracker l = new 
LoadBalancerTracker(TEST_UTIL.getZooKeeperWatcher(),
+        new Abortable() {
+          AtomicBoolean aborted = new AtomicBoolean(false);
+          @Override
+          public boolean isAborted() {
+            return aborted.get();
+          }
+          @Override
+          public void abort(String why, Throwable e) {
+            aborted.set(true);
+          }
+        });
+    l.setBalancerOn(false);
     LOG.debug("All meta replicas assigned");
   }
 
@@ -208,13 +229,15 @@ public class TestMetaWithReplicas {
           
Thread.sleep(conf.getInt(StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD,
               30000) * 3);
         }
+        // Ensure all metas are not on same hbase:meta replica=0 server!
+
         master = 
util.getHBaseClusterInterface().getClusterMetrics().getMasterName();
         // kill the master so that regionserver recovery is not triggered at 
all
         // for the meta server
         LOG.info("Stopping master=" + master.toString());
         util.getHBaseClusterInterface().stopMaster(master);
         util.getHBaseClusterInterface().waitForMasterToStop(master, 60000);
-        LOG.info("Master stopped!");
+        LOG.info("Master " + master + " stopped!");
         if (!master.equals(primary)) {
           util.getHBaseClusterInterface().killRegionServer(primary);
           util.getHBaseClusterInterface().waitForRegionServerToStop(primary, 
60000);

http://git-wip-us.apache.org/repos/asf/hbase/blob/77607e49/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
----------------------------------------------------------------------
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
index 15c8b6a..344b04f 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterNoCluster.java
@@ -262,7 +262,10 @@ public class TestMasterNoCluster {
       MasterMetaBootstrap createMetaBootstrap(final HMaster master, final 
MonitoredTask status) {
         return new MasterMetaBootstrap(this, status) {
           @Override
-          protected void assignMeta(int replicaId) { }
+          protected void assignMetaReplicas()
+              throws IOException, InterruptedException, KeeperException {
+            // Nothing to do.
+          }
         };
       }
 

Reply via email to