Repository: hbase Updated Branches: refs/heads/branch-1 416f28356 -> b892be744 refs/heads/branch-1.3 f6d8a29a3 -> b62d12ffc refs/heads/branch-1.4 f3accdcfc -> 0e21e9bcb refs/heads/branch-2 db8789ab2 -> d1cbd561d refs/heads/branch-2.0 e3deb9156 -> 16f8aac60 refs/heads/master 554d513f5 -> 1fbce10ff
HBASE-20548 Master fails to startup on large clusters, refreshing block distribution Signed-off-by: Andrew Purtell <apurt...@apache.org> Conflicts: hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/b62d12ff Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/b62d12ff Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/b62d12ff Branch: refs/heads/branch-1.3 Commit: b62d12ffcb5a70ed60c577abe8722153a642d01f Parents: f6d8a29 Author: Thiruvel Thirumoolan <thiru...@oath.com> Authored: Tue May 22 18:28:31 2018 -0700 Committer: Andrew Purtell <apurt...@apache.org> Committed: Thu May 24 12:26:41 2018 -0700 ---------------------------------------------------------------------- .../org/apache/hadoop/hbase/master/HMaster.java | 11 +++++++++ .../hadoop/hbase/master/LoadBalancer.java | 5 ++++ .../hbase/master/balancer/BaseLoadBalancer.java | 26 +++++++++++++------- 3 files changed, 33 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/b62d12ff/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 54a0b74..29acb26 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -894,6 +894,17 @@ public class HMaster extends HRegionServer implements MasterServices, Server { } zombieDetector.interrupt(); + + /* + * After master has started up, lets do balancer post startup initialization. Since this runs + * in activeMasterManager thread, it should be fine. + */ + long start = System.currentTimeMillis(); + this.balancer.postMasterStartupInitialize(); + if (LOG.isDebugEnabled()) { + LOG.debug("Balancer post startup initialization complete, took " + ( + (System.currentTimeMillis() - start) / 1000) + " seconds"); + } } private void initQuotaManager() throws IOException { http://git-wip-us.apache.org/repos/asf/hbase/blob/b62d12ff/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java index c581b08..1642af1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/LoadBalancer.java @@ -149,4 +149,9 @@ public interface LoadBalancer extends Configurable, Stoppable, ConfigurationObse * @param conf */ void onConfigurationChange(Configuration conf); + + /** + * If balancer needs to do initialization after Master has started up, lets do that here. + */ + void postMasterStartupInitialize(); } http://git-wip-us.apache.org/repos/asf/hbase/blob/b62d12ff/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java index 3e00080..e68c925 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java @@ -1126,6 +1126,19 @@ public abstract class BaseLoadBalancer implements LoadBalancer { this.regionFinder.setServices(masterServices); } + @Override + public void postMasterStartupInitialize() { + if (services != null && regionFinder != null) { + try { + Set<HRegionInfo> regions = + services.getAssignmentManager().getRegionStates().getRegionAssignments().keySet(); + regionFinder.refreshAndWait(regions); + } catch (Exception e) { + LOG.warn("Refreshing region HDFS Block dist failed with exception, ignoring", e); + } + } + } + public void setRackManager(RackManager rackManager) { this.rackManager = rackManager; } @@ -1226,7 +1239,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return assignments; } - Cluster cluster = createCluster(servers, regions, false); + Cluster cluster = createCluster(servers, regions); List<HRegionInfo> unassignedRegions = new ArrayList<HRegionInfo>(); roundRobinAssignment(cluster, regions, unassignedRegions, @@ -1272,11 +1285,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { return assignments; } - protected Cluster createCluster(List<ServerName> servers, - Collection<HRegionInfo> regions, boolean forceRefresh) { - if (forceRefresh) { - regionFinder.refreshAndWait(regions); - } + protected Cluster createCluster(List<ServerName> servers, Collection<HRegionInfo> regions) { // Get the snapshot of the current assignments for the regions in question, and then create // a cluster out of it. Note that we might have replicas already assigned to some servers // earlier. So we want to get the snapshot to see those assignments, but this will only contain @@ -1350,7 +1359,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { } List<HRegionInfo> regions = Lists.newArrayList(regionInfo); - Cluster cluster = createCluster(servers, regions, false); + Cluster cluster = createCluster(servers, regions); return randomAssignment(cluster, regionInfo, servers); } @@ -1428,8 +1437,6 @@ public abstract class BaseLoadBalancer implements LoadBalancer { int numRandomAssignments = 0; int numRetainedAssigments = 0; - Cluster cluster = createCluster(servers, regions.keySet(), true); - for (Map.Entry<HRegionInfo, ServerName> entry : regions.entrySet()) { HRegionInfo region = entry.getKey(); ServerName oldServerName = entry.getValue(); @@ -1472,6 +1479,7 @@ public abstract class BaseLoadBalancer implements LoadBalancer { // If servers from prior assignment aren't present, then lets do randomAssignment on regions. if (randomAssignRegions.size() > 0) { + Cluster cluster = createCluster(servers, regions.keySet()); for (Map.Entry<ServerName, List<HRegionInfo>> entry : assignments.entrySet()) { ServerName sn = entry.getKey(); for (HRegionInfo region : entry.getValue()) {