HBASE-21373 (backport from HBASE-21338) Warn if balancer is an ill-fit for cluster size
Signed-off-by: Andrew Purtell <apurt...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hbase/repo Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/4d81cfe7 Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/4d81cfe7 Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/4d81cfe7 Branch: refs/heads/branch-1.4 Commit: 4d81cfe7e209623cd3057db77816e6641ac1383e Parents: 745cc7a Author: xcang <xc...@salesforce.com> Authored: Fri Oct 26 20:55:22 2018 -0700 Committer: Andrew Purtell <apurt...@apache.org> Committed: Thu Nov 8 10:58:11 2018 -0800 ---------------------------------------------------------------------- .../master/balancer/StochasticLoadBalancer.java | 28 +++++++++++++++++--- 1 file changed, 24 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hbase/blob/4d81cfe7/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java ---------------------------------------------------------------------- diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java index 374070c..2cfe2dc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java @@ -108,6 +108,8 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { "hbase.master.balancer.stochastic.stepsPerRegion"; protected static final String MAX_STEPS_KEY = "hbase.master.balancer.stochastic.maxSteps"; + protected static final String RUN_MAX_STEPS_KEY = + "hbase.master.balancer.stochastic.runMaxSteps"; protected static final String MAX_RUNNING_TIME_KEY = "hbase.master.balancer.stochastic.maxRunningTime"; protected static final String KEEP_REGION_LOADS = @@ -123,6 +125,7 
@@ public class StochasticLoadBalancer extends BaseLoadBalancer { // values are defaults private int maxSteps = 1000000; + private boolean runMaxSteps = false; private int stepsPerRegion = 800; private long maxRunningTime = 30 * 1000 * 1; // 30 seconds. private int numRegionLoadsToRemember = 15; @@ -169,6 +172,7 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { stepsPerRegion = conf.getInt(STEPS_PER_REGION_KEY, stepsPerRegion); maxRunningTime = conf.getLong(MAX_RUNNING_TIME_KEY, maxRunningTime); + runMaxSteps = conf.getBoolean(RUN_MAX_STEPS_KEY, runMaxSteps); numRegionLoadsToRemember = conf.getInt(KEEP_REGION_LOADS, numRegionLoadsToRemember); isByTable = conf.getBoolean(HConstants.HBASE_MASTER_LOADBALANCE_BYTABLE, isByTable); @@ -371,14 +375,30 @@ public class StochasticLoadBalancer extends BaseLoadBalancer { for (int i = 0; i < this.curFunctionCosts.length; i++) { curFunctionCosts[i] = tempFunctionCosts[i]; } - LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost=" - + functionCost()); double initCost = currentCost; double newCost = currentCost; - long computedMaxSteps = Math.min(this.maxSteps, - ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers)); + long computedMaxSteps = 0; + if (runMaxSteps) { + computedMaxSteps = Math.max(this.maxSteps, + ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers)); + } else { + long calculatedMaxSteps = + (long) cluster.numRegions * (long) this.stepsPerRegion * (long) cluster.numServers; + computedMaxSteps = Math.min(this.maxSteps, calculatedMaxSteps); + if (calculatedMaxSteps > maxSteps) { + LOG.warn(String.format("calculatedMaxSteps:%d for loadbalancer's stochastic walk is larger " + + "than maxSteps:%d. Hence load balancing may not work well. Setting parameter " + + "\"hbase.master.balancer.stochastic.runMaxSteps\" to true to overcome this issue." 
+ + "(This config change does not require service restart)", calculatedMaxSteps, + maxRunningTime)); + + } + } + LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost + ", functionCost=" + + functionCost() + " computedMaxSteps: " + computedMaxSteps); + // Perform a stochastic walk to see if we can get a good fit. long step;