This is an automated email from the ASF dual-hosted git repository.

stack pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.3 by this push:
     new 7e7b730  HBASE-25973 Balancer should explain progress in a better way 
in log -… (#3484)
7e7b730 is described below

commit 7e7b73024279676b42be059001b2115776e07576
Author: clarax <clarax98...@gmail.com>
AuthorDate: Fri Jul 16 15:23:09 2021 -0700

    HBASE-25973 Balancer should explain progress in a better way in log -… 
(#3484)
    
    
    Signed-off-by: stack <st...@apache.org>
---
 .../org/apache/hadoop/hbase/master/HMaster.java    |   2 +
 .../hbase/master/balancer/BaseLoadBalancer.java    |   2 +
 .../master/balancer/StochasticLoadBalancer.java    | 115 ++++++++++++++-------
 .../TestStochasticLoadBalancerBalanceCluster.java  |   2 +-
 4 files changed, 84 insertions(+), 37 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
index 85265ff..228e45c 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
@@ -1966,6 +1966,8 @@ public class HMaster extends HRegionServer implements 
MasterServices {
         }
       }
     }
+    LOG.info("Balancer is going into sleep until next period in {}ms", 
getConfiguration()
+      .getInt(HConstants.HBASE_BALANCER_PERIOD, 
HConstants.DEFAULT_HBASE_BALANCER_PERIOD));
     return successRegionPlans;
   }
 
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
index ee5e907..9a05a33 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
@@ -75,6 +75,8 @@ import org.slf4j.LoggerFactory;
  *
  */
 @InterfaceAudience.Private
+@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="IS2_INCONSISTENT_SYNC",
+  justification="Complaint is about isByTable not being synchronized; we don't 
modify often")
 public abstract class BaseLoadBalancer implements LoadBalancer {
   protected static final int MIN_SERVER_BALANCE = 2;
   private volatile boolean stopped = false;
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index a7ccc41..6b86374 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -145,7 +145,8 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
 
   private List<CandidateGenerator> candidateGenerators;
   private List<CostFunction> costFunctions; // FindBugs: Wants this protected; 
IS2_INCONSISTENT_SYNC
-
+  // To save currently configed sum of multiplier. Defaulted at 1 for cases 
that carry high cost
+  private float sumMultiplier = 1.0f;
   // to save and report costs to JMX
   private double curOverallCost = 0d;
   private double[] tempFunctionCosts;
@@ -197,7 +198,6 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     }
     regionReplicaHostCostFunction = new RegionReplicaHostCostFunction(conf);
     regionReplicaRackCostFunction = new RegionReplicaRackCostFunction(conf);
-
     costFunctions = new ArrayList<>();
     addCostFunction(new RegionCountSkewCostFunction(conf));
     addCostFunction(new PrimaryRegionCountSkewCostFunction(conf));
@@ -311,46 +311,61 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
   protected boolean needsBalance(TableName tableName, Cluster cluster) {
     ClusterLoadState cs = new ClusterLoadState(cluster.clusterState);
     if (cs.getNumServers() < MIN_SERVER_BALANCE) {
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("Not running balancer because only " + cs.getNumServers()
-            + " active regionserver(s)");
-      }
+      LOG.info("Not running balancer because only " + cs.getNumServers() +
+        " active regionserver(s)");
       return false;
     }
     if (areSomeRegionReplicasColocated(cluster)) {
+      LOG.info("Running balancer because at least one server hosts replicas of 
the same region.");
       return true;
     }
 
     if (idleRegionServerExist(cluster)){
+      LOG.info("Running balancer because cluster has idle server(s).");
       return true;
     }
 
+    sumMultiplier = 0.0f;
     double total = 0.0;
-    float sumMultiplier = 0.0f;
     for (CostFunction c : costFunctions) {
       float multiplier = c.getMultiplier();
-      if (multiplier <= 0) {
-        LOG.trace("{} not needed because multiplier is <= 0", 
c.getClass().getSimpleName());
-        continue;
-      }
+      double cost = c.cost();
       if (!c.isNeeded()) {
         LOG.trace("{} not needed", c.getClass().getSimpleName());
         continue;
       }
+      total += cost * multiplier;
       sumMultiplier += multiplier;
-      total += c.cost() * multiplier;
+    }
+    if (sumMultiplier <= 0) {
+      LOG.error("At least one cost function needs a multiplier > 0. For 
example, set "
+        + "hbase.master.balancer.stochastic.regionCountCost to a positive 
value or default");
+      return false;
     }
 
-    boolean balanced = total <= 0 || sumMultiplier <= 0 ||
-        (sumMultiplier > 0 && (total / sumMultiplier) < minCostNeedBalance);
-    if (LOG.isDebugEnabled()) {
-      LOG.debug("{} {}; total cost={}, sum multiplier={}; cost/multiplier to 
need a balance is {}",
-          balanced ? "Skipping load balancing because balanced" : "We need to 
load balance",
-          isByTable ? String.format("table (%s)", tableName) : "cluster",
-          total, sumMultiplier, minCostNeedBalance);
-      if (LOG.isTraceEnabled()) {
-        LOG.trace("Balance decision detailed function costs={}", 
functionCost());
+    boolean balanced = (total / sumMultiplier < minCostNeedBalance);
+    if (balanced) {
+      String reason = "";
+      if (total <= 0) {
+        reason = 
"(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern) = " + total +
+          " <= 0";
+      } else if (sumMultiplier <= 0) {
+        reason = "sumMultiplier = " + sumMultiplier + " <= 0";
+      } else if ((total / sumMultiplier) < minCostNeedBalance) {
+        reason =
+          
"[(cost1*multiplier1)+(cost2*multiplier2)+...+(costn*multipliern)]/sumMultiplier
 = " + (
+            total / sumMultiplier) + " <= minCostNeedBalance(" + 
minCostNeedBalance + ")";
       }
+
+      LOG.info("{} - skipping load balancing because weighted average 
imbalance={} <= " +
+          "threshold({}). If you want more aggressive balancing, either lower 
" +
+          "hbase.master.balancer.stochastic.minCostNeedBalance from {} or 
increase the relative" +
+          " multiplier(s) of the specific cost function(s). functionCost={}",
+        isByTable ? "Table specific (" + tableName + ")" : "Cluster wide", 
total / sumMultiplier,
+        minCostNeedBalance, minCostNeedBalance, functionCost());
+    } else {
+      LOG.info("{} - Calculating plan. may take up to {}ms to complete.",
+        isByTable ? "Table specific ("+tableName+")" : "Cluster wide", 
maxRunningTime);
     }
     return !balanced;
   }
@@ -426,8 +441,9 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
             maxSteps);
       }
     }
-    LOG.info("start StochasticLoadBalancer.balancer, initCost=" + currentCost 
+ ", functionCost="
-        + functionCost() + " computedMaxSteps: " + computedMaxSteps);
+    LOG.info("Start StochasticLoadBalancer.balancer, initial weighted average 
imbalance={}, "
+      + "functionCost={} computedMaxSteps={}",
+      currentCost / sumMultiplier, functionCost(), computedMaxSteps);
 
     // Perform a stochastic walk to see if we can get a good fit.
     long step;
@@ -472,16 +488,16 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
     if (initCost > currentCost) {
       plans = createRegionPlans(cluster);
-      LOG.info("Finished computing new load balance plan. Computation took {}" 
+
-        " to try {} different iterations.  Found a solution that moves " +
-        "{} regions; Going from a computed cost of {}" +
-        " to a new cost of {}", java.time.Duration.ofMillis(endTime - 
startTime),
-        step, plans.size(), initCost, currentCost);
+      LOG.info("Finished computing new moving plan. Computation took {} ms" +
+          " to try {} different iterations.  Found a solution that moves " +
+          "{} regions; Going from a computed imbalance of {}" + " to a new 
imbalance of {}. ",
+        endTime - startTime, step, plans.size(), initCost / sumMultiplier,
+        currentCost / sumMultiplier);
       return plans;
     }
-    LOG.info("Could not find a better load balance plan.  Tried {} different 
configurations in " +
-      "{}, and did not find anything with a computed cost less than {}", step,
-      java.time.Duration.ofMillis(endTime - startTime), initCost);
+    LOG.info("Could not find a better moving plan.  Tried {} different 
configurations in " +
+        "{} ms, and did not find anything with an imbalance score less than 
{}", step,
+      endTime - startTime, initCost / sumMultiplier);
     return null;
   }
 
@@ -513,7 +529,8 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
   }
 
   private void addCostFunction(CostFunction costFunction) {
-    if (costFunction.getMultiplier() > 0) {
+    float multiplier = costFunction.getMultiplier();
+    if (multiplier > 0) {
       costFunctions.add(costFunction);
     }
   }
@@ -524,9 +541,13 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
       builder.append(c.getClass().getSimpleName());
       builder.append(" : (");
       if (c.isNeeded()) {
-        builder.append(c.getMultiplier());
+        builder.append("multiplier=" + c.getMultiplier());
         builder.append(", ");
-        builder.append(c.cost());
+        double cost = c.cost();
+        builder.append("imbalance=" + cost);
+        if (cost < minCostNeedBalance) {
+          builder.append(", balanced");
+        }
       } else {
         builder.append("not needed");
       }
@@ -535,6 +556,28 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     return builder.toString();
   }
 
+
+  private String totalCostsPerFunc() {
+    StringBuilder builder = new StringBuilder();
+    for (CostFunction c : costFunctions) {
+      if (!c.isNeeded()) {
+        continue;
+      }
+      double cost = c.getMultiplier() * c.cost();
+      if (cost > 0.0) {
+        builder.append(" ");
+        builder.append(c.getClass().getSimpleName());
+        builder.append(" : ");
+        builder.append(cost);
+        builder.append(";");
+      }
+    }
+    if (builder.length() > 0) {
+      builder.deleteCharAt(builder.length() - 1);
+    }
+    return builder.toString();
+  }
+
   /**
    * Create all of the RegionPlan's needed to move from the initial cluster 
state to the desired
    * state.
@@ -601,7 +644,7 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     allowedOnPath = ".*(/src/test/.*|StochasticLoadBalancer).java")
   void updateCostsWithAction(Cluster cluster, Action action) {
     for (CostFunction c : costFunctions) {
-      if (c.getMultiplier() > 0 && c.isNeeded()) {
+      if (c.isNeeded()) {
         c.postAction(action);
       }
     }
@@ -640,7 +683,7 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
       CostFunction c = costFunctions.get(i);
       this.tempFunctionCosts[i] = 0.0;
 
-      if (c.getMultiplier() <= 0 || !c.isNeeded()) {
+      if (!c.isNeeded()) {
         continue;
       }
 
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java
index be7eecc..c952149 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerBalanceCluster.java
@@ -51,7 +51,7 @@ public class TestStochasticLoadBalancerBalanceCluster extends 
BalancerTestBase {
    */
   @Test
   public void testBalanceCluster() throws Exception {
-    conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 3 * 60 * 
1000); // 800 sec
+    conf.setLong("hbase.master.balancer.stochastic.maxRunningTime", 3 * 60 * 
1000); // 3 min
     conf.setFloat("hbase.master.balancer.stochastic.maxMovePercent", 1.0f);
     conf.setLong(StochasticLoadBalancer.MAX_STEPS_KEY, 20000000L);
     loadBalancer.setConf(conf);

Reply via email to