This is an automated email from the ASF dual-hosted git repository.

rmattingly pushed a commit to branch HBASE-29072-branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit db0798975677d6484696cabdb63d51f37dc850da
Author: Ray Mattingly <[email protected]>
AuthorDate: Tue Jan 28 14:59:20 2025 -0500

    HBASE-29072 StochasticLoadBalancer#areReplicasColocated ignores rack 
colocation (#6622) (#6640)
    
    Signed-off-by: Nick Dimiduk <[email protected]>
    Co-authored-by: Ray Mattingly <[email protected]>
---
 .../master/balancer/BalancerClusterState.java      |  6 +++
 .../master/balancer/StochasticLoadBalancer.java    | 40 ++++++++++++++---
 .../TestStochasticLoadBalancerRegionReplica.java   | 50 ++++++++++++++++++----
 3 files changed, 82 insertions(+), 14 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
index 1b3e75bb26c..b857055fb3a 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
@@ -106,6 +106,7 @@ class BalancerClusterState {
   int numRacks;
   int numTables;
   int numRegions;
+  int maxReplicas = 1;
 
   int numMovedRegions = 0; // num moved regions from the initial configuration
   Map<ServerName, List<RegionInfo>> clusterState;
@@ -432,6 +433,11 @@ class BalancerClusterState {
             : serversToIndex.get(loc.get(i).getAddress()));
       }
     }
+
+    int numReplicas = region.getReplicaId() + 1;
+    if (numReplicas > maxReplicas) {
+      maxReplicas = numReplicas;
+    }
   }
 
   /**
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index 46efb4d499d..017625a715e 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -376,10 +376,33 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     }
   }
 
-  private boolean areSomeRegionReplicasColocated(BalancerClusterState c) {
-    regionReplicaHostCostFunction.prepare(c);
-    double cost = Math.abs(regionReplicaHostCostFunction.cost());
-    return cost > CostFunction.getCostEpsilon(cost);
+  private boolean areSomeRegionReplicasColocatedOnHost(BalancerClusterState c) 
{
+    if (c.numHosts >= c.maxReplicas) {
+      regionReplicaHostCostFunction.prepare(c);
+      double hostCost = Math.abs(regionReplicaHostCostFunction.cost());
+      boolean colocatedAtHost = hostCost > 
CostFunction.getCostEpsilon(hostCost);
+      if (colocatedAtHost) {
+        return true;
+      }
+      LOG.trace("No host colocation detected with host cost={}", hostCost);
+    }
+    return false;
+  }
+
+  private boolean areSomeRegionReplicasColocatedOnRack(BalancerClusterState c) 
{
+    if (c.numRacks >= c.maxReplicas) {
+      regionReplicaRackCostFunction.prepare(c);
+      double rackCost = Math.abs(regionReplicaRackCostFunction.cost());
+      boolean colocatedAtRack = rackCost > 
CostFunction.getCostEpsilon(rackCost);
+      if (colocatedAtRack) {
+        return true;
+      }
+      LOG.trace("No rack colocation detected with rack cost={}", rackCost);
+    } else {
+      LOG.trace("Rack colocation is inevitable with fewer racks than replicas, 
"
+        + "so we won't bother checking");
+    }
+    return false;
   }
 
   @RestrictedApi(explanation = "Should only be called in tests", link = "",
@@ -393,12 +416,19 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
         + " < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")", null);
       return false;
     }
-    if (areSomeRegionReplicasColocated(cluster)) {
+
+    if (areSomeRegionReplicasColocatedOnHost(cluster)) {
       LOG.info("Running balancer because at least one server hosts replicas of 
the same region."
         + " function cost={}", functionCost());
       return true;
     }
 
+    if (areSomeRegionReplicasColocatedOnRack(cluster)) {
+      LOG.info("Running balancer because at least one rack hosts replicas of 
the same region."
+        + " function cost={}", functionCost());
+      return true;
+    }
+
     if (idleRegionServerExist(cluster)) {
       LOG.info("Running balancer because cluster has idle server(s)." + " 
function cost={}",
         functionCost());
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
index 714f5a9e2f6..a48faa69d4e 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
@@ -41,6 +41,8 @@ import org.junit.ClassRule;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
+import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
+
 @Category({ MasterTests.class, LargeTests.class })
 public class TestStochasticLoadBalancerRegionReplica extends BalancerTestBase {
 
@@ -136,7 +138,7 @@ public class TestStochasticLoadBalancerRegionReplica 
extends BalancerTestBase {
   }
 
   @Test
-  public void testNeedsBalanceForColocatedReplicas() {
+  public void testNeedsBalanceForColocatedReplicasOnHost() {
     // check for the case where there are two hosts and with one rack, and 
where
     // both the replicas are hosted on the same server
     List<RegionInfo> regions = randomRegions(1);
@@ -148,20 +150,50 @@ public class TestStochasticLoadBalancerRegionReplica 
extends BalancerTestBase {
     // until the step above s1 holds two replicas of a region
     regions = randomRegions(1);
     map.put(s2, regions);
-    assertTrue(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
-      new BalancerClusterState(map, null, null, null)));
-    // check for the case where there are two hosts on the same rack and there 
are two racks
-    // and both the replicas are on the same rack
-    map.clear();
-    regions = randomRegions(1);
+    BalancerClusterState cluster =
+      new BalancerClusterState(map, null, null, new ForTestRackManagerOne());
+    loadBalancer.initCosts(cluster);
+    assertTrue(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, 
cluster));
+  }
+
+  @Test
+  public void testNeedsBalanceForColocatedReplicasOnRack() {
+    // Three hosts, two racks, and two replicas for a region. This should be 
balanced
+    List<RegionInfo> regions = randomRegions(1);
+    ServerName s1 = ServerName.valueOf("host1", 1000, 11111);
+    ServerName s2 = ServerName.valueOf("host11", 1000, 11111);
+    Map<ServerName, List<RegionInfo>> map = new HashMap<>();
     List<RegionInfo> regionsOnS2 = new ArrayList<>(1);
     regionsOnS2.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 
1));
     map.put(s1, regions);
     map.put(s2, regionsOnS2);
     // add another server so that the cluster has some host on another rack
     map.put(ServerName.valueOf("host2", 1000, 11111), randomRegions(1));
-    assertFalse(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
-      new BalancerClusterState(map, null, null, new ForTestRackManagerOne())));
+    BalancerClusterState cluster =
+      new BalancerClusterState(map, null, null, new ForTestRackManagerOne());
+    loadBalancer.initCosts(cluster);
+    assertTrue(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, 
cluster));
+  }
+
+  @Test
+  public void testNoNeededBalanceForColocatedReplicasTooFewRacks() {
+    // Three hosts, two racks, and three replicas for a region. This cannot be 
balanced
+    List<RegionInfo> regions = randomRegions(1);
+    ServerName s1 = ServerName.valueOf("host1", 1000, 11111);
+    ServerName s2 = ServerName.valueOf("host11", 1000, 11111);
+    ServerName s3 = ServerName.valueOf("host2", 1000, 11111);
+    Map<ServerName, List<RegionInfo>> map = new HashMap<>();
+    List<RegionInfo> regionsOnS2 = new ArrayList<>(1);
+    regionsOnS2.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 
1));
+    map.put(s1, regions);
+    map.put(s2, regionsOnS2);
+    // there are 3 replicas for region 0, but only add a second rack
+    map.put(s3, 
ImmutableList.of(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 2)));
+    BalancerClusterState cluster =
+      new BalancerClusterState(map, null, null, new ForTestRackManagerOne());
+    loadBalancer.initCosts(cluster);
+    // Should be false because there aren't enough racks
+    assertFalse(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, 
cluster));
   }
 
   @Test

Reply via email to