This is an automated email from the ASF dual-hosted git repository.
rmattingly pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new 9a6ae5a3209 HBASE-29072 StochasticLoadBalancer#areReplicasColocated
ignores rack colocation (#6622) (#6640)
9a6ae5a3209 is described below
commit 9a6ae5a32098b0b1a2472f56f9eb507ec6c65586
Author: Ray Mattingly <[email protected]>
AuthorDate: Tue Jan 28 14:59:20 2025 -0500
HBASE-29072 StochasticLoadBalancer#areReplicasColocated ignores rack
colocation (#6622) (#6640)
Signed-off-by: Nick Dimiduk <[email protected]>
Co-authored-by: Ray Mattingly <[email protected]>
---
.../master/balancer/BalancerClusterState.java | 6 +++
.../master/balancer/StochasticLoadBalancer.java | 40 ++++++++++++++---
.../TestStochasticLoadBalancerRegionReplica.java | 50 ++++++++++++++++++----
3 files changed, 82 insertions(+), 14 deletions(-)
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
index 1b3e75bb26c..b857055fb3a 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
@@ -106,6 +106,7 @@ class BalancerClusterState {
int numRacks;
int numTables;
int numRegions;
+ int maxReplicas = 1;
int numMovedRegions = 0; // num moved regions from the initial configuration
Map<ServerName, List<RegionInfo>> clusterState;
@@ -432,6 +433,11 @@ class BalancerClusterState {
: serversToIndex.get(loc.get(i).getAddress()));
}
}
+
+ int numReplicas = region.getReplicaId() + 1;
+ if (numReplicas > maxReplicas) {
+ maxReplicas = numReplicas;
+ }
}
/**
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index 46efb4d499d..017625a715e 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -376,10 +376,33 @@ public class StochasticLoadBalancer extends
BaseLoadBalancer {
}
}
- private boolean areSomeRegionReplicasColocated(BalancerClusterState c) {
- regionReplicaHostCostFunction.prepare(c);
- double cost = Math.abs(regionReplicaHostCostFunction.cost());
- return cost > CostFunction.getCostEpsilon(cost);
+ private boolean areSomeRegionReplicasColocatedOnHost(BalancerClusterState c)
{
+ if (c.numHosts >= c.maxReplicas) {
+ regionReplicaHostCostFunction.prepare(c);
+ double hostCost = Math.abs(regionReplicaHostCostFunction.cost());
+ boolean colocatedAtHost = hostCost >
CostFunction.getCostEpsilon(hostCost);
+ if (colocatedAtHost) {
+ return true;
+ }
+ LOG.trace("No host colocation detected with host cost={}", hostCost);
+ }
+ return false;
+ }
+
+ private boolean areSomeRegionReplicasColocatedOnRack(BalancerClusterState c)
{
+ if (c.numRacks >= c.maxReplicas) {
+ regionReplicaRackCostFunction.prepare(c);
+ double rackCost = Math.abs(regionReplicaRackCostFunction.cost());
+ boolean colocatedAtRack = rackCost >
CostFunction.getCostEpsilon(rackCost);
+ if (colocatedAtRack) {
+ return true;
+ }
+ LOG.trace("No rack colocation detected with rack cost={}", rackCost);
+ } else {
+ LOG.trace("Rack colocation is inevitable with fewer racks than replicas,
"
+ + "so we won't bother checking");
+ }
+ return false;
}
@RestrictedApi(explanation = "Should only be called in tests", link = "",
@@ -393,12 +416,19 @@ public class StochasticLoadBalancer extends
BaseLoadBalancer {
+ " < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")", null);
return false;
}
- if (areSomeRegionReplicasColocated(cluster)) {
+
+ if (areSomeRegionReplicasColocatedOnHost(cluster)) {
LOG.info("Running balancer because at least one server hosts replicas of
the same region."
+ " function cost={}", functionCost());
return true;
}
+ if (areSomeRegionReplicasColocatedOnRack(cluster)) {
+ LOG.info("Running balancer because at least one rack hosts replicas of
the same region."
+ + " function cost={}", functionCost());
+ return true;
+ }
+
if (idleRegionServerExist(cluster)) {
LOG.info("Running balancer because cluster has idle server(s)." + "
function cost={}",
functionCost());
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
index 714f5a9e2f6..a48faa69d4e 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
@@ -41,6 +41,8 @@ import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
+import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
+
@Category({ MasterTests.class, LargeTests.class })
public class TestStochasticLoadBalancerRegionReplica extends BalancerTestBase {
@@ -136,7 +138,7 @@ public class TestStochasticLoadBalancerRegionReplica
extends BalancerTestBase {
}
@Test
- public void testNeedsBalanceForColocatedReplicas() {
+ public void testNeedsBalanceForColocatedReplicasOnHost() {
// check for the case where there are two hosts and with one rack, and
where
// both the replicas are hosted on the same server
List<RegionInfo> regions = randomRegions(1);
@@ -148,20 +150,50 @@ public class TestStochasticLoadBalancerRegionReplica
extends BalancerTestBase {
// until the step above s1 holds two replicas of a region
regions = randomRegions(1);
map.put(s2, regions);
- assertTrue(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
- new BalancerClusterState(map, null, null, null)));
- // check for the case where there are two hosts on the same rack and there
are two racks
- // and both the replicas are on the same rack
- map.clear();
- regions = randomRegions(1);
+ BalancerClusterState cluster =
+ new BalancerClusterState(map, null, null, new ForTestRackManagerOne());
+ loadBalancer.initCosts(cluster);
+ assertTrue(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
cluster));
+ }
+
+ @Test
+ public void testNeedsBalanceForColocatedReplicasOnRack() {
+ // Three hosts, two racks, and two replicas for a region. This should be
balanced
+ List<RegionInfo> regions = randomRegions(1);
+ ServerName s1 = ServerName.valueOf("host1", 1000, 11111);
+ ServerName s2 = ServerName.valueOf("host11", 1000, 11111);
+ Map<ServerName, List<RegionInfo>> map = new HashMap<>();
List<RegionInfo> regionsOnS2 = new ArrayList<>(1);
regionsOnS2.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0),
1));
map.put(s1, regions);
map.put(s2, regionsOnS2);
// add another server so that the cluster has some host on another rack
map.put(ServerName.valueOf("host2", 1000, 11111), randomRegions(1));
- assertFalse(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
- new BalancerClusterState(map, null, null, new ForTestRackManagerOne())));
+ BalancerClusterState cluster =
+ new BalancerClusterState(map, null, null, new ForTestRackManagerOne());
+ loadBalancer.initCosts(cluster);
+ assertTrue(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
cluster));
+ }
+
+ @Test
+ public void testNoNeededBalanceForColocatedReplicasTooFewRacks() {
+ // Three hosts, two racks, and three replicas for a region. This cannot be
balanced
+ List<RegionInfo> regions = randomRegions(1);
+ ServerName s1 = ServerName.valueOf("host1", 1000, 11111);
+ ServerName s2 = ServerName.valueOf("host11", 1000, 11111);
+ ServerName s3 = ServerName.valueOf("host2", 1000, 11111);
+ Map<ServerName, List<RegionInfo>> map = new HashMap<>();
+ List<RegionInfo> regionsOnS2 = new ArrayList<>(1);
+ regionsOnS2.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0),
1));
+ map.put(s1, regions);
+ map.put(s2, regionsOnS2);
+ // there are 3 replicas for region 0, but only add a second rack
+ map.put(s3,
ImmutableList.of(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 2)));
+ BalancerClusterState cluster =
+ new BalancerClusterState(map, null, null, new ForTestRackManagerOne());
+ loadBalancer.initCosts(cluster);
+ // Should be false because there aren't enough racks
+ assertFalse(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
cluster));
}
@Test