This is an automated email from the ASF dual-hosted git repository.
rmattingly pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2.6 by this push:
new 9540d49961f HBASE-29072 StochasticLoadBalancer#areReplicasColocated
ignores rack colocation (#6622) (#6640) (#6644)
9540d49961f is described below
commit 9540d49961fc47815f79e6813093d7f475ae660d
Author: Ray Mattingly <[email protected]>
AuthorDate: Fri Jan 31 18:25:26 2025 -0500
HBASE-29072 StochasticLoadBalancer#areReplicasColocated ignores rack
colocation (#6622) (#6640) (#6644)
Signed-off-by: Nick Dimiduk <[email protected]>
Co-authored-by: Ray Mattingly <[email protected]>
---
.../master/balancer/BalancerClusterState.java | 6 +++
.../master/balancer/StochasticLoadBalancer.java | 40 ++++++++++++++---
.../TestStochasticLoadBalancerRegionReplica.java | 50 ++++++++++++++++++----
3 files changed, 82 insertions(+), 14 deletions(-)
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
index 1b3e75bb26c..b857055fb3a 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
@@ -106,6 +106,7 @@ class BalancerClusterState {
int numRacks;
int numTables;
int numRegions;
+ int maxReplicas = 1;
int numMovedRegions = 0; // num moved regions from the initial configuration
Map<ServerName, List<RegionInfo>> clusterState;
@@ -432,6 +433,11 @@ class BalancerClusterState {
: serversToIndex.get(loc.get(i).getAddress()));
}
}
+
+ int numReplicas = region.getReplicaId() + 1;
+ if (numReplicas > maxReplicas) {
+ maxReplicas = numReplicas;
+ }
}
/**
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index 46efb4d499d..017625a715e 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -376,10 +376,33 @@ public class StochasticLoadBalancer extends
BaseLoadBalancer {
}
}
- private boolean areSomeRegionReplicasColocated(BalancerClusterState c) {
- regionReplicaHostCostFunction.prepare(c);
- double cost = Math.abs(regionReplicaHostCostFunction.cost());
- return cost > CostFunction.getCostEpsilon(cost);
+ private boolean areSomeRegionReplicasColocatedOnHost(BalancerClusterState c)
{
+ if (c.numHosts >= c.maxReplicas) {
+ regionReplicaHostCostFunction.prepare(c);
+ double hostCost = Math.abs(regionReplicaHostCostFunction.cost());
+ boolean colocatedAtHost = hostCost >
CostFunction.getCostEpsilon(hostCost);
+ if (colocatedAtHost) {
+ return true;
+ }
+ LOG.trace("No host colocation detected with host cost={}", hostCost);
+ }
+ return false;
+ }
+
+ private boolean areSomeRegionReplicasColocatedOnRack(BalancerClusterState c)
{
+ if (c.numRacks >= c.maxReplicas) {
+ regionReplicaRackCostFunction.prepare(c);
+ double rackCost = Math.abs(regionReplicaRackCostFunction.cost());
+ boolean colocatedAtRack = rackCost >
CostFunction.getCostEpsilon(rackCost);
+ if (colocatedAtRack) {
+ return true;
+ }
+ LOG.trace("No rack colocation detected with rack cost={}", rackCost);
+ } else {
+ LOG.trace("Rack colocation is inevitable with fewer racks than replicas,
"
+ + "so we won't bother checking");
+ }
+ return false;
}
@RestrictedApi(explanation = "Should only be called in tests", link = "",
@@ -393,12 +416,19 @@ public class StochasticLoadBalancer extends
BaseLoadBalancer {
+ " < MIN_SERVER_BALANCE(" + MIN_SERVER_BALANCE + ")", null);
return false;
}
- if (areSomeRegionReplicasColocated(cluster)) {
+
+ if (areSomeRegionReplicasColocatedOnHost(cluster)) {
LOG.info("Running balancer because at least one server hosts replicas of
the same region."
+ " function cost={}", functionCost());
return true;
}
+ if (areSomeRegionReplicasColocatedOnRack(cluster)) {
+ LOG.info("Running balancer because at least one rack hosts replicas of
the same region."
+ + " function cost={}", functionCost());
+ return true;
+ }
+
if (idleRegionServerExist(cluster)) {
LOG.info("Running balancer because cluster has idle server(s)." + "
function cost={}",
functionCost());
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
index 714f5a9e2f6..a48faa69d4e 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStochasticLoadBalancerRegionReplica.java
@@ -41,6 +41,8 @@ import org.junit.ClassRule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
+import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableList;
+
@Category({ MasterTests.class, LargeTests.class })
public class TestStochasticLoadBalancerRegionReplica extends BalancerTestBase {
@@ -136,7 +138,7 @@ public class TestStochasticLoadBalancerRegionReplica
extends BalancerTestBase {
}
@Test
- public void testNeedsBalanceForColocatedReplicas() {
+ public void testNeedsBalanceForColocatedReplicasOnHost() {
// check for the case where there are two hosts and with one rack, and
where
// both the replicas are hosted on the same server
List<RegionInfo> regions = randomRegions(1);
@@ -148,20 +150,50 @@ public class TestStochasticLoadBalancerRegionReplica
extends BalancerTestBase {
// until the step above s1 holds two replicas of a region
regions = randomRegions(1);
map.put(s2, regions);
- assertTrue(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
- new BalancerClusterState(map, null, null, null)));
- // check for the case where there are two hosts on the same rack and there
are two racks
- // and both the replicas are on the same rack
- map.clear();
- regions = randomRegions(1);
+ BalancerClusterState cluster =
+ new BalancerClusterState(map, null, null, new ForTestRackManagerOne());
+ loadBalancer.initCosts(cluster);
+ assertTrue(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
cluster));
+ }
+
+ @Test
+ public void testNeedsBalanceForColocatedReplicasOnRack() {
+ // Three hosts, two racks, and two replicas for a region. This should be
balanced
+ List<RegionInfo> regions = randomRegions(1);
+ ServerName s1 = ServerName.valueOf("host1", 1000, 11111);
+ ServerName s2 = ServerName.valueOf("host11", 1000, 11111);
+ Map<ServerName, List<RegionInfo>> map = new HashMap<>();
List<RegionInfo> regionsOnS2 = new ArrayList<>(1);
regionsOnS2.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0),
1));
map.put(s1, regions);
map.put(s2, regionsOnS2);
// add another server so that the cluster has some host on another rack
map.put(ServerName.valueOf("host2", 1000, 11111), randomRegions(1));
- assertFalse(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
- new BalancerClusterState(map, null, null, new ForTestRackManagerOne())));
+ BalancerClusterState cluster =
+ new BalancerClusterState(map, null, null, new ForTestRackManagerOne());
+ loadBalancer.initCosts(cluster);
+ assertTrue(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
cluster));
+ }
+
+ @Test
+ public void testNoNeededBalanceForColocatedReplicasTooFewRacks() {
+ // Three hosts, two racks, and three replicas for a region. This cannot be
balanced
+ List<RegionInfo> regions = randomRegions(1);
+ ServerName s1 = ServerName.valueOf("host1", 1000, 11111);
+ ServerName s2 = ServerName.valueOf("host11", 1000, 11111);
+ ServerName s3 = ServerName.valueOf("host2", 1000, 11111);
+ Map<ServerName, List<RegionInfo>> map = new HashMap<>();
+ List<RegionInfo> regionsOnS2 = new ArrayList<>(1);
+ regionsOnS2.add(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0),
1));
+ map.put(s1, regions);
+ map.put(s2, regionsOnS2);
+ // there are 3 replicas for region 0, but only add a second rack
+ map.put(s3,
ImmutableList.of(RegionReplicaUtil.getRegionInfoForReplica(regions.get(0), 2)));
+ BalancerClusterState cluster =
+ new BalancerClusterState(map, null, null, new ForTestRackManagerOne());
+ loadBalancer.initCosts(cluster);
+ // Should be false because there aren't enough racks
+ assertFalse(loadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME,
cluster));
}
@Test