This is an automated email from the ASF dual-hosted git repository.
rmattingly pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-3 by this push:
new 0d37302c1fd HBASE-29202 Balancer conditionals make balancer actions
more likely to be approved (#6821) (#6836)
0d37302c1fd is described below
commit 0d37302c1fdda09c7ed8ece6722cd274cf1825cb
Author: Ray Mattingly <[email protected]>
AuthorDate: Fri Mar 21 17:56:02 2025 -0400
HBASE-29202 Balancer conditionals make balancer actions more likely to be
approved (#6821) (#6836)
Signed-off-by: Nick Dimiduk <[email protected]>
Co-authored-by: Ray Mattingly <[email protected]>
---
.../master/balancer/BalancerConditionals.java | 2 +-
.../balancer/CandidateGeneratorTestUtil.java | 32 ++++++---
...ancingTableIsolationAndReplicaDistribution.java | 7 +-
....java => TestUnattainableBalancerCostGoal.java} | 83 +++++++++-------------
4 files changed, 60 insertions(+), 64 deletions(-)
diff --git
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
index bbd85e5c1ef..7e5bd98a013 100644
---
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
+++
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
@@ -145,7 +145,7 @@ final class BalancerConditionals implements Configurable {
// Reset cluster
cluster.doAction(undoAction);
- if (isViolatingPre && isViolatingPost) {
+ if (isViolatingPre == isViolatingPost) {
return 0;
} else if (!isViolatingPre && isViolatingPost) {
return 1;
diff --git
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
index b7e7b5fa844..870c97a1f49 100644
---
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
+++
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
@@ -46,16 +46,22 @@ public final class CandidateGeneratorTestUtil {
private CandidateGeneratorTestUtil() {
}
+ enum ExhaustionType {
+ COST_GOAL_ACHIEVED,
+ NO_MORE_MOVES;
+ }
+
static void runBalancerToExhaustion(Configuration conf,
Map<ServerName, List<RegionInfo>> serverToRegions,
Set<Function<BalancerClusterState, Boolean>> expectations, float
targetMaxBalancerCost) {
- runBalancerToExhaustion(conf, serverToRegions, expectations,
targetMaxBalancerCost, 15000);
+ runBalancerToExhaustion(conf, serverToRegions, expectations,
targetMaxBalancerCost, 15000,
+ ExhaustionType.COST_GOAL_ACHIEVED);
}
static void runBalancerToExhaustion(Configuration conf,
Map<ServerName, List<RegionInfo>> serverToRegions,
Set<Function<BalancerClusterState, Boolean>> expectations, float
targetMaxBalancerCost,
- long maxRunningTime) {
+ long maxRunningTime, ExhaustionType exhaustionType) {
// Do the full plan. We're testing with a lot of regions
conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
conf.setLong(MAX_RUNNING_TIME_KEY, maxRunningTime);
@@ -71,7 +77,7 @@ public final class CandidateGeneratorTestUtil {
boolean isBalanced = false;
while (!isBalanced) {
balancerRuns++;
- if (balancerRuns > 1000) {
+ if (balancerRuns > 10) {
throw new RuntimeException("Balancer failed to find balance & meet
expectations");
}
long start = System.currentTimeMillis();
@@ -106,16 +112,24 @@ public final class CandidateGeneratorTestUtil {
}
}
if (isBalanced) { // Check if the balancer thinks we're done too
- LOG.info("All balancer conditions passed. Checking if balancer thinks
it's done.");
- if
(stochasticLoadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, cluster)) {
- LOG.info("Balancer would still like to run");
- isBalanced = false;
+ if (exhaustionType == ExhaustionType.COST_GOAL_ACHIEVED) {
+ // If we expect to achieve the cost goal, then needsBalance should
be false
+ if
(stochasticLoadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, cluster)) {
+ LOG.info("Balancer cost goal is not achieved. needsBalance=true");
+ isBalanced = false;
+ }
} else {
- LOG.info("Balancer is done");
+ // If we anticipate running out of moves, then our last balance run
should have produced
+ // nothing
+ if (regionPlans != null && !regionPlans.isEmpty()) {
+ LOG.info("Balancer is not out of moves. regionPlans.size()={}",
regionPlans.size());
+ isBalanced = false;
+ }
}
}
}
- LOG.info("Balancing took {}sec",
Duration.ofMillis(balancingMillis).toMinutes());
+ LOG.info("Balancer is done. Balancing took {}sec",
+ Duration.ofMillis(balancingMillis).toMinutes());
}
/**
diff --git
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
index 5fec827b9bf..0ea739faf78 100644
---
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
+++
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
@@ -104,9 +104,10 @@ public class
TestLargeClusterBalancingTableIsolationAndReplicaDistribution {
conf.setBoolean(BalancerConditionals.ISOLATE_SYSTEM_TABLES_KEY, true);
DistributeReplicasTestConditional.enableConditionalReplicaDistributionForTest(conf);
- runBalancerToExhaustion(conf, serverToRegions,
Set.of(this::isMetaTableIsolated,
- this::isSystemTableIsolated,
CandidateGeneratorTestUtil::areAllReplicasDistributed), 10.0f,
- 60_000);
+ runBalancerToExhaustion(conf, serverToRegions,
+ Set.of(this::isMetaTableIsolated, this::isSystemTableIsolated,
+ CandidateGeneratorTestUtil::areAllReplicasDistributed),
+ 10.0f, 60_000,
CandidateGeneratorTestUtil.ExhaustionType.COST_GOAL_ACHIEVED);
LOG.info("Meta table regions are successfully isolated, "
+ "and region replicas are appropriately distributed.");
}
diff --git
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestUnattainableBalancerCostGoal.java
similarity index 55%
copy from
hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
copy to
hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestUnattainableBalancerCostGoal.java
index 5fec827b9bf..ffa2b4a7821 100644
---
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
+++
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestUnattainableBalancerCostGoal.java
@@ -24,7 +24,6 @@ import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Random;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
@@ -41,21 +40,27 @@ import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-@Category({ MediumTests.class, MasterTests.class })
-public class TestLargeClusterBalancingTableIsolationAndReplicaDistribution {
+/**
+ * If your minCostNeedsBalance is set too low, then the balancer should still
eventually stop making
+ * moves as further cost improvements become impossible, and balancer plan
calculation becomes
+ * wasteful. This test ensures that the balancer will not get stuck in a loop
of continuously moving
+ * regions.
+ */
+@Category({ MasterTests.class, MediumTests.class })
+public class TestUnattainableBalancerCostGoal {
@ClassRule
- public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule
-
.forClass(TestLargeClusterBalancingTableIsolationAndReplicaDistribution.class);
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestUnattainableBalancerCostGoal.class);
+
+ private static final Logger LOG =
LoggerFactory.getLogger(TestUnattainableBalancerCostGoal.class);
- private static final Logger LOG =
-
LoggerFactory.getLogger(TestLargeClusterBalancingTableIsolationAndReplicaDistribution.class);
private static final TableName SYSTEM_TABLE_NAME =
TableName.valueOf("hbase:system");
- private static final TableName NON_ISOLATED_TABLE_NAME =
TableName.valueOf("userTable");
+ private static final TableName NON_SYSTEM_TABLE_NAME =
TableName.valueOf("userTable");
- private static final int NUM_SERVERS = 500;
- private static final int NUM_REGIONS = 2_500;
- private static final int NUM_REPLICAS = 3;
+ private static final int NUM_SERVERS = 10;
+ private static final int NUM_REGIONS = 1000;
+ private static final float UNACHIEVABLE_COST_GOAL = 0.01f;
private static final ServerName[] servers = new ServerName[NUM_SERVERS];
private static final Map<ServerName, List<RegionInfo>> serverToRegions = new
HashMap<>();
@@ -65,62 +70,38 @@ public class
TestLargeClusterBalancingTableIsolationAndReplicaDistribution {
// Initialize servers
for (int i = 0; i < NUM_SERVERS; i++) {
servers[i] = ServerName.valueOf("server" + i, i,
System.currentTimeMillis());
- serverToRegions.put(servers[i], new ArrayList<>());
}
- // Create primary regions and their replicas
+ // Create regions
+ List<RegionInfo> allRegions = new ArrayList<>();
for (int i = 0; i < NUM_REGIONS; i++) {
- TableName tableName;
- if (i < 1) {
- tableName = TableName.META_TABLE_NAME;
- } else if (i < 10) {
- tableName = SYSTEM_TABLE_NAME;
- } else {
- tableName = NON_ISOLATED_TABLE_NAME;
- }
-
- // Define startKey and endKey for the region
+ TableName tableName = i < 3 ? SYSTEM_TABLE_NAME : NON_SYSTEM_TABLE_NAME;
byte[] startKey = new byte[1];
startKey[0] = (byte) i;
byte[] endKey = new byte[1];
endKey[0] = (byte) (i + 1);
- Random random = new Random();
- // Create 3 replicas for each primary region
- for (int replicaId = 0; replicaId < NUM_REPLICAS; replicaId++) {
- RegionInfo regionInfo =
RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey)
- .setEndKey(endKey).setReplicaId(replicaId).build();
- // Assign region to random server
- int randomServer = random.nextInt(servers.length);
- serverToRegions.get(servers[randomServer]).add(regionInfo);
- }
+ RegionInfo regionInfo =
+
RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey).setEndKey(endKey).build();
+ allRegions.add(regionInfo);
+ }
+
+ // Assign all regions to the first server
+ serverToRegions.put(servers[0], new ArrayList<>(allRegions));
+ for (int i = 1; i < NUM_SERVERS; i++) {
+ serverToRegions.put(servers[i], new ArrayList<>());
}
}
@Test
- public void testTableIsolationAndReplicaDistribution() {
+ public void testSystemTableIsolation() {
Configuration conf = new Configuration(false);
- conf.setBoolean(BalancerConditionals.ISOLATE_META_TABLE_KEY, true);
conf.setBoolean(BalancerConditionals.ISOLATE_SYSTEM_TABLES_KEY, true);
-
DistributeReplicasTestConditional.enableConditionalReplicaDistributionForTest(conf);
-
- runBalancerToExhaustion(conf, serverToRegions,
Set.of(this::isMetaTableIsolated,
- this::isSystemTableIsolated,
CandidateGeneratorTestUtil::areAllReplicasDistributed), 10.0f,
- 60_000);
- LOG.info("Meta table regions are successfully isolated, "
- + "and region replicas are appropriately distributed.");
- }
-
- /**
- * Validates whether all meta table regions are isolated.
- */
- private boolean isMetaTableIsolated(BalancerClusterState cluster) {
- return isTableIsolated(cluster, TableName.META_TABLE_NAME, "Meta");
+ runBalancerToExhaustion(conf, serverToRegions,
Set.of(this::isSystemTableIsolated),
+ UNACHIEVABLE_COST_GOAL, 10_000,
CandidateGeneratorTestUtil.ExhaustionType.NO_MORE_MOVES);
+ LOG.info("Meta table regions are successfully isolated.");
}
- /**
- * Validates whether all meta table regions are isolated.
- */
private boolean isSystemTableIsolated(BalancerClusterState cluster) {
return isTableIsolated(cluster, SYSTEM_TABLE_NAME, "System");
}