This is an automated email from the ASF dual-hosted git repository.

rmattingly pushed a commit to branch HBASE-29202-branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit da4a53793d706ef652d5a31e9f6633b9c1a67f0b
Author: Ray Mattingly <[email protected]>
AuthorDate: Fri Mar 21 08:05:03 2025 -0400

    HBASE-29202 Balancer conditionals make balancer actions more likely to be 
approved (#6821)
    
    Co-authored-by: Ray Mattingly <[email protected]>
    Signed-off-by: Nick Dimiduk <[email protected]>
---
 .../master/balancer/BalancerConditionals.java      |  2 +-
 .../balancer/CandidateGeneratorTestUtil.java       | 32 +++++---
 ...ancingTableIsolationAndReplicaDistribution.java |  8 +-
 ....java => TestUnattainableBalancerCostGoal.java} | 86 +++++++++-------------
 4 files changed, 61 insertions(+), 67 deletions(-)

diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
index 021a34bce6b..b82c68b37da 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
@@ -146,7 +146,7 @@ final class BalancerConditionals implements Configurable {
     // Reset cluster
     cluster.doAction(undoAction);
 
-    if (isViolatingPre && isViolatingPost) {
+    if (isViolatingPre == isViolatingPost) {
       return 0;
     } else if (!isViolatingPre && isViolatingPost) {
       return 1;
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
index 03bfcce8e15..d2a9d17cdba 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
@@ -51,16 +51,22 @@ public final class CandidateGeneratorTestUtil {
   private CandidateGeneratorTestUtil() {
   }
 
+  enum ExhaustionType {
+    COST_GOAL_ACHIEVED,
+    NO_MORE_MOVES;
+  }
+
   static void runBalancerToExhaustion(Configuration conf,
     Map<ServerName, List<RegionInfo>> serverToRegions,
     Set<Function<BalancerClusterState, Boolean>> expectations, float 
targetMaxBalancerCost) {
-    runBalancerToExhaustion(conf, serverToRegions, expectations, 
targetMaxBalancerCost, 15000);
+    runBalancerToExhaustion(conf, serverToRegions, expectations, 
targetMaxBalancerCost, 15000,
+      ExhaustionType.COST_GOAL_ACHIEVED);
   }
 
   static void runBalancerToExhaustion(Configuration conf,
     Map<ServerName, List<RegionInfo>> serverToRegions,
     Set<Function<BalancerClusterState, Boolean>> expectations, float 
targetMaxBalancerCost,
-    long maxRunningTime) {
+    long maxRunningTime, ExhaustionType exhaustionType) {
     // Do the full plan. We're testing with a lot of regions
     conf.setBoolean("hbase.master.balancer.stochastic.runMaxSteps", true);
     conf.setLong(MAX_RUNNING_TIME_KEY, maxRunningTime);
@@ -76,7 +82,7 @@ public final class CandidateGeneratorTestUtil {
     boolean isBalanced = false;
     while (!isBalanced) {
       balancerRuns++;
-      if (balancerRuns > 1000) {
+      if (balancerRuns > 10) {
         throw new RuntimeException("Balancer failed to find balance & meet 
expectations");
       }
       long start = System.currentTimeMillis();
@@ -111,16 +117,24 @@ public final class CandidateGeneratorTestUtil {
         }
       }
       if (isBalanced) { // Check if the balancer thinks we're done too
-        LOG.info("All balancer conditions passed. Checking if balancer thinks 
it's done.");
-        if 
(stochasticLoadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, cluster)) {
-          LOG.info("Balancer would still like to run");
-          isBalanced = false;
+        if (exhaustionType == ExhaustionType.COST_GOAL_ACHIEVED) {
+          // If we expect to achieve the cost goal, then needsBalance should 
be false
+          if 
(stochasticLoadBalancer.needsBalance(HConstants.ENSEMBLE_TABLE_NAME, cluster)) {
+            LOG.info("Balancer cost goal is not achieved. needsBalance=true");
+            isBalanced = false;
+          }
         } else {
-          LOG.info("Balancer is done");
+          // If we anticipate running out of moves, then our last balance run 
should have produced
+          // nothing
+          if (regionPlans != null && !regionPlans.isEmpty()) {
+            LOG.info("Balancer is not out of moves. regionPlans.size()={}", 
regionPlans.size());
+            isBalanced = false;
+          }
         }
       }
     }
-    LOG.info("Balancing took {}sec", 
Duration.ofMillis(balancingMillis).toMinutes());
+    LOG.info("Balancer is done. Balancing took {}sec",
+      Duration.ofMillis(balancingMillis).toMinutes());
   }
 
   /**
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
index 3a28ae801e4..bc31530f492 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
@@ -104,10 +104,10 @@ public class 
TestLargeClusterBalancingTableIsolationAndReplicaDistribution {
     conf.setBoolean(BalancerConditionals.ISOLATE_META_TABLE_KEY, true);
     conf.setBoolean(BalancerConditionals.ISOLATE_SYSTEM_TABLES_KEY, true);
     
DistributeReplicasTestConditional.enableConditionalReplicaDistributionForTest(conf);
-
-    runBalancerToExhaustion(conf, serverToRegions, 
ImmutableSet.of(this::isMetaTableIsolated,
-      this::isSystemTableIsolated, 
CandidateGeneratorTestUtil::areAllReplicasDistributed), 10.0f,
-      60_000);
+    runBalancerToExhaustion(conf, serverToRegions,
+      ImmutableSet.of(this::isMetaTableIsolated, this::isSystemTableIsolated,
+        CandidateGeneratorTestUtil::areAllReplicasDistributed),
+      10.0f, 60_000, 
CandidateGeneratorTestUtil.ExhaustionType.COST_GOAL_ACHIEVED);
     LOG.info("Meta table regions are successfully isolated, "
       + "and region replicas are appropriately distributed.");
   }
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestUnattainableBalancerCostGoal.java
similarity index 53%
copy from 
hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
copy to 
hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestUnattainableBalancerCostGoal.java
index 3a28ae801e4..ffa2b4a7821 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestLargeClusterBalancingTableIsolationAndReplicaDistribution.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestUnattainableBalancerCostGoal.java
@@ -24,7 +24,7 @@ import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Random;
+import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HBaseClassTestRule;
 import org.apache.hadoop.hbase.ServerName;
@@ -40,23 +40,27 @@ import org.junit.experimental.categories.Category;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.hbase.thirdparty.com.google.common.collect.ImmutableSet;
-
-@Category({ MediumTests.class, MasterTests.class })
-public class TestLargeClusterBalancingTableIsolationAndReplicaDistribution {
+/**
+ * If your minCostNeedsBalance is set too low, then the balancer should still 
eventually stop making
+ * moves as further cost improvements become impossible, and balancer plan 
calculation becomes
+ * wasteful. This test ensures that the balancer will not get stuck in a loop 
of continuously moving
+ * regions.
+ */
+@Category({ MasterTests.class, MediumTests.class })
+public class TestUnattainableBalancerCostGoal {
 
   @ClassRule
-  public static final HBaseClassTestRule CLASS_RULE = HBaseClassTestRule
-    
.forClass(TestLargeClusterBalancingTableIsolationAndReplicaDistribution.class);
+  public static final HBaseClassTestRule CLASS_RULE =
+    HBaseClassTestRule.forClass(TestUnattainableBalancerCostGoal.class);
+
+  private static final Logger LOG = 
LoggerFactory.getLogger(TestUnattainableBalancerCostGoal.class);
 
-  private static final Logger LOG =
-    
LoggerFactory.getLogger(TestLargeClusterBalancingTableIsolationAndReplicaDistribution.class);
   private static final TableName SYSTEM_TABLE_NAME = 
TableName.valueOf("hbase:system");
-  private static final TableName NON_ISOLATED_TABLE_NAME = 
TableName.valueOf("userTable");
+  private static final TableName NON_SYSTEM_TABLE_NAME = 
TableName.valueOf("userTable");
 
-  private static final int NUM_SERVERS = 500;
-  private static final int NUM_REGIONS = 2_500;
-  private static final int NUM_REPLICAS = 3;
+  private static final int NUM_SERVERS = 10;
+  private static final int NUM_REGIONS = 1000;
+  private static final float UNACHIEVABLE_COST_GOAL = 0.01f;
 
   private static final ServerName[] servers = new ServerName[NUM_SERVERS];
   private static final Map<ServerName, List<RegionInfo>> serverToRegions = new 
HashMap<>();
@@ -66,62 +70,38 @@ public class 
TestLargeClusterBalancingTableIsolationAndReplicaDistribution {
     // Initialize servers
     for (int i = 0; i < NUM_SERVERS; i++) {
       servers[i] = ServerName.valueOf("server" + i, i, 
System.currentTimeMillis());
-      serverToRegions.put(servers[i], new ArrayList<>());
     }
 
-    // Create primary regions and their replicas
+    // Create regions
+    List<RegionInfo> allRegions = new ArrayList<>();
     for (int i = 0; i < NUM_REGIONS; i++) {
-      TableName tableName;
-      if (i < 1) {
-        tableName = TableName.META_TABLE_NAME;
-      } else if (i < 10) {
-        tableName = SYSTEM_TABLE_NAME;
-      } else {
-        tableName = NON_ISOLATED_TABLE_NAME;
-      }
-
-      // Define startKey and endKey for the region
+      TableName tableName = i < 3 ? SYSTEM_TABLE_NAME : NON_SYSTEM_TABLE_NAME;
       byte[] startKey = new byte[1];
       startKey[0] = (byte) i;
       byte[] endKey = new byte[1];
       endKey[0] = (byte) (i + 1);
 
-      Random random = new Random();
-      // Create 3 replicas for each primary region
-      for (int replicaId = 0; replicaId < NUM_REPLICAS; replicaId++) {
-        RegionInfo regionInfo = 
RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey)
-          .setEndKey(endKey).setReplicaId(replicaId).build();
-        // Assign region to random server
-        int randomServer = random.nextInt(servers.length);
-        serverToRegions.get(servers[randomServer]).add(regionInfo);
-      }
+      RegionInfo regionInfo =
+        
RegionInfoBuilder.newBuilder(tableName).setStartKey(startKey).setEndKey(endKey).build();
+      allRegions.add(regionInfo);
+    }
+
+    // Assign all regions to the first server
+    serverToRegions.put(servers[0], new ArrayList<>(allRegions));
+    for (int i = 1; i < NUM_SERVERS; i++) {
+      serverToRegions.put(servers[i], new ArrayList<>());
     }
   }
 
   @Test
-  public void testTableIsolationAndReplicaDistribution() {
+  public void testSystemTableIsolation() {
     Configuration conf = new Configuration(false);
-    conf.setBoolean(BalancerConditionals.ISOLATE_META_TABLE_KEY, true);
     conf.setBoolean(BalancerConditionals.ISOLATE_SYSTEM_TABLES_KEY, true);
-    
DistributeReplicasTestConditional.enableConditionalReplicaDistributionForTest(conf);
-
-    runBalancerToExhaustion(conf, serverToRegions, 
ImmutableSet.of(this::isMetaTableIsolated,
-      this::isSystemTableIsolated, 
CandidateGeneratorTestUtil::areAllReplicasDistributed), 10.0f,
-      60_000);
-    LOG.info("Meta table regions are successfully isolated, "
-      + "and region replicas are appropriately distributed.");
-  }
-
-  /**
-   * Validates whether all meta table regions are isolated.
-   */
-  private boolean isMetaTableIsolated(BalancerClusterState cluster) {
-    return isTableIsolated(cluster, TableName.META_TABLE_NAME, "Meta");
+    runBalancerToExhaustion(conf, serverToRegions, 
Set.of(this::isSystemTableIsolated),
+      UNACHIEVABLE_COST_GOAL, 10_000, 
CandidateGeneratorTestUtil.ExhaustionType.NO_MORE_MOVES);
+    LOG.info("Meta table regions are successfully isolated.");
   }
 
-  /**
-   * Validates whether all meta table regions are isolated.
-   */
   private boolean isSystemTableIsolated(BalancerClusterState cluster) {
     return isTableIsolated(cluster, SYSTEM_TABLE_NAME, "System");
   }

Reply via email to