This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-3 by this push:
     new 1d5cc6b708e HBASE-30135: Improve CacheAwareLoadBalancer to simulate 
low cache ratio regions as cached in candidate servers with enough cache space 
(#8221)
1d5cc6b708e is described below

commit 1d5cc6b708e15fe15fcb03d8f181fd6c230cd89d
Author: Wellington Ramos Chevreuil <[email protected]>
AuthorDate: Mon May 18 10:30:00 2026 +0100

    HBASE-30135: Improve CacheAwareLoadBalancer to simulate low cache ratio 
regions as cached in candidate servers with enough cache space (#8221)
    
    Signed-off-by: Peter Somogyi <[email protected]>
    Change-Id: I0b4160b53a84fefeee89b9ac7d5b22ce5d2d824f
---
 .../master/balancer/BalancerClusterState.java      |  38 +++--
 .../hbase/master/balancer/BalancerRegionLoad.java  |   6 +
 .../master/balancer/CacheAwareLoadBalancer.java    | 155 ++++++++++++++++-----
 .../master/balancer/StochasticLoadBalancer.java    |   9 ++
 .../TestCacheAwareLoadBalancerCostFunctions.java   |  14 +-
 ...lancerWithCacheAwareLoadBalancerAsInternal.java |  79 ++++++++++-
 6 files changed, 240 insertions(+), 61 deletions(-)

diff --git 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
index 183ccf4fc25..aa73b52a404 100644
--- 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
+++ 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
@@ -547,7 +547,29 @@ class BalancerClusterState {
     if (load == null) {
       return 0;
     }
-    return regionLoads[region].getLast().getStorefileSizeMB();
+    return load.getLast().getStorefileSizeMB();
+  }
+
+  /**
+   * Finds and returns the sum of the latest reported cache ratio and cold data 
ratio for the region on
+   * the RegionServer where it is currently online.
+   */
+  float getSumRegionCacheAndColdDataRatio(int region) {
+    Deque<BalancerRegionLoad> dq = regionLoads[region];
+    if (dq == null || dq.isEmpty()) {
+      return 0.0f;
+    }
+    BalancerRegionLoad load = dq.getLast();
+    return load.getCurrentRegionCacheRatio() + load.getRegionColdDataRatio();
+  }
+
+  int getRegionSizeMinusColdDataMB(int region) {
+    Deque<BalancerRegionLoad> dq = regionLoads[region];
+    if (dq == null || dq.isEmpty()) {
+      return 0;
+    }
+    BalancerRegionLoad load = dq.getLast();
+    return load.getRegionSizeMB() - (int) (load.getRegionSizeMB() * 
load.getRegionColdDataRatio());
   }
 
   /**
@@ -592,23 +614,11 @@ class BalancerClusterState {
 
   }
 
-  /**
-   * Returns the size of hFiles from the most recent RegionLoad for region
-   */
-  public int getTotalRegionHFileSizeMB(int region) {
-    Deque<BalancerRegionLoad> load = regionLoads[region];
-    if (load == null) {
-      // This means, that the region has no actual data on disk
-      return 0;
-    }
-    return regionLoads[region].getLast().getRegionSizeMB();
-  }
-
   /**
    * Returns the weighted cache ratio of a region on the given region server
    */
   public float getOrComputeWeightedRegionCacheRatio(int region, int server) {
-    return getTotalRegionHFileSizeMB(region) * 
getOrComputeRegionCacheRatio(region, server);
+    return getRegionSizeMinusColdDataMB(region) * 
getOrComputeRegionCacheRatio(region, server);
   }
 
   /**
diff --git 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java
 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java
index 33d00e3de86..5c9e73a1053 100644
--- 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java
+++ 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java
@@ -36,6 +36,7 @@ class BalancerRegionLoad {
   private final int storefileSizeMB;
   private final int regionSizeMB;
   private final float currentRegionPrefetchRatio;
+  private final float regionColdDataRatio;
 
   BalancerRegionLoad(RegionMetrics regionMetrics) {
     readRequestsCount = regionMetrics.getReadRequestCount();
@@ -45,6 +46,7 @@ class BalancerRegionLoad {
     storefileSizeMB = (int) 
regionMetrics.getStoreFileSize().get(Size.Unit.MEGABYTE);
     regionSizeMB = (int) 
regionMetrics.getRegionSizeMB().get(Size.Unit.MEGABYTE);
     currentRegionPrefetchRatio = regionMetrics.getCurrentRegionCachedRatio();
+    regionColdDataRatio = regionMetrics.getCurrentRegionColdDataRatio();
   }
 
   public long getReadRequestsCount() {
@@ -74,4 +76,8 @@ class BalancerRegionLoad {
   public float getCurrentRegionCacheRatio() {
     return currentRegionPrefetchRatio;
   }
+
+  public float getRegionColdDataRatio() {
+    return regionColdDataRatio;
+  }
 }
diff --git 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
index d1b17f78854..88313b1575b 100644
--- 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
+++ 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
@@ -29,6 +29,7 @@ package org.apache.hadoop.hbase.master.balancer;
 import static 
org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY;
 
 import java.text.DecimalFormat;
+import java.math.BigDecimal;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -37,6 +38,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
+import java.util.concurrent.ThreadLocalRandom;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.ClusterMetrics;
 import org.apache.hadoop.hbase.RegionMetrics;
@@ -89,10 +91,11 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
   private Long sleepTime;
   private Configuration configuration;
 
-  public enum GeneratorFunctionType {
-    LOAD,
-    CACHE_RATIO
-  }
+  private float lowCacheRatioThreshold;
+  private float potentialCacheRatioAfterMove;
+  private float minFreeCacheSpaceFactor;
+
+  private BigDecimal simulatedRatio = BigDecimal.ZERO;
 
   @Override
   public void loadConf(Configuration configuration) {
@@ -102,6 +105,12 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
     ratioThreshold =
       this.configuration.getFloat(CACHE_RATIO_THRESHOLD, 
CACHE_RATIO_THRESHOLD_DEFAULT);
     sleepTime = configuration.getLong(MOVE_THROTTLING, 
MOVE_THROTTLING_DEFAULT.toMillis());
+    lowCacheRatioThreshold = 
configuration.getFloat(LOW_CACHE_RATIO_FOR_RELOCATION_KEY,
+      LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT);
+    potentialCacheRatioAfterMove = 
configuration.getFloat(POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY,
+      POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT);
+    minFreeCacheSpaceFactor =
+      configuration.getFloat(MIN_FREE_CACHE_SPACE_FACTOR_KEY, 
MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT);
   }
 
   @Override
@@ -193,15 +202,13 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
             int regionSizeMB =
               
regionCacheRatioOnCurrentServerMap.get(regionEncodedName).getSecond();
             // The coldDataSize accounts for data size classified as "cold" by 
DataTieringManager,
-            // which should be kept out of cache. We sum cold region size in 
the cache ratio, as we
+            // which should be kept out of cache. We calculate cache ratio on 
old server based
+            // only on the hot data size for the region (regionSizeMB - 
coldDataSize), as we
             // don't want to move regions with low cache ratio due to data 
classified as cold.
-            float regionCacheRatioOnOldServer =
-              regionSizeMB
-                  == 0
-                    ? 0.0f
-                    : (float) (regionSizeInCache
-                      + 
sm.getRegionColdDataSize().getOrDefault(regionEncodedName, 0))
-                      / regionSizeMB;
+            int coldDataSize = 
sm.getRegionColdDataSize().getOrDefault(regionEncodedName, 0);
+            float regionCacheRatioOnOldServer = (regionSizeMB - coldDataSize) 
<= 0
+              ? 0.0f
+              : (float) regionSizeInCache / (regionSizeMB - coldDataSize);
             regionCacheRatioOnOldServerMap.put(regionEncodedName,
               new Pair<>(sn, regionCacheRatioOnOldServer));
           }
@@ -272,6 +279,7 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
   private class CacheAwareCandidateGenerator extends CandidateGenerator {
     @Override
     protected BalanceAction generate(BalancerClusterState cluster) {
+      simulatedRatio = BigDecimal.ZERO;
       // Move the regions to the servers they were previously hosted on based 
on the cache ratio
       if (
         !regionCacheRatioOnOldServerMap.isEmpty()
@@ -311,6 +319,50 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
         regionCacheRatioOnOldServerMap.remove(regionEncodedName);
         return action;
       }
+      return generatePlanForFreeCacheSpace(cluster);
+    }
+
+    private BalanceAction generatePlanForFreeCacheSpace(BalancerClusterState 
cluster) {
+      if (cluster.serverBlockCacheFreeSize == null) {
+        return BalanceAction.NULL_ACTION;
+      }
+      List<BalanceAction> possibleActions = new ArrayList<>();
+      Map<Integer, Long> serverFreeCacheAfterAction = new HashMap<>();
+      for (int region = 0; region < cluster.numRegions; region++) {
+        RegionInfo regionInfo = cluster.regions[region];
+        if (regionInfo.isMetaRegion() || 
regionInfo.getTable().isSystemTable()) {
+          continue;
+        }
+        int currentServer = cluster.regionIndexToServerIndex[region];
+        float ratio = cluster.getSumRegionCacheAndColdDataRatio(region);
+        if (ratio >= lowCacheRatioThreshold) {
+          continue;
+        }
+        int regionSizeMb = cluster.getRegionSizeMinusColdDataMB(region);
+        if (regionSizeMb <= 0) {
+          continue;
+        }
+        long bytesNeeded = (long) (regionSizeMb * 1024L * 1024L * 
minFreeCacheSpaceFactor);
+        for (int server = 0; server < cluster.numServers; server++) {
+          // Skip the region's current server, as we can't generate a move to 
the same server
+          if (server == currentServer) {
+            continue;
+          }
+          serverFreeCacheAfterAction.putIfAbsent(server, 
cluster.serverBlockCacheFreeSize[server]);
+          if (serverFreeCacheAfterAction.get(server) >= bytesNeeded) {
+            serverFreeCacheAfterAction.compute(server, (s, freeCache) -> 
freeCache - bytesNeeded);
+            possibleActions.add(getAction(currentServer, region, server, -1));
+          }
+        }
+      }
+      if (!possibleActions.isEmpty()) {
+        BalanceAction action =
+          
possibleActions.get(ThreadLocalRandom.current().nextInt(possibleActions.size()));
+        LOG.debug("region {} had sum ratio {}",
+          cluster.regions[((MoveRegionAction) 
action).getRegion()].getEncodedName(),
+          cluster.getSumRegionCacheAndColdDataRatio(((MoveRegionAction) 
action).getRegion()));
+        return action;
+      }
       return BalanceAction.NULL_ACTION;
     }
 
@@ -320,7 +372,7 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
       return moveRegionToOldServer(cluster, regionIndex, currentServerIndex,
         cacheRatioOnCurrentServer, oldServerIndex, cacheRatioOnOldServer)
           ? getAction(currentServerIndex, regionIndex, oldServerIndex, -1)
-          : BalanceAction.NULL_ACTION;
+          : generatePlanForFreeCacheSpace(cluster);
     }
 
     private boolean moveRegionToOldServer(BalancerClusterState cluster, int 
regionIndex,
@@ -391,6 +443,7 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
   private class CacheAwareSkewnessCandidateGenerator extends 
LoadCandidateGenerator {
     @Override
     BalanceAction pickRandomRegions(BalancerClusterState cluster, int 
thisServer, int otherServer) {
+      simulatedRatio = BigDecimal.ZERO;
       // First move all the regions which were hosted previously on some other 
server back to their
       // old servers
       if (
@@ -525,7 +578,7 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
     }
   }
 
-  static class CacheAwareCostFunction extends CostFunction {
+  class CacheAwareCostFunction extends CostFunction {
     private static final String CACHE_COST_KEY = 
"hbase.master.balancer.stochastic.cacheCost";
     private double cacheRatio;
     private double bestCacheRatio;
@@ -567,14 +620,13 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
         currentSum += currentWeighted[region];
         // here we only get the server index where this region cache ratio is 
the highest
         int serverIndexBestCache = 
cluster.getOrComputeServerWithBestRegionCachedRatio()[region];
+        // get the highest cacheRatio for this region on the current state of 
allocations
         double currentHighestCache =
           cluster.getOrComputeWeightedRegionCacheRatio(region, 
serverIndexBestCache);
         // Get a hypothetical best cache ratio for this region if any server 
has enough free cache
         // to host it.
-        double potentialHighestCache =
-          potentialBestWeightedFromFreeCache(cluster, region, 
currentHighestCache);
-        double actualHighest = Math.max(currentHighestCache, 
potentialHighestCache);
-        bestCacheSum += actualHighest;
+        double potentialHighestCache = 
potentialBestWeightedFromFreeCache(cluster, region);
+        bestCacheSum += Math.max(currentHighestCache, potentialHighestCache);
       }
       bestCacheRatio = bestCacheSum;
       if (bestCacheSum <= 0.0) {
@@ -589,11 +641,24 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
       double[] contrib = new double[totalRegions];
       for (int r = 0; r < totalRegions; r++) {
         int s = cluster.regionIndexToServerIndex[r];
-        int sizeMb = cluster.getTotalRegionHFileSizeMB(r);
+        int sizeMb = cluster.getRegionSizeMinusColdDataMB(r);
         if (sizeMb <= 0) {
           contrib[r] = 0.0;
           continue;
         }
+        boolean movedInSimulation = cluster.initialRegionIndexToServerIndex[r] 
!= s;
+        if (
+          cluster.serverBlockCacheFreeSize != null && movedInSimulation
+            && cluster.getSumRegionCacheAndColdDataRatio(r) < 
lowCacheRatioThreshold
+        ) {
+          LOG.debug("Region {} is simulated moved to new server {}",
+            cluster.regions[r].getEncodedName(), 
cluster.servers[s].getHostname());
+          long bytesNeeded = (long) (sizeMb * 1024L * 1024L * 
minFreeCacheSpaceFactor);
+          if (cluster.serverBlockCacheFreeSize[s] >= bytesNeeded) {
+            contrib[r] = sizeMb * potentialCacheRatioAfterMove;
+            continue;
+          }
+        }
         contrib[r] = cluster.getOrComputeWeightedRegionCacheRatio(r, s);
       }
       return contrib;
@@ -605,13 +670,12 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
      * #potentialCacheRatioAfterMove} * region MB) so placement is not 
considered optimal solely
      * from low ratios when capacity exists somewhere in the cluster.
      */
-    private double potentialBestWeightedFromFreeCache(BalancerClusterState 
cluster, int region,
-      double currentHighestCache) {
-      float observedRatio = cluster.getObservedRegionCacheRatio(region);
+    private double potentialBestWeightedFromFreeCache(BalancerClusterState 
cluster, int region) {
+      float observedRatio = cluster.getSumRegionCacheAndColdDataRatio(region);
       if (observedRatio >= lowCacheRatioThreshold) {
         return 0.0;
       }
-      int regionSizeMb = cluster.getTotalRegionHFileSizeMB(region);
+      int regionSizeMb = cluster.getRegionSizeMinusColdDataMB(region);
       if (regionSizeMb <= 0) {
         return 0.0;
       }
@@ -619,7 +683,7 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
       long requiredFree = (long) (regionSizeBytes * minFreeCacheSpaceFactor);
       for (int s = 0; s < cluster.numServers; s++) {
         if (cluster.serverBlockCacheFreeSize[s] >= requiredFree) {
-          return Math.max(currentHighestCache, regionSizeMb * 
potentialCacheRatioAfterMove);
+          return regionSizeMb * potentialCacheRatioAfterMove;
         }
       }
       return 0.0;
@@ -634,18 +698,39 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
     protected void regionMoved(int region, int oldServer, int newServer) {
       double regionCacheRatioOnOldServer =
         cluster.getOrComputeWeightedRegionCacheRatio(region, oldServer);
-      double regionCacheRatioOnNewServer =
-        cluster.getOrComputeWeightedRegionCacheRatio(region, newServer);
-      double cacheRatioDiff = regionCacheRatioOnNewServer - 
regionCacheRatioOnOldServer;
-      double normalizedDelta = bestCacheRatio == 0.0 ? 0.0 : cacheRatioDiff / 
bestCacheRatio;
-      cacheRatio += normalizedDelta;
-      if (LOG.isDebugEnabled() && (cacheRatio < 0.0 || cacheRatio > 1.0)) {
+      if (simulatedRatio.equals(BigDecimal.ZERO)) {
+        double potentialCachedSizeOnNewServer =
+          cluster.getRegionSizeMinusColdDataMB(region) * 
potentialCacheRatioAfterMove;
+        boolean simulateCacheBasedOnFreeSpace =
+          cluster.getOrComputeRegionCacheRatio(region, oldServer) < 
lowCacheRatioThreshold
+            && cluster.serverBlockCacheFreeSize[newServer] >= 
potentialCachedSizeOnNewServer;
+        double regionCacheRatioOnNewServer = simulateCacheBasedOnFreeSpace
+          ? potentialCachedSizeOnNewServer
+          : cluster.getOrComputeWeightedRegionCacheRatio(region, newServer);
+        double cacheRatioDiff = regionCacheRatioOnNewServer - 
regionCacheRatioOnOldServer;
+        double normalizedDelta = bestCacheRatio == 0.0 ? 0.0 : cacheRatioDiff 
/ bestCacheRatio;
         LOG.debug(
-          
"CacheAwareCostFunction:regionMoved:region:{}:from:{}:to:{}:regionCacheRatioOnOldServer:{}:"
-            + 
"regionCacheRatioOnNewServer:{}:bestRegionCacheRatio:{}:cacheRatio:{}",
-          cluster.regions[region].getEncodedName(), 
cluster.servers[oldServer].getHostname(),
-          cluster.servers[newServer].getHostname(), 
regionCacheRatioOnOldServer,
-          regionCacheRatioOnNewServer, bestCacheRatio, cacheRatio);
+          "simulating moving region {} using simulateCacheBasedOnFreeSpace={} "
+            + "got a normalized delta of {} to be added to cacheRatio: {}",
+          cluster.regions[region].getEncodedName(), 
simulateCacheBasedOnFreeSpace, normalizedDelta,
+          cacheRatio);
+        simulatedRatio = BigDecimal.valueOf(normalizedDelta);
+        cacheRatio += normalizedDelta;
+        if (cacheRatio < 0.0 || cacheRatio > 1.0) {
+          LOG.info(
+            "Recomputing cacheRatio after calculating impact of region move: 
\n "
+              + "CacheAwareCostFunction:regionMoved:region:{}:from:{}:to:{}:"
+              + 
"regionCacheRatioOnOldServer:{}:regionCacheRatioOnNewServer:{}:"
+              + "bestRegionCacheRatio:{}:cacheRatio:{}",
+            cluster.regions[region].getEncodedName(), 
cluster.servers[oldServer].getHostname(),
+            cluster.servers[newServer].getHostname(), 
regionCacheRatioOnOldServer,
+            regionCacheRatioOnNewServer, bestCacheRatio, cacheRatio);
+          recomputeCacheRatio(cluster);
+        }
+      } else {
+        // This means we are in an undoAction call and need to reverse the 
cache delta applied in
+        // the region move simulation
+        cacheRatio -= simulatedRatio.doubleValue();
       }
     }
 
diff --git 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index 62b1c2a3454..79a0129518e 100644
--- 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++ 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -688,6 +688,15 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
 
       newCost = computeCost(cluster, currentCost);
 
+      if (LOG.isDebugEnabled() && action.getType() == 
BalanceAction.Type.MOVE_REGION) {
+        LOG.debug(
+          "action moving region {} from {} to {} with cost {}. currentCost={}, 
functionCost={}",
+          cluster.regions[((MoveRegionAction) 
action).getRegion()].getEncodedName(),
+          cluster.servers[((MoveRegionAction) 
action).getFromServer()].getServerName(),
+          cluster.servers[((MoveRegionAction) 
action).getToServer()].getServerName(), newCost,
+          currentCost, functionCost());
+      }
+
       double costImprovement = currentCost - newCost;
       double minimumImprovement =
         Math.max(CostFunction.getCostEpsilon(currentCost), 
CostFunction.getCostEpsilon(newCost));
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
index 42a0ad213cf..8586e1fc626 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
@@ -251,8 +251,9 @@ public class TestCacheAwareLoadBalancerCostFunctions 
extends StochasticBalancerT
   @Test
   public void testCacheCost() {
     conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, 
"/tmp/prefetch.persistence");
+    CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf);
     CacheAwareLoadBalancer.CacheAwareCostFunction costFunction =
-      new CacheAwareLoadBalancer.CacheAwareCostFunction(conf);
+      lb.new CacheAwareCostFunction(conf);
 
     for (int test = 0; test < clusterRegionCacheRatioMocks.length; test++) {
       int[][] clusterRegionLocations = clusterRegionCacheRatioMocks[test];
@@ -379,11 +380,6 @@ public class TestCacheAwareLoadBalancerCostFunctions 
extends StochasticBalancerT
       regionCacheRatioOnOldServerMap = oldCacheRatio;
     }
 
-    @Override
-    public int getTotalRegionHFileSizeMB(int region) {
-      return 1;
-    }
-
     @Override
     protected float getRegionCacheRatioOnRegionServer(int region, int 
regionServerIndex) {
       float cacheRatio = 0.0f;
@@ -412,5 +408,11 @@ public class TestCacheAwareLoadBalancerCostFunctions 
extends StochasticBalancerT
       }
       return cacheRatio;
     }
+
+    @Override
+    int getRegionSizeMinusColdDataMB(int region) {
+      return 1;
+    }
+
   }
 }
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal.java
index 71154ebcb33..4ed6c84a026 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master.balancer;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
@@ -129,10 +130,76 @@ public class 
TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal
     assertEquals(5, targetServers.get(server1).size());
   }
 
+  /**
+   * Regions on the overloaded RS report low block-cache ratio; no RS reports 
prefetch/historical
+   * cache for those regions (so {@link 
CacheAwareLoadBalancer.CacheAwareCandidateGenerator} has no
+   * "old server" to prefer). Another RS has ample free block cache. The 
balancer should still emit
+   * plans that shed load from the hot RS onto the idle RS with spare cache 
capacity.
+   */
   @Test
-  public void 
testRegionsPartiallyCachedOnOldServerAndNotCachedOnCurrentServer() throws 
Exception {
-    // The regions are partially cached on old server but not cached on the 
current server
+  public void 
testLowCacheRatioNoHistoricalCacheRelocatesWhenTargetHasFreeBlockCache()
+    throws Exception {
+    Map<ServerName, List<RegionInfo>> clusterState = new HashMap<>();
+    ServerName server0 = servers.get(0);
+    ServerName server1 = servers.get(1);
+    ServerName server2 = servers.get(2);
 
+    List<RegionInfo> regionsOnServer0 = randomRegions(10);
+    List<RegionInfo> regionsOnServer1 = randomRegions(0);
+    List<RegionInfo> regionsOnServer2 = randomRegions(5);
+
+    clusterState.put(server0, regionsOnServer0);
+    clusterState.put(server1, regionsOnServer1);
+    clusterState.put(server2, regionsOnServer2);
+
+    // Below LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT (0.35).
+    ServerMetrics sm0 = mockServerMetricsWithRegionCacheInfo(server0, 
regionsOnServer0, 0.1f,
+      new ArrayList<>(), 0, 10);
+    when(sm0.getCacheFreeSize()).thenReturn(0L);
+    ServerMetrics sm1 = mockServerMetricsWithRegionCacheInfo(server1, 
regionsOnServer1, 0.0f,
+      new ArrayList<>(), 0, 10);
+    // Simulates 1GB free cache space on server1
+    when(sm1.getCacheFreeSize()).thenReturn(1024L * 1024 * 1024);
+    ServerMetrics sm2 = mockServerMetricsWithRegionCacheInfo(server2, 
regionsOnServer2, 1.0f,
+      new ArrayList<>(), 0, 10);
+    when(sm2.getCacheFreeSize()).thenReturn(0L);
+
+    Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
+    serverMetricsMap.put(server0, sm0);
+    serverMetricsMap.put(server1, sm1);
+    serverMetricsMap.put(server2, sm2);
+    ClusterMetrics clusterMetrics = mock(ClusterMetrics.class);
+    when(clusterMetrics.getLiveServerMetrics()).thenReturn(serverMetricsMap);
+    loadBalancer.updateClusterMetrics(clusterMetrics);
+
+    CacheAwareLoadBalancer internalBalancer =
+      (CacheAwareLoadBalancer) loadBalancer.getInternalBalancer();
+    assertNotNull(internalBalancer);
+    assertTrue(internalBalancer.regionCacheRatioOnOldServerMap.isEmpty());
+
+    Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable =
+      (Map) mockClusterServersWithTables(clusterState);
+    List<RegionPlan> plans = loadBalancer.balanceCluster(loadOfAllTable);
+    assertNotNull(plans);
+
+    Set<RegionInfo> regionsMovedFromServer0 = new HashSet<>();
+    Map<ServerName, List<RegionInfo>> targetServers = new HashMap<>();
+    for (RegionPlan plan : plans) {
+      if (plan.getSource().equals(server0)) {
+        regionsMovedFromServer0.add(plan.getRegionInfo());
+        if (!targetServers.containsKey(plan.getDestination())) {
+          targetServers.put(plan.getDestination(), new ArrayList<>());
+        }
+        targetServers.get(plan.getDestination()).add(plan.getRegionInfo());
+      }
+    }
+    assertEquals(5, regionsMovedFromServer0.size());
+    assertNotNull(targetServers.get(server1));
+    assertEquals(5, targetServers.get(server1).size());
+  }
+
+  @Test
+  public void 
testRegionsPartiallyCachedOnOldServerAndNotCachedOnCurrentServer() throws 
Exception {
     Map<ServerName, List<RegionInfo>> clusterState = new HashMap<>();
     ServerName server0 = servers.get(0);
     ServerName server1 = servers.get(1);
@@ -150,7 +217,7 @@ public class 
TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal
     // Mock cluster metrics
 
     // Mock 5 regions from server0 were previously hosted on server1
-    List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5, 
regionsOnServer0.size() - 1);
+    List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5, 
regionsOnServer0.size());
 
     Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
     serverMetricsMap.put(server0, 
mockServerMetricsWithRegionCacheInfo(server0, regionsOnServer0,
@@ -387,7 +454,7 @@ public class 
TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal
     // Mock cluster metrics
 
     // Mock 5 regions from server0 were previously hosted on server1
-    List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5, 
regionsOnServer0.size() - 1);
+    List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5, 
regionsOnServer0.size());
 
     Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
     serverMetricsMap.put(server0, 
mockServerMetricsWithRegionCacheInfo(server0, regionsOnServer0,
@@ -441,7 +508,7 @@ public class 
TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal
     // Mock cluster metrics
 
     // Mock 5 regions from server0 were previously hosted on server1
-    List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5, 
regionsOnServer0.size() - 1);
+    List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5, 
regionsOnServer0.size());
 
     Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
     serverMetricsMap.put(server0, 
mockServerMetricsWithRegionCacheInfo(server0, regionsOnServer0,
@@ -495,7 +562,7 @@ public class 
TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal
     // Mock cluster metrics
 
     // Mock 5 regions from server0 were previously hosted on server1
-    List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5, 
regionsOnServer0.size() - 1);
+    List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5, 
regionsOnServer0.size());
 
     Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
     serverMetricsMap.put(server0, 
mockServerMetricsWithRegionCacheInfo(server0, regionsOnServer0,

Reply via email to