This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-3 by this push:
     new c3c9dd169d7 HBASE-30134: Improve CacheAwareLoadBalancer to consider 
low cache ratio when calculating imbalance (#8197)
c3c9dd169d7 is described below

commit c3c9dd169d78153438d173bef0955b0791ec1dfd
Author: Wellington Ramos Chevreuil <[email protected]>
AuthorDate: Mon May 11 11:02:30 2026 +0100

    HBASE-30134: Improve CacheAwareLoadBalancer to consider low cache ratio 
when calculating imbalance (#8197)
    
    Signed-off-by: Tak Lon (Stephen) Wu <[email protected]>
---
 .../master/balancer/BalancerClusterState.java      |  34 +++++-
 .../master/balancer/BalancerConditionals.java      |   2 +-
 .../hbase/master/balancer/BaseLoadBalancer.java    |   2 +-
 .../master/balancer/CacheAwareLoadBalancer.java    | 135 +++++++++++++++++++--
 .../master/balancer/StochasticLoadBalancer.java    |  12 +-
 .../balancer/CandidateGeneratorTestUtil.java       |   2 +-
 .../TestStoreFileTableSkewCostFunction.java        |   2 +-
 .../org/apache/hadoop/hbase/ServerMetrics.java     |  18 +++
 .../apache/hadoop/hbase/ServerMetricsBuilder.java  |  40 +++++-
 .../src/main/protobuf/server/ClusterStatus.proto   |  11 ++
 .../hadoop/hbase/regionserver/HRegionServer.java   |   6 +
 .../hbase/master/TestRegionsRecoveryChore.java     |   5 +
 .../TestCacheAwareLoadBalancerCostFunctions.java   | 107 +++++++++++++++-
 13 files changed, 349 insertions(+), 27 deletions(-)

diff --git 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
index 86c45dae09b..183ccf4fc25 100644
--- 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
+++ 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
@@ -128,6 +128,8 @@ class BalancerClusterState {
   private int[] regionServerIndexWithBestRegionCachedRatio;
   // Maps regionName -> oldServerName -> cache ratio of the region on the old 
server
   Map<String, Pair<ServerName, Float>> regionCacheRatioOnOldServerMap;
+  // Free block cache space (in bytes) available on each server, aligned to the 
"servers" array indices.
+  long[] serverBlockCacheFreeSize;
 
   private final Supplier<List<Integer>> shuffledServerIndicesSupplier =
     Suppliers.memoizeWithExpiration(() -> {
@@ -148,20 +150,23 @@ class BalancerClusterState {
   BalancerClusterState(Map<ServerName, List<RegionInfo>> clusterState,
     Map<String, Deque<BalancerRegionLoad>> loads, 
RegionHDFSBlockLocationFinder regionFinder,
     RackManager rackManager) {
-    this(null, clusterState, loads, regionFinder, rackManager, null);
+    this(null, clusterState, loads, regionFinder, rackManager, null, null);
   }
 
   protected BalancerClusterState(Map<ServerName, List<RegionInfo>> 
clusterState,
     Map<String, Deque<BalancerRegionLoad>> loads, 
RegionHDFSBlockLocationFinder regionFinder,
-    RackManager rackManager, Map<String, Pair<ServerName, Float>> 
oldRegionServerRegionCacheRatio) {
-    this(null, clusterState, loads, regionFinder, rackManager, 
oldRegionServerRegionCacheRatio);
+    RackManager rackManager, Map<String, Pair<ServerName, Float>> 
oldRegionServerRegionCacheRatio,
+    Map<ServerName, Long> serverBlockCacheFreeByServer) {
+    this(null, clusterState, loads, regionFinder, rackManager, 
oldRegionServerRegionCacheRatio,
+      serverBlockCacheFreeByServer);
   }
 
   @SuppressWarnings("unchecked")
   BalancerClusterState(Collection<RegionInfo> unassignedRegions,
     Map<ServerName, List<RegionInfo>> clusterState, Map<String, 
Deque<BalancerRegionLoad>> loads,
     RegionHDFSBlockLocationFinder regionFinder, RackManager rackManager,
-    Map<String, Pair<ServerName, Float>> oldRegionServerRegionCacheRatio) {
+    Map<String, Pair<ServerName, Float>> oldRegionServerRegionCacheRatio,
+    Map<ServerName, Long> serverBlockCacheFreeByServer) {
     if (unassignedRegions == null) {
       unassignedRegions = Collections.emptyList();
     }
@@ -394,6 +399,15 @@ class BalancerClusterState {
       populateRegionPerLocationFromServer(regionsPerRack, 
colocatedReplicaCountsPerRack,
         serversPerRack);
     }
+
+    this.serverBlockCacheFreeSize = new long[numServers];
+    if (serverBlockCacheFreeByServer != null) {
+      for (int i = 0; i < numServers; i++) {
+        ServerName sn = servers[i];
+        this.serverBlockCacheFreeSize[i] =
+          sn == null ? 0L : serverBlockCacheFreeByServer.getOrDefault(sn, 0L);
+      }
+    }
   }
 
   private void populateRegionPerLocationFromServer(int[][] regionsPerLocation,
@@ -714,6 +728,18 @@ class BalancerClusterState {
     return regionServerIndexWithBestRegionCachedRatio;
   }
 
+  /**
+   * Finds and returns the latest reported cache ratio for the region on the 
RegionServer where
+   * it is currently online.
+   */
+  float getObservedRegionCacheRatio(int region) {
+    Deque<BalancerRegionLoad> dq = regionLoads[region];
+    if (dq == null || dq.isEmpty()) {
+      return 0.0f;
+    }
+    return dq.getLast().getCurrentRegionCacheRatio();
+  }
+
   /**
    * Maps region index to rack index
    */
diff --git 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
index 7e5bd98a013..28af67a4954 100644
--- 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
+++ 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
@@ -185,7 +185,7 @@ final class BalancerConditionals implements Configurable {
   private RegionPlanConditional createConditional(Class<? extends 
RegionPlanConditional> clazz,
     BalancerClusterState cluster) {
     if (cluster == null) {
-      cluster = new BalancerClusterState(Collections.emptyMap(), null, null, 
null, null);
+      cluster = new BalancerClusterState(Collections.emptyMap(), null, null, 
null, null, null);
     }
     try {
       Constructor<? extends RegionPlanConditional> ctor =
diff --git 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
index c3f17d88cde..be99c967025 100644
--- 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
+++ 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
@@ -238,7 +238,7 @@ public abstract class BaseLoadBalancer implements 
LoadBalancer {
       }
     }
     return new BalancerClusterState(regions, clusterState, null, 
this.regionFinder, rackManager,
-      null);
+      null, null);
   }
 
   private List<ServerName> findIdleServers(List<ServerName> servers) {
diff --git 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
index 4f45687097e..d1b17f78854 100644
--- 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
+++ 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
@@ -45,6 +45,7 @@ import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.Size;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.RackManager;
 import org.apache.hadoop.hbase.master.RegionPlan;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.yetus.audience.InterfaceAudience;
@@ -59,6 +60,30 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
     "hbase.master.balancer.stochastic.throttling.cacheRatio";
   public static final float CACHE_RATIO_THRESHOLD_DEFAULT = 0.8f;
 
+  /**
+   * Below this cache ratio on the current host, a move may be considered for 
the free-space
+   * heuristic.
+   */
+  public static final String LOW_CACHE_RATIO_FOR_RELOCATION_KEY =
+    "hbase.master.balancer.cacheaware.lowCacheRatioThreshold";
+  public static final float LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT = 0.35f;
+
+  /**
+   * Optimistic region cache ratio assumed for cost purposes when a better 
host has free cache space
+   * (actual warmup is not modeled).
+   */
+  public static final String POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY =
+    "hbase.master.balancer.cacheaware.potentialCacheRatioAfterMove";
+  public static final float POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT = 0.95f;
+
+  /**
+   * Minimum free block cache on a target server, as a multiple of the 
region's on-disk size in
+   * bytes, required to count that server as a relocation opportunity.
+   */
+  public static final String MIN_FREE_CACHE_SPACE_FACTOR_KEY =
+    "hbase.master.balancer.cacheaware.minFreeCacheSpaceFactor";
+  public static final float MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT = 1.0f;
+
   public Float ratioThreshold;
 
   private Long sleepTime;
@@ -110,6 +135,23 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
     updateRegionLoad();
   }
 
+  protected Map<ServerName, Long> getServerBlockCacheFreeBytes() {
+    if (clusterStatus == null) {
+      return null;
+    }
+    Map<ServerName, Long> map = new HashMap<>();
+    clusterStatus.getLiveServerMetrics().forEach((sn, sm) -> map.put(sn, 
sm.getCacheFreeSize()));
+    return map;
+  }
+
+  @Override
+  protected BalancerClusterState createState(Map<ServerName, List<RegionInfo>> 
clusterState,
+    Map<String, Deque<BalancerRegionLoad>> loads, 
RegionHDFSBlockLocationFinder finder,
+    RackManager rackManager) {
+    return new BalancerClusterState(clusterState, loads, finder, rackManager,
+      regionCacheRatioOnOldServerMap, getServerBlockCacheFreeBytes());
+  }
+
   /**
    * Collect the amount of region cached for all the regions from all the 
active region servers.
    */
@@ -150,8 +192,16 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
           if (!ServerName.isSameAddress(currentServer, sn)) {
             int regionSizeMB =
               
regionCacheRatioOnCurrentServerMap.get(regionEncodedName).getSecond();
+            // The coldDataSize accounts for data size classified as "cold" by 
DataTieringManager,
+            // which should be kept out of cache. We sum cold region size in 
the cache ratio, as we
+            // don't want to move regions with low cache ratio due to data 
classified as cold.
             float regionCacheRatioOnOldServer =
-              regionSizeMB == 0 ? 0.0f : (float) regionSizeInCache / 
regionSizeMB;
+              regionSizeMB
+                  == 0
+                    ? 0.0f
+                    : (float) (regionSizeInCache
+                      + 
sm.getRegionColdDataSize().getOrDefault(regionEncodedName, 0))
+                      / regionSizeMB;
             regionCacheRatioOnOldServerMap.put(regionEncodedName,
               new Pair<>(sn, regionCacheRatioOnOldServer));
           }
@@ -479,6 +529,9 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
     private static final String CACHE_COST_KEY = 
"hbase.master.balancer.stochastic.cacheCost";
     private double cacheRatio;
     private double bestCacheRatio;
+    private final float lowCacheRatioThreshold;
+    private final float potentialCacheRatioAfterMove;
+    private final float minFreeCacheSpaceFactor;
 
     private static final float DEFAULT_CACHE_COST = 20;
 
@@ -489,25 +542,87 @@ public class CacheAwareLoadBalancer extends 
StochasticLoadBalancer {
         !isPersistentCache ? 0.0f : conf.getFloat(CACHE_COST_KEY, 
DEFAULT_CACHE_COST));
       bestCacheRatio = 0.0;
       cacheRatio = 0.0;
+      lowCacheRatioThreshold =
+        conf.getFloat(LOW_CACHE_RATIO_FOR_RELOCATION_KEY, 
LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT);
+      potentialCacheRatioAfterMove = Math.min(1.0f, conf
+        .getFloat(POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY, 
POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT));
+      minFreeCacheSpaceFactor =
+        conf.getFloat(MIN_FREE_CACHE_SPACE_FACTOR_KEY, 
MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT);
     }
 
     @Override
     void prepare(BalancerClusterState cluster) {
       super.prepare(cluster);
-      cacheRatio = 0.0;
-      bestCacheRatio = 0.0;
+      recomputeCacheRatio(cluster);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio);
+      }
+    }
 
+    private void recomputeCacheRatio(BalancerClusterState cluster) {
+      double[] currentWeighted = computeCurrentWeightedContributions(cluster);
+      double currentSum = 0.0;
+      double bestCacheSum = 0.0;
       for (int region = 0; region < cluster.numRegions; region++) {
-        cacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region,
-          cluster.regionIndexToServerIndex[region]);
-        bestCacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region,
-          getServerWithBestCacheRatioForRegion(region));
+        currentSum += currentWeighted[region];
+        // here we only get the server index where this region cache ratio is 
the highest
+        int serverIndexBestCache = 
cluster.getOrComputeServerWithBestRegionCachedRatio()[region];
+        double currentHighestCache =
+          cluster.getOrComputeWeightedRegionCacheRatio(region, 
serverIndexBestCache);
+        // Get a hypothetical best cache ratio for this region if any server 
has enough free cache
+        // to host it.
+        double potentialHighestCache =
+          potentialBestWeightedFromFreeCache(cluster, region, 
currentHighestCache);
+        double actualHighest = Math.max(currentHighestCache, 
potentialHighestCache);
+        bestCacheSum += actualHighest;
       }
+      bestCacheRatio = bestCacheSum;
+      if (bestCacheSum <= 0.0) {
+        cacheRatio = cluster.numRegions == 0 ? 1.0 : 0.0;
+      } else {
+        cacheRatio = Math.min(1.0, currentSum / bestCacheSum);
+      }
+    }
 
-      cacheRatio = bestCacheRatio == 0 ? 1.0 : cacheRatio / bestCacheRatio;
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio);
+    private double[] computeCurrentWeightedContributions(BalancerClusterState 
cluster) {
+      int totalRegions = cluster.numRegions;
+      double[] contrib = new double[totalRegions];
+      for (int r = 0; r < totalRegions; r++) {
+        int s = cluster.regionIndexToServerIndex[r];
+        int sizeMb = cluster.getTotalRegionHFileSizeMB(r);
+        if (sizeMb <= 0) {
+          contrib[r] = 0.0;
+          continue;
+        }
+        contrib[r] = cluster.getOrComputeWeightedRegionCacheRatio(r, s);
+      }
+      return contrib;
+    }
+
+    /*
+     * If this region is cold in metrics and at least one RS (including its 
current host) reports
+     * enough free block cache to hold it, return an optimistic weighted cache 
score ({@link
+     * #potentialCacheRatioAfterMove} * region MB) so placement is not 
considered optimal solely
+     * from low ratios when capacity exists somewhere in the cluster.
+     */
+    private double potentialBestWeightedFromFreeCache(BalancerClusterState 
cluster, int region,
+      double currentHighestCache) {
+      float observedRatio = cluster.getObservedRegionCacheRatio(region);
+      if (observedRatio >= lowCacheRatioThreshold) {
+        return 0.0;
+      }
+      int regionSizeMb = cluster.getTotalRegionHFileSizeMB(region);
+      if (regionSizeMb <= 0) {
+        return 0.0;
+      }
+      long regionSizeBytes = (long) regionSizeMb * 1024L * 1024L;
+      long requiredFree = (long) (regionSizeBytes * minFreeCacheSpaceFactor);
+      for (int s = 0; s < cluster.numServers; s++) {
+        if (cluster.serverBlockCacheFreeSize[s] >= requiredFree) {
+          return Math.max(currentHighestCache, regionSizeMb * 
potentialCacheRatioAfterMove);
+        }
       }
+      return 0.0;
     }
 
     @Override
diff --git 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index f2b2240a174..62b1c2a3454 100644
--- 
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++ 
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -321,8 +321,7 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     if ((this.localityCost != null) || (this.rackLocalityCost != null)) {
       finder = this.regionFinder;
     }
-    BalancerClusterState cluster =
-      new BalancerClusterState(loadOfOneTable, loads, finder, rackManager);
+    BalancerClusterState cluster = createState(loadOfOneTable, loads, finder, 
rackManager);
 
     initCosts(cluster);
     curOverallCost = computeCost(cluster, Double.MAX_VALUE);
@@ -330,6 +329,12 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
   }
 
+  protected BalancerClusterState createState(Map<ServerName, List<RegionInfo>> 
clusterState,
+    Map<String, Deque<BalancerRegionLoad>> loads, 
RegionHDFSBlockLocationFinder finder,
+    RackManager rackManager) {
+    return new BalancerClusterState(clusterState, loads, finder, rackManager);
+  }
+
   @Override
   public void
     updateBalancerLoadInfo(Map<TableName, Map<ServerName, List<RegionInfo>>> 
loadOfAllTable) {
@@ -577,8 +582,7 @@ public class StochasticLoadBalancer extends 
BaseLoadBalancer {
     // The clusterState that is given to this method contains the state
     // of all the regions in the table(s) (that's true today)
     // Keep track of servers to iterate through them.
-    BalancerClusterState cluster = new BalancerClusterState(loadOfOneTable, 
loads, finder,
-      rackManager, regionCacheRatioOnOldServerMap);
+    BalancerClusterState cluster = createState(loadOfOneTable, loads, finder, 
rackManager);
 
     long startTime = EnvironmentEdgeManager.currentTime();
     cluster.setStopRequestedAt(startTime + maxRunningTime);
diff --git 
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
 
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
index 870c97a1f49..00d39fc4e74 100644
--- 
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
+++ 
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
@@ -218,7 +218,7 @@ public final class CandidateGeneratorTestUtil {
 
   static BalancerClusterState
     createMockBalancerClusterState(Map<ServerName, List<RegionInfo>> 
serverToRegions) {
-    return new BalancerClusterState(serverToRegions, null, null, null, null);
+    return new BalancerClusterState(serverToRegions, null, null, null, null, 
null);
   }
 
   /**
diff --git 
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java
 
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java
index 1b9aca931b4..cff009f0456 100644
--- 
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java
+++ 
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java
@@ -193,7 +193,7 @@ public class TestStoreFileTableSkewCostFunction {
     private final RegionInfo[] testRegions;
 
     DummyBalancerClusterState(BalancerClusterState bcs) {
-      super(bcs.clusterState, null, null, null, null);
+      super(bcs.clusterState, null, null, null, null, null);
       this.testRegions = bcs.regions;
     }
 
diff --git 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java
index 2cf55a1abdc..e5447618ec2 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java
@@ -18,6 +18,7 @@
 package org.apache.hadoop.hbase;
 
 import edu.umd.cs.findbugs.annotations.Nullable;
+import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -112,4 +113,21 @@ public interface ServerMetrics {
    *         rounded to MB
    */
   Map<String, Integer> getRegionCachedInfo();
+
+  /**
+   * The available cache space on this region server (bytes), if reported in 
the server load.
+   */
+  default long getCacheFreeSize() {
+    return 0L;
+  }
+
+  /**
+   * Returns the region cold data information for the regions hosted on this 
server. Here, cold data
+   * refers only to region data that is classified as cold by the 
DataTieringManager according to
+   * the configured priority logic. These data should be kept out of block 
cache.
+   * @return map of region encoded name and its total cold data size, rounded 
to MB
+   */
+  default Map<String, Integer> getRegionColdDataSize() {
+    return Collections.emptyMap();
+  }
 }
diff --git 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java
index c7aea21e845..66684ed244d 100644
--- 
a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java
+++ 
b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java
@@ -64,7 +64,7 @@ public final class ServerMetricsBuilder {
 
   public static ServerMetrics toServerMetrics(ServerName serverName, int 
versionNumber,
     String version, ClusterStatusProtos.ServerLoad serverLoadPB) {
-    return ServerMetricsBuilder.newBuilder(serverName)
+    ServerMetricsBuilder builder = ServerMetricsBuilder.newBuilder(serverName)
       .setRequestCountPerSecond(serverLoadPB.getNumberOfRequests())
       .setRequestCount(serverLoadPB.getTotalNumberOfRequests())
       .setInfoServerPort(serverLoadPB.getInfoServerPort())
@@ -88,7 +88,10 @@ public final class ServerMetricsBuilder {
       .setRegionCachedInfo(serverLoadPB.getRegionCachedInfoMap())
       .setReportTimestamp(serverLoadPB.getReportEndTime())
       
.setLastReportTimestamp(serverLoadPB.getReportStartTime()).setVersionNumber(versionNumber)
-      .setVersion(version).build();
+      .setVersion(version)
+      .setCacheFreeSize(serverLoadPB.hasCacheFreeSize() ? 
serverLoadPB.getCacheFreeSize() : 0L)
+      .setRegionColdDataInfo(serverLoadPB.getRegionColdDataMap());
+    return builder.build();
   }
 
   public static List<HBaseProtos.Coprocessor> toCoprocessor(Collection<String> 
names) {
@@ -118,6 +121,7 @@ public final class ServerMetricsBuilder {
     if (metrics.getReplicationLoadSink() != null) {
       
builder.setReplLoadSink(ProtobufUtil.toReplicationLoadSink(metrics.getReplicationLoadSink()));
     }
+    builder.setCacheFreeSize(metrics.getCacheFreeSize());
     return builder.build();
   }
 
@@ -145,6 +149,8 @@ public final class ServerMetricsBuilder {
   private long lastReportTimestamp = 0;
   private final List<ServerTask> tasks = new ArrayList<>();
   private Map<String, Integer> regionCachedInfo = new HashMap<>();
+  private long cacheFreeSize;
+  private Map<String, Integer> regionColdDataInfo = Collections.emptyMap();
 
   private ServerMetricsBuilder(ServerName serverName) {
     this.serverName = serverName;
@@ -240,11 +246,21 @@ public final class ServerMetricsBuilder {
     return this;
   }
 
+  public ServerMetricsBuilder setCacheFreeSize(long blockCacheFreeSize) {
+    this.cacheFreeSize = blockCacheFreeSize;
+    return this;
+  }
+
+  public ServerMetricsBuilder setRegionColdDataInfo(Map<String, Integer> 
regionColdDataInfo) {
+    this.regionColdDataInfo = regionColdDataInfo;
+    return this;
+  }
+
   public ServerMetrics build() {
     return new ServerMetricsImpl(serverName, versionNumber, version, 
requestCountPerSecond,
       requestCount, readRequestCount, writeRequestCount, usedHeapSize, 
maxHeapSize, infoServerPort,
       sources, sink, regionStatus, coprocessorNames, reportTimestamp, 
lastReportTimestamp,
-      userMetrics, tasks, regionCachedInfo);
+      userMetrics, tasks, regionCachedInfo, cacheFreeSize, regionColdDataInfo);
   }
 
   private static class ServerMetricsImpl implements ServerMetrics {
@@ -268,6 +284,8 @@ public final class ServerMetricsBuilder {
     private final Map<byte[], UserMetrics> userMetrics;
     private final List<ServerTask> tasks;
     private final Map<String, Integer> regionCachedInfo;
+    private final long cacheFreeSize;
+    private final Map<String, Integer> regionColdDataInfo;
 
     ServerMetricsImpl(ServerName serverName, int versionNumber, String version,
       long requestCountPerSecond, long requestCount, long readRequestsCount,
@@ -275,7 +293,8 @@ public final class ServerMetricsBuilder {
       List<ReplicationLoadSource> sources, ReplicationLoadSink sink,
       Map<byte[], RegionMetrics> regionStatus, Set<String> coprocessorNames, 
long reportTimestamp,
       long lastReportTimestamp, Map<byte[], UserMetrics> userMetrics, 
List<ServerTask> tasks,
-      Map<String, Integer> regionCachedInfo) {
+      Map<String, Integer> regionCachedInfo, long cacheFreeSize,
+      Map<String, Integer> regionColdDataInfo) {
       this.serverName = Preconditions.checkNotNull(serverName);
       this.versionNumber = versionNumber;
       this.version = version;
@@ -295,6 +314,8 @@ public final class ServerMetricsBuilder {
       this.lastReportTimestamp = lastReportTimestamp;
       this.tasks = tasks;
       this.regionCachedInfo = regionCachedInfo;
+      this.cacheFreeSize = cacheFreeSize;
+      this.regionColdDataInfo = regionColdDataInfo;
     }
 
     @Override
@@ -402,6 +423,17 @@ public final class ServerMetricsBuilder {
       return Collections.unmodifiableMap(regionCachedInfo);
     }
 
+    @Override
+    public long getCacheFreeSize() {
+      return cacheFreeSize;
+    }
+
+    @Override
+    public Map<String, Integer> getRegionColdDataSize() {
+      return Collections
+        .unmodifiableMap(regionColdDataInfo != null ? regionColdDataInfo : 
Collections.emptyMap());
+    }
+
     @Override
     public String toString() {
       int storeCount = 0;
diff --git a/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto 
b/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto
index afff971687d..2db64346c28 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto
@@ -332,6 +332,17 @@ message ServerLoad {
    * The metrics for region cached on this region server
    */
   map<string, uint32> regionCachedInfo = 16;
+
+  /**
+   * Unallocated block cache capacity on this RegionServer, in bytes.
+   * Used by the master for cache-aware load balancing (optional).
+   */
+  optional uint64 cacheFreeSize = 17;
+
+  /**
+   * Total cold data size of each region on this region server, rounded to MB
+   */
+  map<string, uint32> regionColdData = 18;
 }
 
 message LiveServerInfo {
diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 672607fe5bc..ab75a9ecbca 100644
--- 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ 
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -1242,6 +1242,12 @@ public class HRegionServer extends 
HBaseServerBase<RSRpcServices>
         });
       });
     });
+    serverLoad.setCacheFreeSize(regionServerWrapper.getBlockCacheFreeSize());
+    if (DataTieringManager.getInstance() != null) {
+      DataTieringManager.getInstance().getRegionColdDataSize()
+        .forEach((regionName, coldDataSize) -> 
serverLoad.putRegionColdData(regionName,
+          roundSize(coldDataSize.getSecond(), unitMB)));
+    }
     serverLoad.setReportStartTime(reportStartTime);
     serverLoad.setReportEndTime(reportEndTime);
     if (this.infoServer != null) {
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java
index eeba839ac11..2c38f56cfdd 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java
@@ -400,6 +400,11 @@ public class TestRegionsRecoveryChore {
         return new HashMap<>();
       }
 
+      @Override
+      public Map<String, Integer> getRegionColdDataSize() {
+        return new HashMap<>();
+      }
+
     };
     return serverMetrics;
   }
diff --git 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
index 0551adf9601..42a0ad213cf 100644
--- 
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
+++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
@@ -20,13 +20,25 @@ package org.apache.hadoop.hbase.master.balancer;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
 
+import java.util.ArrayDeque;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Deque;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.RegionMetrics;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.Size;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
 import org.apache.hadoop.hbase.testclassification.MasterTests;
 import org.apache.hadoop.hbase.testclassification.MediumTests;
 import org.apache.hadoop.hbase.util.Pair;
@@ -251,12 +263,105 @@ public class TestCacheAwareLoadBalancerCostFunctions extends StochasticBalancerT
     }
   }
 
+  /**
+   * When block-cache persistence is configured, cold regions (cache ratio below
+   * {@link CacheAwareLoadBalancer#LOW_CACHE_RATIO_FOR_RELOCATION_KEY}) together with RS-reported
+   * block-cache free bytes inflate the plausible best placement so that the weighted cache cost
+   * crosses {@code minCostNeedBalance}; {@link StochasticLoadBalancer#needsBalance} then returns
+   * true even though the region count is evenly spread across the servers.
+   */
+  @Test
+  public void testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace() 
{
+    conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, 
"/tmp/prefetch.persistence");
+    CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf);
+    int regionSizeMb = 64;
+    long cacheFreeInBytes = regionSizeMb * 1024L * 1024L; // one region's size, expressed in bytes
+    // simulates a cache ratio lower than
+    // CacheAwareLoadBalancer.LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT
+    float simulatedCacheRatio = 0.1f;
+    Map<ServerName, List<RegionInfo>> clusterServers =
+      mockClusterServersUnsorted(new int[] { 1, 1 }, 1);
+    List<RegionInfo> regions = new ArrayList<>();
+    clusterServers.values().forEach(regions::addAll);
+    List<ServerName> serversList = getServersInInsertionOrder(clusterServers);
+    Map<ServerName, Long> blockCacheFree = new HashMap<>();
+    blockCacheFree.put(serversList.get(0), 0L); // first listed server reports no free block cache
+    blockCacheFree.put(serversList.get(1), cacheFreeInBytes); // second has room for one region
+    BalancerClusterState cluster = new BalancerClusterState(clusterServers,
+      buildRegionLoads(regions, simulatedCacheRatio, regionSizeMb), null, null,
+      Collections.emptyMap(), blockCacheFree);
+    lb.initCosts(cluster);
+    assertTrue(lb.needsBalance(
+      
TableName.valueOf("testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace"),
+      cluster));
+  }
+
+  /**
+   * Checks that needsBalance is false for fully cached regions (ratio 1.0) despite free cache space
+   */
+  @Test
+  public void testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace() 
{
+    conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY, 
"/tmp/prefetch.persistence");
+    CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf);
+    int regionSizeMb = 64;
+    long cacheFreeInBytes = regionSizeMb * 1024L * 1024L; // one region's size, expressed in bytes
+    Map<ServerName, List<RegionInfo>> clusterServers =
+      mockClusterServersUnsorted(new int[] { 1, 1 }, 1);
+    List<RegionInfo> all = new ArrayList<>();
+    clusterServers.values().forEach(all::addAll);
+    List<ServerName> serversList = getServersInInsertionOrder(clusterServers);
+    Map<ServerName, Long> blockCacheFree = new HashMap<>();
+    blockCacheFree.put(serversList.get(0), cacheFreeInBytes + 1024 * 1024); // region size + 1 MiB
+    blockCacheFree.put(serversList.get(1), cacheFreeInBytes + 1024 * 1024); // region size + 1 MiB
+    BalancerClusterState cluster =
+      new BalancerClusterState(clusterServers, buildRegionLoads(all, 1.0f, 
regionSizeMb), null,
+        null, Collections.emptyMap(), blockCacheFree);
+    lb.initCosts(cluster);
+    assertFalse(lb.needsBalance(
+      
TableName.valueOf("testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace"),
+      cluster));
+  }
+
+  private static CacheAwareLoadBalancer newCacheAwareBalancer(Configuration 
cfg) { // test helper: builds a new CacheAwareLoadBalancer set up from cfg
+    CacheAwareLoadBalancer lb = new CacheAwareLoadBalancer();
+    lb.setClusterInfoProvider(new DummyClusterInfoProvider(cfg)); // provider is set before loadConf
+    lb.loadConf(cfg);
+    return lb;
+  }
+
+  private static Map<String, Deque<BalancerRegionLoad>>
+    buildRegionLoads(Collection<RegionInfo> regions, float cachedRatio, int 
regionSizeMb) { // test helper: gives every region the same mocked load with the given ratio and size
+    RegionMetrics rm = mock(RegionMetrics.class); // a single mock backs the load of every region
+    when(rm.getReadRequestCount()).thenReturn(0L);
+    when(rm.getCpRequestCount()).thenReturn(0L);
+    when(rm.getWriteRequestCount()).thenReturn(0L);
+    when(rm.getMemStoreSize()).thenReturn(Size.ZERO);
+    when(rm.getStoreFileSize()).thenReturn(Size.ZERO);
+    when(rm.getRegionSizeMB()).thenReturn(new Size(regionSizeMb, 
Size.Unit.MEGABYTE));
+    when(rm.getCurrentRegionCachedRatio()).thenReturn(cachedRatio);
+
+    BalancerRegionLoad brl = new BalancerRegionLoad(rm); // one load object reused for all regions
+    Map<String, Deque<BalancerRegionLoad>> loads = new HashMap<>();
+    for (RegionInfo ri : regions) {
+      ArrayDeque<BalancerRegionLoad> dq = new ArrayDeque<>(1); // holds exactly one load sample
+      dq.add(brl);
+      loads.put(ri.getRegionNameAsString(), dq); // keyed by full region name...
+      loads.put(ri.getEncodedName(), dq); // ...and by encoded name, both pointing at the same deque
+    }
+    return loads;
+  }
+
+  private static List<ServerName>
+    getServersInInsertionOrder(Map<ServerName, List<RegionInfo>> cluster) {
+    return new ArrayList<>(cluster.keySet()); // copy in keySet() iteration order; NOTE(review): this is insertion order only if the map preserves it - confirm the map type
+  }
+
   private class MockClusterForCacheCost extends BalancerClusterState {
     private final Map<Pair<Integer, Integer>, Float> regionServerCacheRatio = 
new HashMap<>();
 
     public MockClusterForCacheCost(int[][] regionsArray) {
       // regions[0] is an array where index = serverIndex and value = number 
of regions
-      super(mockClusterServersUnsorted(regionsArray[0], 1), null, null, null, 
null);
+      super(mockClusterServersUnsorted(regionsArray[0], 1), null, null, null, 
null, null);
       Map<String, Pair<ServerName, Float>> oldCacheRatio = new HashMap<>();
       for (int i = 1; i < regionsArray.length; i++) {
         int regionIndex = i - 1;


Reply via email to