wchevreuil commented on code in PR #8218:
URL: https://github.com/apache/hbase/pull/8218#discussion_r3225319924


##########
hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java:
##########
@@ -552,25 +605,87 @@ static class CacheAwareCostFunction extends CostFunction {
         !isPersistentCache ? 0.0f : conf.getFloat(CACHE_COST_KEY, DEFAULT_CACHE_COST));
       bestCacheRatio = 0.0;
       cacheRatio = 0.0;
+      lowCacheRatioThreshold =
+        conf.getFloat(LOW_CACHE_RATIO_FOR_RELOCATION_KEY, LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT);
+      potentialCacheRatioAfterMove = Math.min(1.0f, conf
+        .getFloat(POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY, POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT));
+      minFreeCacheSpaceFactor =
+        conf.getFloat(MIN_FREE_CACHE_SPACE_FACTOR_KEY, MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT);
     }
 
     @Override
     void prepare(BalancerClusterState cluster) {
       super.prepare(cluster);
-      cacheRatio = 0.0;
-      bestCacheRatio = 0.0;
+      recomputeCacheRatio(cluster);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio);
+      }
+    }
 
+    private void recomputeCacheRatio(BalancerClusterState cluster) {
+      double[] currentWeighted = computeCurrentWeightedContributions(cluster);
+      double currentSum = 0.0;
+      double bestCacheSum = 0.0;
       for (int region = 0; region < cluster.numRegions; region++) {
-        cacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region,
-          cluster.regionIndexToServerIndex[region]);
-        bestCacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region,
-          getServerWithBestCacheRatioForRegion(region));
+        currentSum += currentWeighted[region];
+        // here we only get the server index where this region cache ratio is the highest
+        int serverIndexBestCache = cluster.getOrComputeServerWithBestRegionCachedRatio()[region];
+        double currentHighestCache =
+          cluster.getOrComputeWeightedRegionCacheRatio(region, serverIndexBestCache);
+        // Get a hypothetical best cache ratio for this region if any server has enough free cache
+        // to host it.
+        double potentialHighestCache =
+          potentialBestWeightedFromFreeCache(cluster, region, currentHighestCache);
+        double actualHighest = Math.max(currentHighestCache, potentialHighestCache);
+        bestCacheSum += actualHighest;
       }
+      bestCacheRatio = bestCacheSum;
+      if (bestCacheSum <= 0.0) {
+        cacheRatio = cluster.numRegions == 0 ? 1.0 : 0.0;
+      } else {
+        cacheRatio = Math.min(1.0, currentSum / bestCacheSum);
+      }
+    }
 
-      cacheRatio = bestCacheRatio == 0 ? 1.0 : cacheRatio / bestCacheRatio;
-      if (LOG.isDebugEnabled()) {
-        LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio);
+    private double[] computeCurrentWeightedContributions(BalancerClusterState cluster) {
+      int totalRegions = cluster.numRegions;
+      double[] contrib = new double[totalRegions];
+      for (int r = 0; r < totalRegions; r++) {
+        int s = cluster.regionIndexToServerIndex[r];
+        int sizeMb = cluster.getTotalRegionHFileSizeMB(r);
+        if (sizeMb <= 0) {
+          contrib[r] = 0.0;
+          continue;
+        }
+        contrib[r] = cluster.getOrComputeWeightedRegionCacheRatio(r, s);
+      }
+      return contrib;
+    }
+
+    /*
+     * If this region is cold in metrics and at least one RS (including its current host) reports
+     * enough free block cache to hold it, return an optimistic weighted cache score ({@link
+     * #potentialCacheRatioAfterMove} * region MB) so placement is not considered optimal solely
+     * from low ratios when capacity exists somewhere in the cluster.
+     */
+    private double potentialBestWeightedFromFreeCache(BalancerClusterState cluster, int region,

Review Comment:
   Yes, let me fix this in HBASE-30135.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to