This is an automated email from the ASF dual-hosted git repository.
wchevreuil pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new 8fd79a0ea7f HBASE-30135: Improve CacheAwareLoadBalancer to simulate
low cache ratio regions as cached in candidate servers with enough cache space
(#8221)
8fd79a0ea7f is described below
commit 8fd79a0ea7fdac6b21f1d1fcc70cad7ed8da4048
Author: Wellington Ramos Chevreuil <[email protected]>
AuthorDate: Mon May 18 10:30:00 2026 +0100
HBASE-30135: Improve CacheAwareLoadBalancer to simulate low cache ratio
regions as cached in candidate servers with enough cache space (#8221)
Signed-off-by: Peter Somogyi <[email protected]>
---
.../master/balancer/BalancerClusterState.java | 38 +++--
.../hbase/master/balancer/BalancerRegionLoad.java | 6 +
.../master/balancer/CacheAwareLoadBalancer.java | 155 ++++++++++++++++-----
.../master/balancer/StochasticLoadBalancer.java | 9 ++
.../TestCacheAwareLoadBalancerCostFunctions.java | 14 +-
...lancerWithCacheAwareLoadBalancerAsInternal.java | 79 ++++++++++-
6 files changed, 240 insertions(+), 61 deletions(-)
diff --git
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
index 183ccf4fc25..aa73b52a404 100644
---
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
+++
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
@@ -547,7 +547,29 @@ class BalancerClusterState {
if (load == null) {
return 0;
}
- return regionLoads[region].getLast().getStorefileSizeMB();
+ return load.getLast().getStorefileSizeMB();
+ }
+
+ /**
+ * Finds and returns the sum of the latest reported cache ratio and cold data
ratio for the region on
+ * the RegionServer where it is currently online.
+ */
+ float getSumRegionCacheAndColdDataRatio(int region) {
+ Deque<BalancerRegionLoad> dq = regionLoads[region];
+ if (dq == null || dq.isEmpty()) {
+ return 0.0f;
+ }
+ BalancerRegionLoad load = dq.getLast();
+ return load.getCurrentRegionCacheRatio() + load.getRegionColdDataRatio();
+ }
+
+ int getRegionSizeMinusColdDataMB(int region) {
+ Deque<BalancerRegionLoad> dq = regionLoads[region];
+ if (dq == null || dq.isEmpty()) {
+ return 0;
+ }
+ BalancerRegionLoad load = dq.getLast();
+ return load.getRegionSizeMB() - (int) (load.getRegionSizeMB() *
load.getRegionColdDataRatio());
}
/**
@@ -592,23 +614,11 @@ class BalancerClusterState {
}
- /**
- * Returns the size of hFiles from the most recent RegionLoad for region
- */
- public int getTotalRegionHFileSizeMB(int region) {
- Deque<BalancerRegionLoad> load = regionLoads[region];
- if (load == null) {
- // This means, that the region has no actual data on disk
- return 0;
- }
- return regionLoads[region].getLast().getRegionSizeMB();
- }
-
/**
* Returns the weighted cache ratio of a region on the given region server
*/
public float getOrComputeWeightedRegionCacheRatio(int region, int server) {
- return getTotalRegionHFileSizeMB(region) *
getOrComputeRegionCacheRatio(region, server);
+ return getRegionSizeMinusColdDataMB(region) *
getOrComputeRegionCacheRatio(region, server);
}
/**
diff --git
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java
index 33d00e3de86..5c9e73a1053 100644
---
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java
+++
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerRegionLoad.java
@@ -36,6 +36,7 @@ class BalancerRegionLoad {
private final int storefileSizeMB;
private final int regionSizeMB;
private final float currentRegionPrefetchRatio;
+ private final float regionColdDataRatio;
BalancerRegionLoad(RegionMetrics regionMetrics) {
readRequestsCount = regionMetrics.getReadRequestCount();
@@ -45,6 +46,7 @@ class BalancerRegionLoad {
storefileSizeMB = (int)
regionMetrics.getStoreFileSize().get(Size.Unit.MEGABYTE);
regionSizeMB = (int)
regionMetrics.getRegionSizeMB().get(Size.Unit.MEGABYTE);
currentRegionPrefetchRatio = regionMetrics.getCurrentRegionCachedRatio();
+ regionColdDataRatio = regionMetrics.getCurrentRegionColdDataRatio();
}
public long getReadRequestsCount() {
@@ -74,4 +76,8 @@ class BalancerRegionLoad {
public float getCurrentRegionCacheRatio() {
return currentRegionPrefetchRatio;
}
+
+ public float getRegionColdDataRatio() {
+ return regionColdDataRatio;
+ }
}
diff --git
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
index 3d59cef5dee..effb5f0e6b9 100644
---
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
+++
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
@@ -28,6 +28,7 @@ package org.apache.hadoop.hbase.master.balancer;
import static
org.apache.hadoop.hbase.HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY;
+import java.math.BigDecimal;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
@@ -36,6 +37,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
+import java.util.concurrent.ThreadLocalRandom;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.RegionMetrics;
@@ -88,10 +90,11 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
private Long sleepTime;
private Configuration configuration;
- public enum GeneratorFunctionType {
- LOAD,
- CACHE_RATIO
- }
+ private float lowCacheRatioThreshold;
+ private float potentialCacheRatioAfterMove;
+ private float minFreeCacheSpaceFactor;
+
+ private BigDecimal simulatedRatio = BigDecimal.ZERO;
@Override
public void loadConf(Configuration configuration) {
@@ -101,6 +104,12 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
ratioThreshold =
this.configuration.getFloat(CACHE_RATIO_THRESHOLD,
CACHE_RATIO_THRESHOLD_DEFAULT);
sleepTime = configuration.getLong(MOVE_THROTTLING,
MOVE_THROTTLING_DEFAULT.toMillis());
+ lowCacheRatioThreshold =
configuration.getFloat(LOW_CACHE_RATIO_FOR_RELOCATION_KEY,
+ LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT);
+ potentialCacheRatioAfterMove =
configuration.getFloat(POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY,
+ POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT);
+ minFreeCacheSpaceFactor =
+ configuration.getFloat(MIN_FREE_CACHE_SPACE_FACTOR_KEY,
MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT);
}
@Override
@@ -192,15 +201,13 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
int regionSizeMB =
regionCacheRatioOnCurrentServerMap.get(regionEncodedName).getSecond();
// The coldDataSize accounts for data size classified as "cold" by
DataTieringManager,
- // which should be kept out of cache. We sum cold region size in
the cache ratio, as we
+ // which should be kept out of cache. We calculate cache ratio on
old server based
+ // only on the hot data size for the region (regionSizeMB -
coldDataSize), as we
// don't want to move regions with low cache ratio due to data
classified as cold.
- float regionCacheRatioOnOldServer =
- regionSizeMB
- == 0
- ? 0.0f
- : (float) (regionSizeInCache
- +
sm.getRegionColdDataSize().getOrDefault(regionEncodedName, 0))
- / regionSizeMB;
+ int coldDataSize =
sm.getRegionColdDataSize().getOrDefault(regionEncodedName, 0);
+ float regionCacheRatioOnOldServer = (regionSizeMB - coldDataSize)
<= 0
+ ? 0.0f
+ : (float) regionSizeInCache / (regionSizeMB - coldDataSize);
regionCacheRatioOnOldServerMap.put(regionEncodedName,
new Pair<>(sn, regionCacheRatioOnOldServer));
}
@@ -271,6 +278,7 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
private class CacheAwareCandidateGenerator extends CandidateGenerator {
@Override
protected BalanceAction generate(BalancerClusterState cluster) {
+ simulatedRatio = BigDecimal.ZERO;
// Move the regions to the servers they were previously hosted on based
on the cache ratio
if (
!regionCacheRatioOnOldServerMap.isEmpty()
@@ -310,6 +318,50 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
regionCacheRatioOnOldServerMap.remove(regionEncodedName);
return action;
}
+ return generatePlanForFreeCacheSpace(cluster);
+ }
+
+ private BalanceAction generatePlanForFreeCacheSpace(BalancerClusterState
cluster) {
+ if (cluster.serverBlockCacheFreeSize == null) {
+ return BalanceAction.NULL_ACTION;
+ }
+ List<BalanceAction> possibleActions = new ArrayList<>();
+ Map<Integer, Long> serverFreeCacheAfterAction = new HashMap<>();
+ for (int region = 0; region < cluster.numRegions; region++) {
+ RegionInfo regionInfo = cluster.regions[region];
+ if (regionInfo.isMetaRegion() ||
regionInfo.getTable().isSystemTable()) {
+ continue;
+ }
+ int currentServer = cluster.regionIndexToServerIndex[region];
+ float ratio = cluster.getSumRegionCacheAndColdDataRatio(region);
+ if (ratio >= lowCacheRatioThreshold) {
+ continue;
+ }
+ int regionSizeMb = cluster.getRegionSizeMinusColdDataMB(region);
+ if (regionSizeMb <= 0) {
+ continue;
+ }
+ long bytesNeeded = (long) (regionSizeMb * 1024L * 1024L *
minFreeCacheSpaceFactor);
+ for (int server = 0; server < cluster.numServers; server++) {
+ // Skip the region's current server, as we can't generate a move to the
same server
+ if (server == currentServer) {
+ continue;
+ }
+ serverFreeCacheAfterAction.putIfAbsent(server,
cluster.serverBlockCacheFreeSize[server]);
+ if (serverFreeCacheAfterAction.get(server) >= bytesNeeded) {
+ serverFreeCacheAfterAction.compute(server, (s, freeCache) ->
freeCache - bytesNeeded);
+ possibleActions.add(getAction(currentServer, region, server, -1));
+ }
+ }
+ }
+ if (!possibleActions.isEmpty()) {
+ BalanceAction action =
+
possibleActions.get(ThreadLocalRandom.current().nextInt(possibleActions.size()));
+ LOG.debug("region {} had sum ratio {}",
+ cluster.regions[((MoveRegionAction)
action).getRegion()].getEncodedName(),
+ cluster.getSumRegionCacheAndColdDataRatio(((MoveRegionAction)
action).getRegion()));
+ return action;
+ }
return BalanceAction.NULL_ACTION;
}
@@ -319,7 +371,7 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
return moveRegionToOldServer(cluster, regionIndex, currentServerIndex,
cacheRatioOnCurrentServer, oldServerIndex, cacheRatioOnOldServer)
? getAction(currentServerIndex, regionIndex, oldServerIndex, -1)
- : BalanceAction.NULL_ACTION;
+ : generatePlanForFreeCacheSpace(cluster);
}
private boolean moveRegionToOldServer(BalancerClusterState cluster, int
regionIndex,
@@ -385,6 +437,7 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
private class CacheAwareSkewnessCandidateGenerator extends
LoadCandidateGenerator {
@Override
BalanceAction pickRandomRegions(BalancerClusterState cluster, int
thisServer, int otherServer) {
+ simulatedRatio = BigDecimal.ZERO;
// First move all the regions which were hosted previously on some other
server back to their
// old servers
if (
@@ -519,7 +572,7 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
}
}
- static class CacheAwareCostFunction extends CostFunction {
+ class CacheAwareCostFunction extends CostFunction {
private static final String CACHE_COST_KEY =
"hbase.master.balancer.stochastic.cacheCost";
private double cacheRatio;
private double bestCacheRatio;
@@ -561,14 +614,13 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
currentSum += currentWeighted[region];
// here we only get the server index where this region cache ratio is
the highest
int serverIndexBestCache =
cluster.getOrComputeServerWithBestRegionCachedRatio()[region];
+ // get the highest cacheRatio for this region on the current state of
allocations
double currentHighestCache =
cluster.getOrComputeWeightedRegionCacheRatio(region,
serverIndexBestCache);
// Get a hypothetical best cache ratio for this region if any server
has enough free cache
// to host it.
- double potentialHighestCache =
- potentialBestWeightedFromFreeCache(cluster, region,
currentHighestCache);
- double actualHighest = Math.max(currentHighestCache,
potentialHighestCache);
- bestCacheSum += actualHighest;
+ double potentialHighestCache =
potentialBestWeightedFromFreeCache(cluster, region);
+ bestCacheSum += Math.max(currentHighestCache, potentialHighestCache);
}
bestCacheRatio = bestCacheSum;
if (bestCacheSum <= 0.0) {
@@ -583,11 +635,24 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
double[] contrib = new double[totalRegions];
for (int r = 0; r < totalRegions; r++) {
int s = cluster.regionIndexToServerIndex[r];
- int sizeMb = cluster.getTotalRegionHFileSizeMB(r);
+ int sizeMb = cluster.getRegionSizeMinusColdDataMB(r);
if (sizeMb <= 0) {
contrib[r] = 0.0;
continue;
}
+ boolean movedInSimulation = cluster.initialRegionIndexToServerIndex[r]
!= s;
+ if (
+ cluster.serverBlockCacheFreeSize != null && movedInSimulation
+ && cluster.getSumRegionCacheAndColdDataRatio(r) <
lowCacheRatioThreshold
+ ) {
+ LOG.debug("Region {} is simulated moved to new server {}",
+ cluster.regions[r].getEncodedName(),
cluster.servers[s].getHostname());
+ long bytesNeeded = (long) (sizeMb * 1024L * 1024L *
minFreeCacheSpaceFactor);
+ if (cluster.serverBlockCacheFreeSize[s] >= bytesNeeded) {
+ contrib[r] = sizeMb * potentialCacheRatioAfterMove;
+ continue;
+ }
+ }
contrib[r] = cluster.getOrComputeWeightedRegionCacheRatio(r, s);
}
return contrib;
@@ -599,13 +664,12 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
* #potentialCacheRatioAfterMove} * region MB) so placement is not
considered optimal solely
* from low ratios when capacity exists somewhere in the cluster.
*/
- private double potentialBestWeightedFromFreeCache(BalancerClusterState
cluster, int region,
- double currentHighestCache) {
- float observedRatio = cluster.getObservedRegionCacheRatio(region);
+ private double potentialBestWeightedFromFreeCache(BalancerClusterState
cluster, int region) {
+ float observedRatio = cluster.getSumRegionCacheAndColdDataRatio(region);
if (observedRatio >= lowCacheRatioThreshold) {
return 0.0;
}
- int regionSizeMb = cluster.getTotalRegionHFileSizeMB(region);
+ int regionSizeMb = cluster.getRegionSizeMinusColdDataMB(region);
if (regionSizeMb <= 0) {
return 0.0;
}
@@ -613,7 +677,7 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
long requiredFree = (long) (regionSizeBytes * minFreeCacheSpaceFactor);
for (int s = 0; s < cluster.numServers; s++) {
if (cluster.serverBlockCacheFreeSize[s] >= requiredFree) {
- return Math.max(currentHighestCache, regionSizeMb *
potentialCacheRatioAfterMove);
+ return regionSizeMb * potentialCacheRatioAfterMove;
}
}
return 0.0;
@@ -628,18 +692,39 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
protected void regionMoved(int region, int oldServer, int newServer) {
double regionCacheRatioOnOldServer =
cluster.getOrComputeWeightedRegionCacheRatio(region, oldServer);
- double regionCacheRatioOnNewServer =
- cluster.getOrComputeWeightedRegionCacheRatio(region, newServer);
- double cacheRatioDiff = regionCacheRatioOnNewServer -
regionCacheRatioOnOldServer;
- double normalizedDelta = bestCacheRatio == 0.0 ? 0.0 : cacheRatioDiff /
bestCacheRatio;
- cacheRatio += normalizedDelta;
- if (LOG.isDebugEnabled() && (cacheRatio < 0.0 || cacheRatio > 1.0)) {
+ if (simulatedRatio.equals(BigDecimal.ZERO)) {
+ double potentialCachedSizeOnNewServer =
+ cluster.getRegionSizeMinusColdDataMB(region) *
potentialCacheRatioAfterMove;
+ boolean simulateCacheBasedOnFreeSpace =
+ cluster.getOrComputeRegionCacheRatio(region, oldServer) <
lowCacheRatioThreshold
+ && cluster.serverBlockCacheFreeSize[newServer] >=
potentialCachedSizeOnNewServer;
+ double regionCacheRatioOnNewServer = simulateCacheBasedOnFreeSpace
+ ? potentialCachedSizeOnNewServer
+ : cluster.getOrComputeWeightedRegionCacheRatio(region, newServer);
+ double cacheRatioDiff = regionCacheRatioOnNewServer -
regionCacheRatioOnOldServer;
+ double normalizedDelta = bestCacheRatio == 0.0 ? 0.0 : cacheRatioDiff
/ bestCacheRatio;
LOG.debug(
-
"CacheAwareCostFunction:regionMoved:region:{}:from:{}:to:{}:regionCacheRatioOnOldServer:{}:"
- +
"regionCacheRatioOnNewServer:{}:bestRegionCacheRatio:{}:cacheRatio:{}",
- cluster.regions[region].getEncodedName(),
cluster.servers[oldServer].getHostname(),
- cluster.servers[newServer].getHostname(),
regionCacheRatioOnOldServer,
- regionCacheRatioOnNewServer, bestCacheRatio, cacheRatio);
+ "simulating moving region {} using simulateCacheBasedOnFreeSpace={} "
+ + "got a normalized delta of {} to be added to cacheRatio: {}",
+ cluster.regions[region].getEncodedName(),
simulateCacheBasedOnFreeSpace, normalizedDelta,
+ cacheRatio);
+ simulatedRatio = BigDecimal.valueOf(normalizedDelta);
+ cacheRatio += normalizedDelta;
+ if (cacheRatio < 0.0 || cacheRatio > 1.0) {
+ LOG.info(
+ "Recomputing cacheRatio after calculating impact of region move:
\n "
+ + "CacheAwareCostFunction:regionMoved:region:{}:from:{}:to:{}:"
+ +
"regionCacheRatioOnOldServer:{}:regionCacheRatioOnNewServer:{}:"
+ + "bestRegionCacheRatio:{}:cacheRatio:{}",
+ cluster.regions[region].getEncodedName(),
cluster.servers[oldServer].getHostname(),
+ cluster.servers[newServer].getHostname(),
regionCacheRatioOnOldServer,
+ regionCacheRatioOnNewServer, bestCacheRatio, cacheRatio);
+ recomputeCacheRatio(cluster);
+ }
+ } else {
+ // This means we are in an undoAction call and need to reverse the
cache delta applied in
+ // the region move simulation
+ cacheRatio -= simulatedRatio.doubleValue();
}
}
diff --git
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index 62b1c2a3454..79a0129518e 100644
---
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -688,6 +688,15 @@ public class StochasticLoadBalancer extends
BaseLoadBalancer {
newCost = computeCost(cluster, currentCost);
+ if (LOG.isDebugEnabled() && action.getType() ==
BalanceAction.Type.MOVE_REGION) {
+ LOG.debug(
+ "action moving region {} from {} to {} with cost {}. currentCost={},
functionCost={}",
+ cluster.regions[((MoveRegionAction)
action).getRegion()].getEncodedName(),
+ cluster.servers[((MoveRegionAction)
action).getFromServer()].getServerName(),
+ cluster.servers[((MoveRegionAction)
action).getToServer()].getServerName(), newCost,
+ currentCost, functionCost());
+ }
+
double costImprovement = currentCost - newCost;
double minimumImprovement =
Math.max(CostFunction.getCostEpsilon(currentCost),
CostFunction.getCostEpsilon(newCost));
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
index 42a0ad213cf..8586e1fc626 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
@@ -251,8 +251,9 @@ public class TestCacheAwareLoadBalancerCostFunctions
extends StochasticBalancerT
@Test
public void testCacheCost() {
conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY,
"/tmp/prefetch.persistence");
+ CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf);
CacheAwareLoadBalancer.CacheAwareCostFunction costFunction =
- new CacheAwareLoadBalancer.CacheAwareCostFunction(conf);
+ lb.new CacheAwareCostFunction(conf);
for (int test = 0; test < clusterRegionCacheRatioMocks.length; test++) {
int[][] clusterRegionLocations = clusterRegionCacheRatioMocks[test];
@@ -379,11 +380,6 @@ public class TestCacheAwareLoadBalancerCostFunctions
extends StochasticBalancerT
regionCacheRatioOnOldServerMap = oldCacheRatio;
}
- @Override
- public int getTotalRegionHFileSizeMB(int region) {
- return 1;
- }
-
@Override
protected float getRegionCacheRatioOnRegionServer(int region, int
regionServerIndex) {
float cacheRatio = 0.0f;
@@ -412,5 +408,11 @@ public class TestCacheAwareLoadBalancerCostFunctions
extends StochasticBalancerT
}
return cacheRatio;
}
+
+ @Override
+ int getRegionSizeMinusColdDataMB(int region) {
+ return 1;
+ }
+
}
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal.java
index 71154ebcb33..4ed6c84a026 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master.balancer;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@@ -129,10 +130,76 @@ public class
TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal
assertEquals(5, targetServers.get(server1).size());
}
+ /**
+ * Regions on the overloaded RS report low block-cache ratio; no RS reports
prefetch/historical
+ * cache for those regions (so {@link
CacheAwareLoadBalancer.CacheAwareCandidateGenerator} has no
+ * "old server" to prefer). Another RS has ample free block cache. The
balancer should still emit
+ * plans that shed load from the hot RS onto the idle RS with spare cache
capacity.
+ */
@Test
- public void
testRegionsPartiallyCachedOnOldServerAndNotCachedOnCurrentServer() throws
Exception {
- // The regions are partially cached on old server but not cached on the
current server
+ public void
testLowCacheRatioNoHistoricalCacheRelocatesWhenTargetHasFreeBlockCache()
+ throws Exception {
+ Map<ServerName, List<RegionInfo>> clusterState = new HashMap<>();
+ ServerName server0 = servers.get(0);
+ ServerName server1 = servers.get(1);
+ ServerName server2 = servers.get(2);
+ List<RegionInfo> regionsOnServer0 = randomRegions(10);
+ List<RegionInfo> regionsOnServer1 = randomRegions(0);
+ List<RegionInfo> regionsOnServer2 = randomRegions(5);
+
+ clusterState.put(server0, regionsOnServer0);
+ clusterState.put(server1, regionsOnServer1);
+ clusterState.put(server2, regionsOnServer2);
+
+ // Cache ratio 0.1 is below LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT (0.35).
+ ServerMetrics sm0 = mockServerMetricsWithRegionCacheInfo(server0,
regionsOnServer0, 0.1f,
+ new ArrayList<>(), 0, 10);
+ when(sm0.getCacheFreeSize()).thenReturn(0L);
+ ServerMetrics sm1 = mockServerMetricsWithRegionCacheInfo(server1,
regionsOnServer1, 0.0f,
+ new ArrayList<>(), 0, 10);
+ // Simulates 1GB free cache space on server1
+ when(sm1.getCacheFreeSize()).thenReturn(1024L * 1024 * 1024);
+ ServerMetrics sm2 = mockServerMetricsWithRegionCacheInfo(server2,
regionsOnServer2, 1.0f,
+ new ArrayList<>(), 0, 10);
+ when(sm2.getCacheFreeSize()).thenReturn(0L);
+
+ Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
+ serverMetricsMap.put(server0, sm0);
+ serverMetricsMap.put(server1, sm1);
+ serverMetricsMap.put(server2, sm2);
+ ClusterMetrics clusterMetrics = mock(ClusterMetrics.class);
+ when(clusterMetrics.getLiveServerMetrics()).thenReturn(serverMetricsMap);
+ loadBalancer.updateClusterMetrics(clusterMetrics);
+
+ CacheAwareLoadBalancer internalBalancer =
+ (CacheAwareLoadBalancer) loadBalancer.getInternalBalancer();
+ assertNotNull(internalBalancer);
+ assertTrue(internalBalancer.regionCacheRatioOnOldServerMap.isEmpty());
+
+ Map<TableName, Map<ServerName, List<RegionInfo>>> loadOfAllTable =
+ (Map) mockClusterServersWithTables(clusterState);
+ List<RegionPlan> plans = loadBalancer.balanceCluster(loadOfAllTable);
+ assertNotNull(plans);
+
+ Set<RegionInfo> regionsMovedFromServer0 = new HashSet<>();
+ Map<ServerName, List<RegionInfo>> targetServers = new HashMap<>();
+ for (RegionPlan plan : plans) {
+ if (plan.getSource().equals(server0)) {
+ regionsMovedFromServer0.add(plan.getRegionInfo());
+ if (!targetServers.containsKey(plan.getDestination())) {
+ targetServers.put(plan.getDestination(), new ArrayList<>());
+ }
+ targetServers.get(plan.getDestination()).add(plan.getRegionInfo());
+ }
+ }
+ assertEquals(5, regionsMovedFromServer0.size());
+ assertNotNull(targetServers.get(server1));
+ assertEquals(5, targetServers.get(server1).size());
+ }
+
+ @Test
+ public void
testRegionsPartiallyCachedOnOldServerAndNotCachedOnCurrentServer() throws
Exception {
Map<ServerName, List<RegionInfo>> clusterState = new HashMap<>();
ServerName server0 = servers.get(0);
ServerName server1 = servers.get(1);
@@ -150,7 +217,7 @@ public class
TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal
// Mock cluster metrics
// Mock 5 regions from server0 were previously hosted on server1
- List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5,
regionsOnServer0.size() - 1);
+ List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5,
regionsOnServer0.size());
Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
serverMetricsMap.put(server0,
mockServerMetricsWithRegionCacheInfo(server0, regionsOnServer0,
@@ -387,7 +454,7 @@ public class
TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal
// Mock cluster metrics
// Mock 5 regions from server0 were previously hosted on server1
- List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5,
regionsOnServer0.size() - 1);
+ List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5,
regionsOnServer0.size());
Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
serverMetricsMap.put(server0,
mockServerMetricsWithRegionCacheInfo(server0, regionsOnServer0,
@@ -441,7 +508,7 @@ public class
TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal
// Mock cluster metrics
// Mock 5 regions from server0 were previously hosted on server1
- List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5,
regionsOnServer0.size() - 1);
+ List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5,
regionsOnServer0.size());
Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
serverMetricsMap.put(server0,
mockServerMetricsWithRegionCacheInfo(server0, regionsOnServer0,
@@ -495,7 +562,7 @@ public class
TestRSGroupBasedLoadBalancerWithCacheAwareLoadBalancerAsInternal
// Mock cluster metrics
// Mock 5 regions from server0 were previously hosted on server1
- List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5,
regionsOnServer0.size() - 1);
+ List<RegionInfo> oldCachedRegions = regionsOnServer0.subList(5,
regionsOnServer0.size());
Map<ServerName, ServerMetrics> serverMetricsMap = new TreeMap<>();
serverMetricsMap.put(server0,
mockServerMetricsWithRegionCacheInfo(server0, regionsOnServer0,