This is an automated email from the ASF dual-hosted git repository.
wchevreuil pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new e2c484d96c1 HBASE-30134: Improve CacheAwareLoadBalancer to consider
low cache ratio when calculating imbalance (#8197)
e2c484d96c1 is described below
commit e2c484d96c1438cca994cdf86f2a412f9b2f9f8c
Author: Wellington Ramos Chevreuil <[email protected]>
AuthorDate: Mon May 11 11:02:30 2026 +0100
HBASE-30134: Improve CacheAwareLoadBalancer to consider low cache ratio
when calculating imbalance (#8197)
Signed-off-by: Tak Lon (Stephen) Wu <[email protected]>
---
.../master/balancer/BalancerClusterState.java | 34 +++++-
.../master/balancer/BalancerConditionals.java | 2 +-
.../hbase/master/balancer/BaseLoadBalancer.java | 2 +-
.../master/balancer/CacheAwareLoadBalancer.java | 135 +++++++++++++++++++--
.../master/balancer/StochasticLoadBalancer.java | 12 +-
.../balancer/CandidateGeneratorTestUtil.java | 2 +-
.../TestStoreFileTableSkewCostFunction.java | 2 +-
.../org/apache/hadoop/hbase/ServerMetrics.java | 18 +++
.../apache/hadoop/hbase/ServerMetricsBuilder.java | 40 +++++-
.../src/main/protobuf/server/ClusterStatus.proto | 11 ++
.../hadoop/hbase/regionserver/HRegionServer.java | 6 +
.../hbase/master/TestRegionsRecoveryChore.java | 5 +
.../TestCacheAwareLoadBalancerCostFunctions.java | 107 +++++++++++++++-
13 files changed, 349 insertions(+), 27 deletions(-)
diff --git
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
index 86c45dae09b..183ccf4fc25 100644
---
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
+++
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerClusterState.java
@@ -128,6 +128,8 @@ class BalancerClusterState {
private int[] regionServerIndexWithBestRegionCachedRatio;
// Maps regionName -> oldServerName -> cache ratio of the region on the old
server
Map<String, Pair<ServerName, Float>> regionCacheRatioOnOldServerMap;
+ // cache free space available on each server, aligned to the "servers" array
indices;
+ long[] serverBlockCacheFreeSize;
private final Supplier<List<Integer>> shuffledServerIndicesSupplier =
Suppliers.memoizeWithExpiration(() -> {
@@ -148,20 +150,23 @@ class BalancerClusterState {
BalancerClusterState(Map<ServerName, List<RegionInfo>> clusterState,
Map<String, Deque<BalancerRegionLoad>> loads,
RegionHDFSBlockLocationFinder regionFinder,
RackManager rackManager) {
- this(null, clusterState, loads, regionFinder, rackManager, null);
+ this(null, clusterState, loads, regionFinder, rackManager, null, null);
}
protected BalancerClusterState(Map<ServerName, List<RegionInfo>>
clusterState,
Map<String, Deque<BalancerRegionLoad>> loads,
RegionHDFSBlockLocationFinder regionFinder,
- RackManager rackManager, Map<String, Pair<ServerName, Float>>
oldRegionServerRegionCacheRatio) {
- this(null, clusterState, loads, regionFinder, rackManager,
oldRegionServerRegionCacheRatio);
+ RackManager rackManager, Map<String, Pair<ServerName, Float>>
oldRegionServerRegionCacheRatio,
+ Map<ServerName, Long> serverBlockCacheFreeByServer) {
+ this(null, clusterState, loads, regionFinder, rackManager,
oldRegionServerRegionCacheRatio,
+ serverBlockCacheFreeByServer);
}
@SuppressWarnings("unchecked")
BalancerClusterState(Collection<RegionInfo> unassignedRegions,
Map<ServerName, List<RegionInfo>> clusterState, Map<String,
Deque<BalancerRegionLoad>> loads,
RegionHDFSBlockLocationFinder regionFinder, RackManager rackManager,
- Map<String, Pair<ServerName, Float>> oldRegionServerRegionCacheRatio) {
+ Map<String, Pair<ServerName, Float>> oldRegionServerRegionCacheRatio,
+ Map<ServerName, Long> serverBlockCacheFreeByServer) {
if (unassignedRegions == null) {
unassignedRegions = Collections.emptyList();
}
@@ -394,6 +399,15 @@ class BalancerClusterState {
populateRegionPerLocationFromServer(regionsPerRack,
colocatedReplicaCountsPerRack,
serversPerRack);
}
+
+ this.serverBlockCacheFreeSize = new long[numServers];
+ if (serverBlockCacheFreeByServer != null) {
+ for (int i = 0; i < numServers; i++) {
+ ServerName sn = servers[i];
+ this.serverBlockCacheFreeSize[i] =
+ sn == null ? 0L : serverBlockCacheFreeByServer.getOrDefault(sn, 0L);
+ }
+ }
}
private void populateRegionPerLocationFromServer(int[][] regionsPerLocation,
@@ -714,6 +728,18 @@ class BalancerClusterState {
return regionServerIndexWithBestRegionCachedRatio;
}
+ /**
+ * Finds and returns the latest reported cache ratio for the region on the
RegionServer where it is
+ * currently online.
+ */
+ float getObservedRegionCacheRatio(int region) {
+ Deque<BalancerRegionLoad> dq = regionLoads[region];
+ if (dq == null || dq.isEmpty()) {
+ return 0.0f;
+ }
+ return dq.getLast().getCurrentRegionCacheRatio();
+ }
+
/**
* Maps region index to rack index
*/
diff --git
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
index 7e5bd98a013..28af67a4954 100644
---
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
+++
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerConditionals.java
@@ -185,7 +185,7 @@ final class BalancerConditionals implements Configurable {
private RegionPlanConditional createConditional(Class<? extends
RegionPlanConditional> clazz,
BalancerClusterState cluster) {
if (cluster == null) {
- cluster = new BalancerClusterState(Collections.emptyMap(), null, null,
null, null);
+ cluster = new BalancerClusterState(Collections.emptyMap(), null, null,
null, null, null);
}
try {
Constructor<? extends RegionPlanConditional> ctor =
diff --git
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
index c3f17d88cde..be99c967025 100644
---
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
+++
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/BaseLoadBalancer.java
@@ -238,7 +238,7 @@ public abstract class BaseLoadBalancer implements
LoadBalancer {
}
}
return new BalancerClusterState(regions, clusterState, null,
this.regionFinder, rackManager,
- null);
+ null, null);
}
private List<ServerName> findIdleServers(List<ServerName> servers) {
diff --git
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
index b61ca31073e..3d59cef5dee 100644
---
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
+++
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java
@@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Size;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
+import org.apache.hadoop.hbase.master.RackManager;
import org.apache.hadoop.hbase.master.RegionPlan;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
@@ -58,6 +59,30 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
"hbase.master.balancer.stochastic.throttling.cacheRatio";
public static final float CACHE_RATIO_THRESHOLD_DEFAULT = 0.8f;
+ /**
+ * Below this cache ratio on the current host, a move may be considered for
the free-space
+ * heuristic.
+ */
+ public static final String LOW_CACHE_RATIO_FOR_RELOCATION_KEY =
+ "hbase.master.balancer.cacheaware.lowCacheRatioThreshold";
+ public static final float LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT = 0.35f;
+
+ /**
+ * Optimistic region cache ratio assumed for cost purposes when a better
host has free cache space
+ * (actual warmup is not modeled).
+ */
+ public static final String POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY =
+ "hbase.master.balancer.cacheaware.potentialCacheRatioAfterMove";
+ public static final float POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT = 0.95f;
+
+ /**
+ * Minimum free block cache on a target server, as a multiple of the
region's on-disk size in
+ * bytes, required to count that server as a relocation opportunity.
+ */
+ public static final String MIN_FREE_CACHE_SPACE_FACTOR_KEY =
+ "hbase.master.balancer.cacheaware.minFreeCacheSpaceFactor";
+ public static final float MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT = 1.0f;
+
public Float ratioThreshold;
private Long sleepTime;
@@ -109,6 +134,23 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
updateRegionLoad();
}
+ protected Map<ServerName, Long> getServerBlockCacheFreeBytes() {
+ if (clusterStatus == null) {
+ return null;
+ }
+ Map<ServerName, Long> map = new HashMap<>();
+ clusterStatus.getLiveServerMetrics().forEach((sn, sm) -> map.put(sn,
sm.getCacheFreeSize()));
+ return map;
+ }
+
+ @Override
+ protected BalancerClusterState createState(Map<ServerName, List<RegionInfo>>
clusterState,
+ Map<String, Deque<BalancerRegionLoad>> loads,
RegionHDFSBlockLocationFinder finder,
+ RackManager rackManager) {
+ return new BalancerClusterState(clusterState, loads, finder, rackManager,
+ regionCacheRatioOnOldServerMap, getServerBlockCacheFreeBytes());
+ }
+
/**
* Collect the amount of region cached for all the regions from all the
active region servers.
*/
@@ -149,8 +191,16 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
if (!ServerName.isSameAddress(currentServer, sn)) {
int regionSizeMB =
regionCacheRatioOnCurrentServerMap.get(regionEncodedName).getSecond();
+ // The coldDataSize accounts for data size classified as "cold" by
DataTieringManager,
+ // which should be kept out of cache. We add the cold data size to the cached size in
the cache ratio, as we
+ // don't want to move regions with low cache ratio due to data
classified as cold.
float regionCacheRatioOnOldServer =
- regionSizeMB == 0 ? 0.0f : (float) regionSizeInCache /
regionSizeMB;
+ regionSizeMB
+ == 0
+ ? 0.0f
+ : (float) (regionSizeInCache
+ +
sm.getRegionColdDataSize().getOrDefault(regionEncodedName, 0))
+ / regionSizeMB;
regionCacheRatioOnOldServerMap.put(regionEncodedName,
new Pair<>(sn, regionCacheRatioOnOldServer));
}
@@ -473,6 +523,9 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
private static final String CACHE_COST_KEY =
"hbase.master.balancer.stochastic.cacheCost";
private double cacheRatio;
private double bestCacheRatio;
+ private final float lowCacheRatioThreshold;
+ private final float potentialCacheRatioAfterMove;
+ private final float minFreeCacheSpaceFactor;
private static final float DEFAULT_CACHE_COST = 20;
@@ -483,25 +536,87 @@ public class CacheAwareLoadBalancer extends
StochasticLoadBalancer {
!isPersistentCache ? 0.0f : conf.getFloat(CACHE_COST_KEY,
DEFAULT_CACHE_COST));
bestCacheRatio = 0.0;
cacheRatio = 0.0;
+ lowCacheRatioThreshold =
+ conf.getFloat(LOW_CACHE_RATIO_FOR_RELOCATION_KEY,
LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT);
+ potentialCacheRatioAfterMove = Math.min(1.0f, conf
+ .getFloat(POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY,
POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT));
+ minFreeCacheSpaceFactor =
+ conf.getFloat(MIN_FREE_CACHE_SPACE_FACTOR_KEY,
MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT);
}
@Override
void prepare(BalancerClusterState cluster) {
super.prepare(cluster);
- cacheRatio = 0.0;
- bestCacheRatio = 0.0;
+ recomputeCacheRatio(cluster);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio);
+ }
+ }
+ private void recomputeCacheRatio(BalancerClusterState cluster) {
+ double[] currentWeighted = computeCurrentWeightedContributions(cluster);
+ double currentSum = 0.0;
+ double bestCacheSum = 0.0;
for (int region = 0; region < cluster.numRegions; region++) {
- cacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region,
- cluster.regionIndexToServerIndex[region]);
- bestCacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region,
- getServerWithBestCacheRatioForRegion(region));
+ currentSum += currentWeighted[region];
+ // here we only get the server index where this region cache ratio is
the highest
+ int serverIndexBestCache =
cluster.getOrComputeServerWithBestRegionCachedRatio()[region];
+ double currentHighestCache =
+ cluster.getOrComputeWeightedRegionCacheRatio(region,
serverIndexBestCache);
+ // Get a hypothetical best cache ratio for this region if any server
has enough free cache
+ // to host it.
+ double potentialHighestCache =
+ potentialBestWeightedFromFreeCache(cluster, region,
currentHighestCache);
+ double actualHighest = Math.max(currentHighestCache,
potentialHighestCache);
+ bestCacheSum += actualHighest;
}
+ bestCacheRatio = bestCacheSum;
+ if (bestCacheSum <= 0.0) {
+ cacheRatio = cluster.numRegions == 0 ? 1.0 : 0.0;
+ } else {
+ cacheRatio = Math.min(1.0, currentSum / bestCacheSum);
+ }
+ }
- cacheRatio = bestCacheRatio == 0 ? 1.0 : cacheRatio / bestCacheRatio;
- if (LOG.isDebugEnabled()) {
- LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio);
+ private double[] computeCurrentWeightedContributions(BalancerClusterState
cluster) {
+ int totalRegions = cluster.numRegions;
+ double[] contrib = new double[totalRegions];
+ for (int r = 0; r < totalRegions; r++) {
+ int s = cluster.regionIndexToServerIndex[r];
+ int sizeMb = cluster.getTotalRegionHFileSizeMB(r);
+ if (sizeMb <= 0) {
+ contrib[r] = 0.0;
+ continue;
+ }
+ contrib[r] = cluster.getOrComputeWeightedRegionCacheRatio(r, s);
+ }
+ return contrib;
+ }
+
+ /*
+ * If this region is cold in metrics and at least one RS (including its
current host) reports
+ * enough free block cache to hold it, return an optimistic weighted cache
score ({@link
+ * #potentialCacheRatioAfterMove} * region MB) so placement is not
considered optimal solely
+ * from low ratios when capacity exists somewhere in the cluster.
+ */
+ private double potentialBestWeightedFromFreeCache(BalancerClusterState
cluster, int region,
+ double currentHighestCache) {
+ float observedRatio = cluster.getObservedRegionCacheRatio(region);
+ if (observedRatio >= lowCacheRatioThreshold) {
+ return 0.0;
+ }
+ int regionSizeMb = cluster.getTotalRegionHFileSizeMB(region);
+ if (regionSizeMb <= 0) {
+ return 0.0;
+ }
+ long regionSizeBytes = (long) regionSizeMb * 1024L * 1024L;
+ long requiredFree = (long) (regionSizeBytes * minFreeCacheSpaceFactor);
+ for (int s = 0; s < cluster.numServers; s++) {
+ if (cluster.serverBlockCacheFreeSize[s] >= requiredFree) {
+ return Math.max(currentHighestCache, regionSizeMb *
potentialCacheRatioAfterMove);
+ }
}
+ return 0.0;
}
@Override
diff --git
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
index f2b2240a174..62b1c2a3454 100644
---
a/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
+++
b/hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/StochasticLoadBalancer.java
@@ -321,8 +321,7 @@ public class StochasticLoadBalancer extends
BaseLoadBalancer {
if ((this.localityCost != null) || (this.rackLocalityCost != null)) {
finder = this.regionFinder;
}
- BalancerClusterState cluster =
- new BalancerClusterState(loadOfOneTable, loads, finder, rackManager);
+ BalancerClusterState cluster = createState(loadOfOneTable, loads, finder,
rackManager);
initCosts(cluster);
curOverallCost = computeCost(cluster, Double.MAX_VALUE);
@@ -330,6 +329,12 @@ public class StochasticLoadBalancer extends
BaseLoadBalancer {
updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
}
+ protected BalancerClusterState createState(Map<ServerName, List<RegionInfo>>
clusterState,
+ Map<String, Deque<BalancerRegionLoad>> loads,
RegionHDFSBlockLocationFinder finder,
+ RackManager rackManager) {
+ return new BalancerClusterState(clusterState, loads, finder, rackManager);
+ }
+
@Override
public void
updateBalancerLoadInfo(Map<TableName, Map<ServerName, List<RegionInfo>>>
loadOfAllTable) {
@@ -577,8 +582,7 @@ public class StochasticLoadBalancer extends
BaseLoadBalancer {
// The clusterState that is given to this method contains the state
// of all the regions in the table(s) (that's true today)
// Keep track of servers to iterate through them.
- BalancerClusterState cluster = new BalancerClusterState(loadOfOneTable,
loads, finder,
- rackManager, regionCacheRatioOnOldServerMap);
+ BalancerClusterState cluster = createState(loadOfOneTable, loads, finder,
rackManager);
long startTime = EnvironmentEdgeManager.currentTime();
cluster.setStopRequestedAt(startTime + maxRunningTime);
diff --git
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
index 870c97a1f49..00d39fc4e74 100644
---
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
+++
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/CandidateGeneratorTestUtil.java
@@ -218,7 +218,7 @@ public final class CandidateGeneratorTestUtil {
static BalancerClusterState
createMockBalancerClusterState(Map<ServerName, List<RegionInfo>>
serverToRegions) {
- return new BalancerClusterState(serverToRegions, null, null, null, null);
+ return new BalancerClusterState(serverToRegions, null, null, null, null,
null);
}
/**
diff --git
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java
index 1b9aca931b4..cff009f0456 100644
---
a/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java
+++
b/hbase-balancer/src/test/java/org/apache/hadoop/hbase/master/balancer/TestStoreFileTableSkewCostFunction.java
@@ -193,7 +193,7 @@ public class TestStoreFileTableSkewCostFunction {
private final RegionInfo[] testRegions;
DummyBalancerClusterState(BalancerClusterState bcs) {
- super(bcs.clusterState, null, null, null, null);
+ super(bcs.clusterState, null, null, null, null, null);
this.testRegions = bcs.regions;
}
diff --git
a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java
b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java
index 2cf55a1abdc..e5447618ec2 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hbase;
import edu.umd.cs.findbugs.annotations.Nullable;
+import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -112,4 +113,21 @@ public interface ServerMetrics {
* rounded to MB
*/
Map<String, Integer> getRegionCachedInfo();
+
+ /**
+ * The available cache space on this region server (bytes), if reported in
the server load.
+ */
+ default long getCacheFreeSize() {
+ return 0L;
+ }
+
+ /**
+ * Returns the region cold data information for the regions hosted on this
server. Here, cold data
+ * refers only to region data that is classified as cold by the
DataTieringManager according to
+ * the configured priority logic. These data should be kept out of block
cache.
+ * @return map of region encoded name and its total cold data size, rounded
to MB
+ */
+ default Map<String, Integer> getRegionColdDataSize() {
+ return Collections.emptyMap();
+ }
}
diff --git
a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java
b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java
index c7aea21e845..66684ed244d 100644
---
a/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java
+++
b/hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java
@@ -64,7 +64,7 @@ public final class ServerMetricsBuilder {
public static ServerMetrics toServerMetrics(ServerName serverName, int
versionNumber,
String version, ClusterStatusProtos.ServerLoad serverLoadPB) {
- return ServerMetricsBuilder.newBuilder(serverName)
+ ServerMetricsBuilder builder = ServerMetricsBuilder.newBuilder(serverName)
.setRequestCountPerSecond(serverLoadPB.getNumberOfRequests())
.setRequestCount(serverLoadPB.getTotalNumberOfRequests())
.setInfoServerPort(serverLoadPB.getInfoServerPort())
@@ -88,7 +88,10 @@ public final class ServerMetricsBuilder {
.setRegionCachedInfo(serverLoadPB.getRegionCachedInfoMap())
.setReportTimestamp(serverLoadPB.getReportEndTime())
.setLastReportTimestamp(serverLoadPB.getReportStartTime()).setVersionNumber(versionNumber)
- .setVersion(version).build();
+ .setVersion(version)
+ .setCacheFreeSize(serverLoadPB.hasCacheFreeSize() ?
serverLoadPB.getCacheFreeSize() : 0L)
+ .setRegionColdDataInfo(serverLoadPB.getRegionColdDataMap());
+ return builder.build();
}
public static List<HBaseProtos.Coprocessor> toCoprocessor(Collection<String>
names) {
@@ -118,6 +121,7 @@ public final class ServerMetricsBuilder {
if (metrics.getReplicationLoadSink() != null) {
builder.setReplLoadSink(ProtobufUtil.toReplicationLoadSink(metrics.getReplicationLoadSink()));
}
+ builder.setCacheFreeSize(metrics.getCacheFreeSize());
return builder.build();
}
@@ -145,6 +149,8 @@ public final class ServerMetricsBuilder {
private long lastReportTimestamp = 0;
private final List<ServerTask> tasks = new ArrayList<>();
private Map<String, Integer> regionCachedInfo = new HashMap<>();
+ private long cacheFreeSize;
+ private Map<String, Integer> regionColdDataInfo = Collections.emptyMap();
private ServerMetricsBuilder(ServerName serverName) {
this.serverName = serverName;
@@ -240,11 +246,21 @@ public final class ServerMetricsBuilder {
return this;
}
+ public ServerMetricsBuilder setCacheFreeSize(long blockCacheFreeSize) {
+ this.cacheFreeSize = blockCacheFreeSize;
+ return this;
+ }
+
+ public ServerMetricsBuilder setRegionColdDataInfo(Map<String, Integer>
regionColdDataInfo) {
+ this.regionColdDataInfo = regionColdDataInfo;
+ return this;
+ }
+
public ServerMetrics build() {
return new ServerMetricsImpl(serverName, versionNumber, version,
requestCountPerSecond,
requestCount, readRequestCount, writeRequestCount, usedHeapSize,
maxHeapSize, infoServerPort,
sources, sink, regionStatus, coprocessorNames, reportTimestamp,
lastReportTimestamp,
- userMetrics, tasks, regionCachedInfo);
+ userMetrics, tasks, regionCachedInfo, cacheFreeSize, regionColdDataInfo);
}
private static class ServerMetricsImpl implements ServerMetrics {
@@ -268,6 +284,8 @@ public final class ServerMetricsBuilder {
private final Map<byte[], UserMetrics> userMetrics;
private final List<ServerTask> tasks;
private final Map<String, Integer> regionCachedInfo;
+ private final long cacheFreeSize;
+ private final Map<String, Integer> regionColdDataInfo;
ServerMetricsImpl(ServerName serverName, int versionNumber, String version,
long requestCountPerSecond, long requestCount, long readRequestsCount,
@@ -275,7 +293,8 @@ public final class ServerMetricsBuilder {
List<ReplicationLoadSource> sources, ReplicationLoadSink sink,
Map<byte[], RegionMetrics> regionStatus, Set<String> coprocessorNames,
long reportTimestamp,
long lastReportTimestamp, Map<byte[], UserMetrics> userMetrics,
List<ServerTask> tasks,
- Map<String, Integer> regionCachedInfo) {
+ Map<String, Integer> regionCachedInfo, long cacheFreeSize,
+ Map<String, Integer> regionColdDataInfo) {
this.serverName = Preconditions.checkNotNull(serverName);
this.versionNumber = versionNumber;
this.version = version;
@@ -295,6 +314,8 @@ public final class ServerMetricsBuilder {
this.lastReportTimestamp = lastReportTimestamp;
this.tasks = tasks;
this.regionCachedInfo = regionCachedInfo;
+ this.cacheFreeSize = cacheFreeSize;
+ this.regionColdDataInfo = regionColdDataInfo;
}
@Override
@@ -402,6 +423,17 @@ public final class ServerMetricsBuilder {
return Collections.unmodifiableMap(regionCachedInfo);
}
+ @Override
+ public long getCacheFreeSize() {
+ return cacheFreeSize;
+ }
+
+ @Override
+ public Map<String, Integer> getRegionColdDataSize() {
+ return Collections
+ .unmodifiableMap(regionColdDataInfo != null ? regionColdDataInfo :
Collections.emptyMap());
+ }
+
@Override
public String toString() {
int storeCount = 0;
diff --git a/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto
b/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto
index afff971687d..2db64346c28 100644
--- a/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto
+++ b/hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto
@@ -332,6 +332,17 @@ message ServerLoad {
* The metrics for region cached on this region server
*/
map<string, uint32> regionCachedInfo = 16;
+
+ /**
+* Unallocated block cache capacity on this RegionServer, in bytes.
+* Used by the master for cache-aware load balancing (optional).
+*/
+ optional uint64 cacheFreeSize = 17;
+
+ /**
+ * The metrics for total region cold data size
+ */
+ map<string, uint32> regionColdData = 18;
}
message LiveServerInfo {
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index 672607fe5bc..ab75a9ecbca 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -1242,6 +1242,12 @@ public class HRegionServer extends
HBaseServerBase<RSRpcServices>
});
});
});
+ serverLoad.setCacheFreeSize(regionServerWrapper.getBlockCacheFreeSize());
+ if (DataTieringManager.getInstance() != null) {
+ DataTieringManager.getInstance().getRegionColdDataSize()
+ .forEach((regionName, coldDataSize) ->
serverLoad.putRegionColdData(regionName,
+ roundSize(coldDataSize.getSecond(), unitMB)));
+ }
serverLoad.setReportStartTime(reportStartTime);
serverLoad.setReportEndTime(reportEndTime);
if (this.infoServer != null) {
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java
index eeba839ac11..2c38f56cfdd 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionsRecoveryChore.java
@@ -400,6 +400,11 @@ public class TestRegionsRecoveryChore {
return new HashMap<>();
}
+ @Override
+ public Map<String, Integer> getRegionColdDataSize() {
+ return new HashMap<>();
+ }
+
};
return serverMetrics;
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
index 0551adf9601..42a0ad213cf 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java
@@ -20,13 +20,25 @@ package org.apache.hadoop.hbase.master.balancer;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Deque;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.RegionMetrics;
import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.Size;
+import org.apache.hadoop.hbase.TableName;
+import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.testclassification.MasterTests;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.util.Pair;
@@ -251,12 +263,105 @@ public class TestCacheAwareLoadBalancerCostFunctions
extends StochasticBalancerT
}
}
+ /**
+ * When block-cache persistence is enabled, cold regions (below
+ * {@link CacheAwareLoadBalancer#LOW_CACHE_RATIO_FOR_RELOCATION_KEY})
together with RS-reported
+ * block-cache free bytes inflate plausible best placement so weighted cache
cost crosses
+ * {@code minCostNeedBalance}; {@link StochasticLoadBalancer#needsBalance}
returns true even with
+ * regions evenly spread across servers (no region-count skew).
+ */
+ @Test
+ public void testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace()
{
+ conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY,
"/tmp/prefetch.persistence");
+ CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf);
+ int regionSizeMb = 64;
+ long cacheFreeInBytes = regionSizeMb * 1024L * 1024L;
+ // simulates a cache ratio lower than
+ // CacheAwareLoadBalancer.LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT
+ float simulatedCacheRatio = 0.1f;
+ Map<ServerName, List<RegionInfo>> clusterServers =
+ mockClusterServersUnsorted(new int[] { 1, 1 }, 1);
+ List<RegionInfo> regions = new ArrayList<>();
+ clusterServers.values().forEach(regions::addAll);
+ List<ServerName> serversList = getServersInInsertionOrder(clusterServers);
+ Map<ServerName, Long> blockCacheFree = new HashMap<>();
+ blockCacheFree.put(serversList.get(0), 0L);
+ blockCacheFree.put(serversList.get(1), cacheFreeInBytes);
+ BalancerClusterState cluster = new BalancerClusterState(clusterServers,
+ buildRegionLoads(regions, simulatedCacheRatio, regionSizeMb), null, null,
+ Collections.emptyMap(), blockCacheFree);
+ lb.initCosts(cluster);
+ assertTrue(lb.needsBalance(
+
TableName.valueOf("testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace"),
+ cluster));
+ }
+
+ /**
+ * Checks that needsBalance isn't true when regions report high cache ratios
+ */
+ @Test
+ public void testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace()
{
+ conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY,
"/tmp/prefetch.persistence");
+ CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf);
+ int regionSizeMb = 64;
+ long cacheFreeInBytes = regionSizeMb * 1024L * 1024L;
+ Map<ServerName, List<RegionInfo>> clusterServers =
+ mockClusterServersUnsorted(new int[] { 1, 1 }, 1);
+ List<RegionInfo> all = new ArrayList<>();
+ clusterServers.values().forEach(all::addAll);
+ List<ServerName> serversList = getServersInInsertionOrder(clusterServers);
+ Map<ServerName, Long> blockCacheFree = new HashMap<>();
+ blockCacheFree.put(serversList.get(0), cacheFreeInBytes + 1024 * 1024);
+ blockCacheFree.put(serversList.get(1), cacheFreeInBytes + 1024 * 1024);
+ BalancerClusterState cluster =
+ new BalancerClusterState(clusterServers, buildRegionLoads(all, 1.0f,
regionSizeMb), null,
+ null, Collections.emptyMap(), blockCacheFree);
+ lb.initCosts(cluster);
+ assertFalse(lb.needsBalance(
+
TableName.valueOf("testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace"),
+ cluster));
+ }
+
+ private static CacheAwareLoadBalancer newCacheAwareBalancer(Configuration
cfg) {
+ CacheAwareLoadBalancer lb = new CacheAwareLoadBalancer();
+ lb.setClusterInfoProvider(new DummyClusterInfoProvider(cfg));
+ lb.loadConf(cfg);
+ return lb;
+ }
+
+ private static Map<String, Deque<BalancerRegionLoad>>
+ buildRegionLoads(Collection<RegionInfo> regions, float cachedRatio, int
regionSizeMb) {
+ RegionMetrics rm = mock(RegionMetrics.class);
+ when(rm.getReadRequestCount()).thenReturn(0L);
+ when(rm.getCpRequestCount()).thenReturn(0L);
+ when(rm.getWriteRequestCount()).thenReturn(0L);
+ when(rm.getMemStoreSize()).thenReturn(Size.ZERO);
+ when(rm.getStoreFileSize()).thenReturn(Size.ZERO);
+ when(rm.getRegionSizeMB()).thenReturn(new Size(regionSizeMb,
Size.Unit.MEGABYTE));
+ when(rm.getCurrentRegionCachedRatio()).thenReturn(cachedRatio);
+
+ BalancerRegionLoad brl = new BalancerRegionLoad(rm);
+ Map<String, Deque<BalancerRegionLoad>> loads = new HashMap<>();
+ for (RegionInfo ri : regions) {
+ ArrayDeque<BalancerRegionLoad> dq = new ArrayDeque<>(1);
+ dq.add(brl);
+ loads.put(ri.getRegionNameAsString(), dq);
+ loads.put(ri.getEncodedName(), dq);
+ }
+ return loads;
+ }
+
+ private static List<ServerName>
+ getServersInInsertionOrder(Map<ServerName, List<RegionInfo>> cluster) {
+ return new ArrayList<>(cluster.keySet());
+ }
+
private class MockClusterForCacheCost extends BalancerClusterState {
private final Map<Pair<Integer, Integer>, Float> regionServerCacheRatio =
new HashMap<>();
public MockClusterForCacheCost(int[][] regionsArray) {
// regions[0] is an array where index = serverIndex and value = number
of regions
- super(mockClusterServersUnsorted(regionsArray[0], 1), null, null, null,
null);
+ super(mockClusterServersUnsorted(regionsArray[0], 1), null, null, null,
null, null);
Map<String, Pair<ServerName, Float>> oldCacheRatio = new HashMap<>();
for (int i = 1; i < regionsArray.length; i++) {
int regionIndex = i - 1;