Copilot commented on code in PR #8197:
URL: https://github.com/apache/hbase/pull/8197#discussion_r3196737412
##########
hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java:
##########
@@ -145,6 +149,8 @@ public static ServerMetricsBuilder newBuilder(ServerName
sn) {
private long lastReportTimestamp = 0;
private final List<ServerTask> tasks = new ArrayList<>();
private Map<String, Integer> regionCachedInfo = new HashMap<>();
+ private long cacheFreeSize;
+ private Map<String, Integer> regionColdDataInfo;
Review Comment:
`regionColdDataInfo` is never initialized to a non-null default, but
`getRegionColdDataSize()` always wraps it with
`Collections.unmodifiableMap(...)`, which will throw an NPE if the setter is
not invoked. Initialize `regionColdDataInfo` to `Collections.emptyMap()`
(and/or defensively treat null as empty in the setter and constructor) to keep
ServerMetrics safe for callers.
##########
hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java:
##########
@@ -88,7 +88,10 @@ public static ServerMetrics toServerMetrics(ServerName
serverName, int versionNu
.setRegionCachedInfo(serverLoadPB.getRegionCachedInfoMap())
.setReportTimestamp(serverLoadPB.getReportEndTime())
.setLastReportTimestamp(serverLoadPB.getReportStartTime()).setVersionNumber(versionNumber)
- .setVersion(version).build();
+ .setVersion(version)
+ .setCacheFreeSize(serverLoadPB.hasCacheFreeSize() ?
serverLoadPB.getCacheFreeSize() : 0L)
+ .setRegionColdDataInfo(serverLoadPB.getRegionColdData());
Review Comment:
`serverLoadPB.getRegionColdData()` is unlikely to be a valid accessor for a
protobuf `map` field (the existing pattern above uses
`getRegionCachedInfoMap()`). This looks like a compile-time issue; it should
use the generated `getRegionColdDataMap()` (or equivalent) accessor and pass
that map into `setRegionColdDataInfo(...)`.
##########
hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java:
##########
@@ -1242,6 +1242,12 @@ private ClusterStatusProtos.ServerLoad
buildServerLoad(long reportStartTime, lon
});
});
});
+ serverLoad.setCacheFreeSize(regionServerWrapper.getBlockCacheFreeSize());
+ if (DataTieringManager.getInstance() != null) {
+ DataTieringManager.getInstance().getRegionColdDataSize()
Review Comment:
`DataTieringManager.getInstance()` is called twice; if the singleton can
change between calls (e.g., shutdown/disable path), the second call could be
null even though the first was not. Store the result in a local variable and
use it for both the null-check and access to avoid a potential race/NPE.
##########
hbase-balancer/src/main/java/org/apache/hadoop/hbase/master/balancer/CacheAwareLoadBalancer.java:
##########
@@ -483,25 +536,90 @@ static class CacheAwareCostFunction extends CostFunction {
!isPersistentCache ? 0.0f : conf.getFloat(CACHE_COST_KEY,
DEFAULT_CACHE_COST));
bestCacheRatio = 0.0;
cacheRatio = 0.0;
+ lowCacheRatioThreshold =
+ conf.getFloat(LOW_CACHE_RATIO_FOR_RELOCATION_KEY,
LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT);
+ potentialCacheRatioAfterMove = Math.min(1.0f, conf
+ .getFloat(POTENTIAL_CACHE_RATIO_AFTER_MOVE_KEY,
POTENTIAL_CACHE_RATIO_AFTER_MOVE_DEFAULT));
+ minFreeCacheSpaceFactor =
+ conf.getFloat(MIN_FREE_CACHE_SPACE_FACTOR_KEY,
MIN_FREE_CACHE_SPACE_FACTOR_DEFAULT);
}
@Override
void prepare(BalancerClusterState cluster) {
super.prepare(cluster);
- cacheRatio = 0.0;
- bestCacheRatio = 0.0;
+ recomputeCacheRatio(cluster);
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio);
+ }
+ }
+ private void recomputeCacheRatio(BalancerClusterState cluster) {
+ double[] currentWeighted = computeCurrentWeightedContributions(cluster);
+ double currentSum = 0.0;
+ double bestCacheSum = 0.0;
for (int region = 0; region < cluster.numRegions; region++) {
- cacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region,
- cluster.regionIndexToServerIndex[region]);
- bestCacheRatio += cluster.getOrComputeWeightedRegionCacheRatio(region,
- getServerWithBestCacheRatioForRegion(region));
+ currentSum += currentWeighted[region];
+ // here we only get the server index where this region cache ratio is
the highest
+ int serverIndexBestCache =
cluster.getOrComputeServerWithBestRegionCachedRatio()[region];
+ double currentHighestCache =
+ cluster.getOrComputeWeightedRegionCacheRatio(region,
serverIndexBestCache);
+ // Get a hypothetical best cache ratio for this region if any server
has enough free cache
+ // to host it.
+ double potentialHighestCache =
+ potentialBestWeightedFromFreeCache(cluster, region,
currentHighestCache);
+ double actualHighest = Math.max(currentHighestCache,
potentialHighestCache);
+ bestCacheSum += actualHighest;
}
+ bestCacheRatio = bestCacheSum;
+ if (bestCacheSum <= 0.0) {
+ cacheRatio = cluster.numRegions == 0 ? 1.0 : 0.0;
+ } else {
+ cacheRatio = Math.min(1.0, currentSum / bestCacheSum);
+ }
+ }
- cacheRatio = bestCacheRatio == 0 ? 1.0 : cacheRatio / bestCacheRatio;
- if (LOG.isDebugEnabled()) {
- LOG.debug("CacheAwareCostFunction: Cost: {}", 1 - cacheRatio);
+ private double[] computeCurrentWeightedContributions(BalancerClusterState
cluster) {
+ int totalRegions = cluster.numRegions;
+ double[] contrib = new double[totalRegions];
+ for (int r = 0; r < totalRegions; r++) {
+ int s = cluster.regionIndexToServerIndex[r];
+ int sizeMb = cluster.getTotalRegionHFileSizeMB(r);
+ if (sizeMb <= 0) {
+ contrib[r] = 0.0;
+ continue;
+ }
+ contrib[r] = cluster.getOrComputeWeightedRegionCacheRatio(r, s);
+ }
+ return contrib;
+ }
+
+ /*
+ * If this region is cold in metrics and at least one RS (including its
current host) reports
+ * enough free block cache to hold it, return an optimistic weighted cache
score ({@link
+ * #potentialCacheRatioAfterMove} * region MB) so placement is not
considered optimal solely
+ * from low ratios when capacity exists somewhere in the cluster.
+ */
+ private double potentialBestWeightedFromFreeCache(BalancerClusterState
cluster, int region,
+ double currentHighestCache) {
+ if (cluster.serverBlockCacheFreeSize == null) {
+ return 0.0;
+ }
Review Comment:
`serverBlockCacheFreeSize` is always initialized in `BalancerClusterState`
(it’s set to a new array even when the input map is null), so this null-check
is dead code. If you need to distinguish “not reported” from “reported as
zero”, consider preserving `serverBlockCacheFreeSize` as `null` when no data is
provided (or add an explicit boolean flag) so the heuristic can reliably
short-circuit and the code remains clear.
##########
hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetrics.java:
##########
@@ -112,4 +112,19 @@ default String getVersion() {
* rounded to MB
*/
Map<String, Integer> getRegionCachedInfo();
+
+ /**
+ * The available cache space on this region server (bytes), if reported in
the server load.
+ */
+ default long getCacheFreeSize() {
+ return 0L;
+ }
+
+ /**
+ * Returns the region cold data information for the regions hosted on this
server. Here, cold data
+ * refers only to region data that is classified as cold by the
DataTieringManager according to
+ * the configured priority logic. These data should be kept out of block
cache.
+ * @return map of region encoded name and its total cold data size, rounded
to MB
+ */
+ Map<String, Integer> getRegionColdDataSize();
Review Comment:
Adding a new non-default method to a public interface is a binary/source
compatibility break for any external `ServerMetrics` implementations. Consider
making `getRegionColdDataSize()` a `default` method returning
`Collections.emptyMap()` (similar to `getCacheFreeSize()`), so older
implementations continue to compile and run.
##########
hbase-protocol-shaded/src/main/protobuf/server/ClusterStatus.proto:
##########
@@ -332,6 +332,17 @@ message ServerLoad {
* The metrics for region cached on this region server
*/
map<string, uint32> regionCachedInfo = 16;
+
+ /**
+* Unallocated block cache capacity on this RegionServer, in bytes.
+* Used by the master for cache-aware load balancing (optional).
+*/
Review Comment:
The new comment block formatting is inconsistent with surrounding proto docs
(missing leading space before `*` and indentation). Reformat to match the
existing style (`/**` then lines starting with ` * ...`) for readability and
consistency.
##########
hbase-client/src/main/java/org/apache/hadoop/hbase/ServerMetricsBuilder.java:
##########
@@ -402,6 +423,16 @@ public Map<String, Integer> getRegionCachedInfo() {
return Collections.unmodifiableMap(regionCachedInfo);
}
+ @Override
+ public long getCacheFreeSize() {
+ return cacheFreeSize;
+ }
+
+ @Override
+ public Map<String, Integer> getRegionColdDataSize() {
+ return Collections.unmodifiableMap(regionColdDataInfo);
Review Comment:
`regionColdDataInfo` is never initialized to a non-null default, but
`getRegionColdDataSize()` always wraps it with
`Collections.unmodifiableMap(...)`, which will throw an NPE if the setter is
not invoked. Initialize `regionColdDataInfo` to `Collections.emptyMap()`
(and/or defensively treat null as empty in the setter and constructor) to keep
ServerMetrics safe for callers.
##########
hbase-server/src/test/java/org/apache/hadoop/hbase/master/balancer/TestCacheAwareLoadBalancerCostFunctions.java:
##########
@@ -251,12 +263,105 @@ public void testCacheCost() {
}
}
+ /**
+ * When block-cache persistence, cold regions (below
+ * {@link CacheAwareLoadBalancer#LOW_CACHE_RATIO_FOR_RELOCATION_KEY})
together with RS-reported
+ * block-cache free bytes inflate plausible best placement so weighted cache
cost crosses
+ * {@code minCostNeedBalance}; {@link StochasticLoadBalancer#needsBalance}
returns true even with
+ * evenly spread region-count skew.
+ */
+ @Test
+ public void testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace()
{
+ conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY,
"/tmp/prefetch.persistence");
+ CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf);
+ int regionSizeMb = 64;
+ long cacheFreeInBytes = regionSizeMb * 1024L * 1024L;
+ // simulates a cache ratio lower than
+ // CacheAwareLoadBalancer.LOW_CACHE_RATIO_FOR_RELOCATION_DEFAULT
+ float simulatedCacheRatio = 0.1f;
+ Map<ServerName, List<RegionInfo>> clusterServers =
+ mockClusterServersUnsorted(new int[] { 1, 1 }, 1);
+ List<RegionInfo> regions = new ArrayList<>();
+ clusterServers.values().forEach(regions::addAll);
+ List<ServerName> serversList = getServersInInsertionOrder(clusterServers);
+ Map<ServerName, Long> blockCacheFree = new HashMap<>();
+ blockCacheFree.put(serversList.get(0), 0L);
+ blockCacheFree.put(serversList.get(1), cacheFreeInBytes);
+ BalancerClusterState cluster = new BalancerClusterState(clusterServers,
+ buildRegionLoads(regions, simulatedCacheRatio, regionSizeMb), null, null,
+ Collections.emptyMap(), blockCacheFree);
+ lb.initCosts(cluster);
+ assertTrue(lb.needsBalance(
+
TableName.valueOf("testNeedsBalanceWhenLowCacheRatioRegionsAndFreeBlockCacheSpace"),
+ cluster));
+ }
+
+ /**
+ * Checks that needsBalance isn't true when regions report high cache ratios
+ */
+ @Test
+ public void testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace()
{
+ conf.set(HConstants.BUCKET_CACHE_PERSISTENT_PATH_KEY,
"/tmp/prefetch.persistence");
+ CacheAwareLoadBalancer lb = newCacheAwareBalancer(conf);
+ int regionSizeMb = 64;
+ long cacheFreeInBytes = regionSizeMb * 1024L * 1024L;
+ Map<ServerName, List<RegionInfo>> clusterServers =
+ mockClusterServersUnsorted(new int[] { 1, 1 }, 1);
+ List<RegionInfo> all = new ArrayList<>();
+ clusterServers.values().forEach(all::addAll);
+ List<ServerName> serversList = getServersInInsertionOrder(clusterServers);
+ Map<ServerName, Long> blockCacheFree = new HashMap<>();
+ blockCacheFree.put(serversList.get(0), cacheFreeInBytes + 1024 * 1024);
+ blockCacheFree.put(serversList.get(1), cacheFreeInBytes + 1024 * 1024);
+ BalancerClusterState cluster =
+ new BalancerClusterState(clusterServers, buildRegionLoads(all, 1.0f,
regionSizeMb), null,
+ null, Collections.emptyMap(), blockCacheFree);
+ lb.initCosts(cluster);
+ assertFalse(lb.needsBalance(
+
TableName.valueOf("testNeedsBalanceFalseWhenWarmRegionsDespiteFreeBlockCacheSpace"),
+ cluster));
+ }
+
+ private static CacheAwareLoadBalancer newCacheAwareBalancer(Configuration
cfg) {
+ CacheAwareLoadBalancer lb = new CacheAwareLoadBalancer();
+ lb.setClusterInfoProvider(new DummyClusterInfoProvider(cfg));
+ lb.loadConf(cfg);
+ return lb;
+ }
+
+ private static Map<String, Deque<BalancerRegionLoad>>
+ buildRegionLoads(Collection<RegionInfo> regions, float cachedRatio, int
regionSizeMb) {
+ RegionMetrics rm = mock(RegionMetrics.class);
+ when(rm.getReadRequestCount()).thenReturn(0L);
+ when(rm.getCpRequestCount()).thenReturn(0L);
+ when(rm.getWriteRequestCount()).thenReturn(0L);
+ when(rm.getMemStoreSize()).thenReturn(Size.ZERO);
+ when(rm.getStoreFileSize()).thenReturn(Size.ZERO);
+ when(rm.getRegionSizeMB()).thenReturn(new Size(regionSizeMb,
Size.Unit.MEGABYTE));
+ when(rm.getCurrentRegionCachedRatio()).thenReturn(cachedRatio);
+
+ BalancerRegionLoad brl = new BalancerRegionLoad(rm);
+ Map<String, Deque<BalancerRegionLoad>> loads = new HashMap<>();
+ for (RegionInfo ri : regions) {
+ ArrayDeque<BalancerRegionLoad> dq = new ArrayDeque<>(1);
+ dq.add(brl);
+ loads.put(ri.getRegionNameAsString(), dq);
+ loads.put(ri.getEncodedName(), dq);
+ }
+ return loads;
+ }
+
+ private static List<ServerName>
+ getServersInInsertionOrder(Map<ServerName, List<RegionInfo>> cluster) {
+ return new ArrayList<>(cluster.keySet());
+ }
Review Comment:
This helper assumes deterministic “insertion order”, but `new
ArrayList<>(cluster.keySet())` is only stable if the input map preserves
insertion order (e.g., LinkedHashMap). Given the use of
`mockClusterServersUnsorted(...)`, this can make the new tests flaky by
assigning `blockCacheFree` to an arbitrary server. Prefer sorting the
`ServerName`s (or returning a deterministic ordering from the mock) before
indexing into `serversList`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]