This is an automated email from the ASF dual-hosted git repository.
wchevreuil pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new fe0cbdc8d5a HBASE-29276 Compute and display hit ratio by configurable,
granular time periods (#6950) (#6975)
fe0cbdc8d5a is described below
commit fe0cbdc8d5a8c7865a51f32762f8311fedce1268
Author: Wellington Ramos Chevreuil <[email protected]>
AuthorDate: Fri May 9 14:57:00 2025 +0100
HBASE-29276 Compute and display hit ratio by configurable, granular time
periods (#6950) (#6975)
Signed-off-by: Tak Lon (Stephen) Wu <[email protected]>
---
.../hbase/tmpl/regionserver/BlockCacheTmpl.jamon | 170 ++++++++++++++++++++-
.../hadoop/hbase/io/hfile/BlockCacheFactory.java | 22 +++
.../apache/hadoop/hbase/io/hfile/CacheStats.java | 69 ++++++++-
.../hadoop/hbase/io/hfile/bucket/BucketCache.java | 12 +-
.../hbase/io/hfile/bucket/BucketCacheStats.java | 5 +-
.../hadoop/hbase/io/hfile/TestCacheStats.java | 78 ++++++++++
6 files changed, 350 insertions(+), 6 deletions(-)
diff --git
a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheTmpl.jamon
b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheTmpl.jamon
index 67b3df5df16..82609aad719 100644
---
a/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheTmpl.jamon
+++
b/hbase-server/src/main/jamon/org/apache/hadoop/hbase/tmpl/regionserver/BlockCacheTmpl.jamon
@@ -195,6 +195,113 @@
org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix;
<%args>
BlockCache bc;
</%args>
+<%java>
+ int hitPeriods = 0;
+ for(int i=0; i<bc.getStats().getNumPeriodsInWindow(); i++) {
+ if(bc.getStats().getWindowPeriods()[i] != null) {
+ hitPeriods++;
+ }
+ }
+</%java>
+<%if hitPeriods > 0 %>
+ <script>
+ // Wait for document to be fully loaded
+ document.addEventListener('DOMContentLoaded', function() {
+
+ // Count actual items in the DOM
+ const itemRows = document.querySelectorAll('tr.item-row');
+
+ // Pagination state
+ let currentPage = 1;
+ const pageSize = 10;
+ const totalItems = itemRows.length;
+ const totalPages = Math.ceil(totalItems / pageSize);
+
+ // Create page buttons
+ const pageButtonsContainer = document.getElementById('page-buttons');
+ if (pageButtonsContainer) {
+ for (let i = 1; i <= totalPages; i++) {
+ const button = document.createElement('button');
+ button.className = 'page-number';
+ button.textContent = i;
+ button.onclick = function() { goToPage(i); };
+ pageButtonsContainer.appendChild(button);
+ }
+ }
+ function displayItems() {
+ // Hide all item rows
+ itemRows.forEach(row => {
+ row.style.display = 'none';
+ });
+
+ // Calculate indexes
+ const startIndex = (currentPage - 1) * pageSize;
+ const endIndex = Math.min(startIndex + pageSize, totalItems);
+
+ // Show rows for current page
+ let displayedCount = 0;
+ for (let i = startIndex; i < endIndex; i++) {
+ const row = document.getElementById('row-' + i);
+ if (row) {
+ row.style.display = 'table-row';
+ displayedCount++;
+ }
+ }
+
+ // Update pagination UI
+ document.querySelectorAll('.page-number').forEach(btn => {
+ if (parseInt(btn.textContent) === currentPage) {
+ btn.classList.add('active');
+ } else {
+ btn.classList.remove('active');
+ }
+ });
+
+ const prevBtn = document.getElementById('prev-page');
+ const nextBtn = document.getElementById('next-page');
+
+ if (prevBtn) prevBtn.disabled = currentPage === 1;
+ if (nextBtn) nextBtn.disabled = currentPage === totalPages;
+
+ // Update page info
+ const pageInfo = document.getElementById('page-info');
+ if (pageInfo) {
+ pageInfo.textContent = `Showing ${startIndex + 1} to ${endIndex} of
${totalItems} items`;
+ }
+ }
+
+ function goToPage(page) {
+ if (page >= 1 && page <= totalPages) {
+ currentPage = page;
+ displayItems();
+ }
+ }
+
+ window.nextPage = function() {
+ goToPage(currentPage + 1);
+ };
+
+ window.prevPage = function() {
+ goToPage(currentPage - 1);
+ };
+
+ window.goToPage = goToPage;
+
+ // Check URL for initial page
+ const urlParams = new URLSearchParams(window.location.search);
+ const pageParam = urlParams.get('page');
+ if (pageParam) {
+ const parsedPage = parseInt(pageParam);
+ if (!isNaN(parsedPage) && parsedPage >= 1) {
+ currentPage = parsedPage;
+ }
+ }
+
+ // Initial display
+ displayItems();
+ });
+ </script>
+</%if>
<tr>
<td>Hits</td>
<td><% String.format("%,d", bc.getStats().getHitCount()) %></td>
@@ -216,11 +323,71 @@
org.apache.hadoop.util.StringUtils.TraditionalBinaryPrefix;
<td>Block requests that were cache misses but only requests set to use
block cache</td>
</tr>
<tr>
- <td>Hit Ratio</td>
+ <td>All Time Hit Ratio</td>
<td><% String.format("%,.2f", bc.getStats().getHitRatio() * 100) %><%
"%" %></td>
<td>Hit Count divided by total requests count</td>
</tr>
+ <%for int i=0; i<hitPeriods; i++ %>
+ <tr id="row-<% i %>" class="item-row" style="display: none;">
+ <td>Hit Ratio for period starting at <%
bc.getStats().getWindowPeriods()[i] %></td>
+ <%if bc.getStats().getRequestCounts()[i] > 0 %>
+ <td><% String.format("%,.2f",
((double)bc.getStats().getHitCounts()[i] /
(double)bc.getStats().getRequestCounts()[i]) * 100.0) %><% "%" %></td>
+ <%else>
+ <td>No requests</td>
+ </%if>
+ <td>Hit Count divided by total requests count over the <% i %>th
period of <% bc.getStats().getPeriodTimeInMinutes() %> minutes</td>
+ </tr>
+ </%for>
+ <%if hitPeriods > 0 %>
+ <tr class="pagination-row">
+ <td colspan="3">
+ <div class="pagination-container">
+ <button id="prev-page" onclick="prevPage()">Previous</button>
+ <span id="page-buttons" class="page-numbers">
+ </span>
+ <button id="next-page" onclick="nextPage()">Next</button>
+ <span id="page-info" class="page-info"></span>
+ </div>
+ </td>
+ </tr>
+ </%if>
+ <%if bc.getStats().getPeriodTimeInMinutes() > 0 %>
+ <tr>
+ <td>Last <%
bc.getStats().getNumPeriodsInWindow()*bc.getStats().getPeriodTimeInMinutes() %>
minutes Hit Ratio</td>
+ <td><% String.format("%,.2f",
bc.getStats().getHitRatioPastNPeriods() * 100.0) %><% "%" %></td>
+ <td>Hit Count divided by total requests count for the last <%
bc.getStats().getNumPeriodsInWindow()*bc.getStats().getPeriodTimeInMinutes() %>
minutes</td>
+ </tr>
+ <style>
+ .pagination-container {
+ display: flex;
+ align-items: center;
+ gap: 8px;
+ padding: 10px 0;
+ }
+
+ .page-numbers {
+ display: flex;
+ gap: 4px;
+ }
+
+ .page-number {
+ min-width: 30px;
+ text-align: center;
+ }
+
+ .page-number.active {
+ font-weight: bold;
+ background-color: #eee;
+ }
+
+ .page-info {
+ margin-left: 15px;
+ font-size: 0.9em;
+ color: #666;
+ }
+ </style>
+ </%if>
</%def>
<%def bc_stats>
@@ -383,6 +550,7 @@ are combined counts. Request count is sum of hits and
misses.</p>
</tr>
</%if>
</table>
+
<%doc>Call through to block cache Detail rendering template</%doc>
<p>
View block cache <a href="?format=json&bcn=<% name %>">as JSON</a> | Block
cache <a href="?format=json&bcn=<% name %>&bcv=file">as JSON by file</a>
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java
index 744a6bbf012..080f6d1e139 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheFactory.java
@@ -82,6 +82,28 @@ public final class BlockCacheFactory {
@Deprecated
static final String DEPRECATED_BLOCKCACHE_BLOCKSIZE_KEY =
"hbase.offheapcache.minblocksize";
+ /**
+ * The window period length in minutes for CacheStats rolling metrics.
+ */
+ public static final String BLOCKCACHE_STATS_PERIOD_MINUTES_KEY =
+ "hbase.blockcache.stats.period.minutes";
+
+ /**
+ * Default window period length in minutes.
+ */
+ public static final int DEFAULT_BLOCKCACHE_STATS_PERIOD_MINUTES = 5;
+
+ /**
+ * The total number of periods in the window.
+ */
+ public static final String BLOCKCACHE_STATS_PERIODS =
"hbase.blockcache.stats.periods";
+
+ /**
+ * Default number of periods in the window. We define 12 periods of 5
minutes to give an hourly
+ * window split into 5-minute periods.
+ */
+ public static final int DEFAULT_BLOCKCACHE_STATS_PERIODS = 12;
+
+
private BlockCacheFactory() {
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheStats.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheStats.java
index c5a247dfce1..fbf3e108701 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheStats.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CacheStats.java
@@ -18,10 +18,17 @@
package org.apache.hadoop.hbase.io.hfile;
import java.util.Arrays;
+import java.util.Date;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.ScheduledThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import org.apache.hadoop.hbase.metrics.impl.FastLongHistogram;
+import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.yetus.audience.InterfaceAudience;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
/**
* Class that implements cache metrics.
@@ -29,6 +36,8 @@ import org.apache.yetus.audience.InterfaceAudience;
@InterfaceAudience.Private
public class CacheStats {
+ private static final Logger LOG = LoggerFactory.getLogger(CacheStats.class);
+
/**
* Sliding window statistics. The number of metric periods to include in
sliding window hit ratio
* calculations.
@@ -94,6 +103,9 @@ public class CacheStats {
private final LongAdder deleteFamilyBloomHitCount = new LongAdder();
private final LongAdder trailerHitCount = new LongAdder();
+ // Executor for periodic cache stats rolling
+ private ScheduledExecutorService metricsRollerScheduler;
+
/** The number of metrics periods to include in window */
private final int numPeriodsInWindow;
/** Hit counts for each period in window */
@@ -104,6 +116,8 @@ public class CacheStats {
private final long[] requestCounts;
/** Caching access counts for each period in window */
private final long[] requestCachingCounts;
+ /** The initial date for each period in window */
+ private final Date[] windowPeriods;
/** Last hit count read */
private long lastHitCount = 0;
/** Last hit caching count read */
@@ -120,17 +134,40 @@ public class CacheStats {
private FastLongHistogram ageAtEviction;
private long startTime = System.nanoTime();
+ private int periodTimeInMinutes;
+
public CacheStats(final String name) {
- this(name, DEFAULT_WINDOW_PERIODS);
+ this(name, DEFAULT_WINDOW_PERIODS, 0);
}
public CacheStats(final String name, int numPeriodsInWindow) {
+ this(name, numPeriodsInWindow, 0);
+ }
+
+ public CacheStats(final String name, int numPeriodsInWindow, int
periodTimeInMinutes) {
+ this(name, numPeriodsInWindow, periodTimeInMinutes, TimeUnit.MINUTES);
+ }
+
+ CacheStats(final String name, int numPeriodsInWindow, int periodTime,
TimeUnit unit) {
this.numPeriodsInWindow = numPeriodsInWindow;
this.hitCounts = new long[numPeriodsInWindow];
this.hitCachingCounts = new long[numPeriodsInWindow];
this.requestCounts = new long[numPeriodsInWindow];
this.requestCachingCounts = new long[numPeriodsInWindow];
+ this.windowPeriods = new Date[numPeriodsInWindow];
this.ageAtEviction = new FastLongHistogram();
+ this.periodTimeInMinutes = periodTime;
+ if (numPeriodsInWindow > 1 && periodTimeInMinutes > 0) {
+ this.metricsRollerScheduler = new ScheduledThreadPoolExecutor(1);
+ this.metricsRollerScheduler.scheduleAtFixedRate(() -> {
+ LOG.trace("Triggering metrics roll");
+ rollMetricsPeriod();
+ for (int i = 0; i < numPeriodsInWindow; i++) {
+ LOG.trace("period: {}, hit count: {}, request count: {}", i,
hitCounts[i],
+ requestCounts[i]);
+ }
+ }, 1, periodTimeInMinutes, unit);
+ }
}
@Override
@@ -250,6 +287,10 @@ public class CacheStats {
}
}
+ public ScheduledExecutorService getMetricsRollerScheduler() {
+ return metricsRollerScheduler;
+ }
+
public long failInsert() {
return failedInserts.incrementAndGet();
}
@@ -434,6 +475,8 @@ public class CacheStats {
}
public void rollMetricsPeriod() {
+ windowPeriods[windowIndex] =
+ new Date((EnvironmentEdgeManager.currentTime() - (periodTimeInMinutes *
60 * 1000L)));
hitCounts[windowIndex] = getHitCount() - lastHitCount;
lastHitCount = getHitCount();
hitCachingCounts[windowIndex] = getHitCachingCount() - lastHitCachingCount;
@@ -445,6 +488,30 @@ public class CacheStats {
windowIndex = (windowIndex + 1) % numPeriodsInWindow;
}
+ public long[] getHitCounts() {
+ return hitCounts;
+ }
+
+ public long[] getRequestCounts() {
+ return requestCounts;
+ }
+
+ public Date[] getWindowPeriods() {
+ return windowPeriods;
+ }
+
+ public int getWindowIndex() {
+ return windowIndex;
+ }
+
+ public int getNumPeriodsInWindow() {
+ return numPeriodsInWindow;
+ }
+
+ public int getPeriodTimeInMinutes() {
+ return periodTimeInMinutes;
+ }
+
public long getSumHitCountsPastNPeriods() {
return sum(hitCounts);
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index 8d1aaf4e26e..faf8313b7a2 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -17,6 +17,10 @@
*/
package org.apache.hadoop.hbase.io.hfile.bucket;
+import static
org.apache.hadoop.hbase.io.hfile.BlockCacheFactory.BLOCKCACHE_STATS_PERIODS;
+import static
org.apache.hadoop.hbase.io.hfile.BlockCacheFactory.BLOCKCACHE_STATS_PERIOD_MINUTES_KEY;
+import static
org.apache.hadoop.hbase.io.hfile.BlockCacheFactory.DEFAULT_BLOCKCACHE_STATS_PERIODS;
+import static
org.apache.hadoop.hbase.io.hfile.BlockCacheFactory.DEFAULT_BLOCKCACHE_STATS_PERIOD_MINUTES;
import static
org.apache.hadoop.hbase.io.hfile.CacheConfig.BUCKETCACHE_PERSIST_INTERVAL_KEY;
import java.io.File;
@@ -219,7 +223,7 @@ public class BucketCache implements BlockCache, HeapSize {
private static final int DEFAULT_CACHE_WAIT_TIME = 50;
- private final BucketCacheStats cacheStats = new BucketCacheStats();
+ private final BucketCacheStats cacheStats;
private final String persistencePath;
static AtomicBoolean isCacheInconsistent = new AtomicBoolean(false);
private final long cacheCapacity;
@@ -334,6 +338,9 @@ public class BucketCache implements BlockCache, HeapSize {
// these sets the dynamic configs
this.onConfigurationChange(conf);
+ this.cacheStats =
+ new BucketCacheStats(conf.getInt(BLOCKCACHE_STATS_PERIODS,
DEFAULT_BLOCKCACHE_STATS_PERIODS),
+ conf.getInt(BLOCKCACHE_STATS_PERIOD_MINUTES_KEY,
DEFAULT_BLOCKCACHE_STATS_PERIOD_MINUTES));
LOG.info("Instantiating BucketCache with acceptableFactor: " +
acceptableFactor
+ ", minFactor: " + minFactor + ", extraFreeFactor: " + extraFreeFactor
+ ", singleFactor: "
@@ -1759,6 +1766,9 @@ public class BucketCache implements BlockCache, HeapSize {
this.fullyCachedFiles.clear();
this.regionCachedSize.clear();
}
+ if (cacheStats.getMetricsRollerScheduler() != null) {
+ cacheStats.getMetricsRollerScheduler().shutdownNow();
+ }
}
private void join() throws InterruptedException {
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCacheStats.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCacheStats.java
index 73ca011004a..7d0f4d78ea0 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCacheStats.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCacheStats.java
@@ -36,9 +36,8 @@ public class BucketCacheStats extends CacheStats {
/* Tracing failed Bucket Cache allocations. */
private LongAdder allocationFailCount = new LongAdder();
- BucketCacheStats() {
- super("BucketCache");
-
+ BucketCacheStats(int numPeriodsInWindow, int periodTimeInMinutes) {
+ super("BucketCache", numPeriodsInWindow, periodTimeInMinutes);
allocationFailCount.reset();
}
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheStats.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheStats.java
new file mode 100644
index 00000000000..75f1f38848f
--- /dev/null
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/TestCacheStats.java
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io.hfile;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.concurrent.TimeUnit;
+import org.apache.hadoop.hbase.HBaseClassTestRule;
+import org.apache.hadoop.hbase.testclassification.SmallTests;
+import org.junit.ClassRule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+@Category({ SmallTests.class })
+public class TestCacheStats {
+
+ @ClassRule
+ public static final HBaseClassTestRule CLASS_RULE =
+ HBaseClassTestRule.forClass(TestCacheStats.class);
+
+ @Test
+ public void testPeriodicMetrics() throws Exception {
+ CacheStats cacheStats = new CacheStats("test", 5, 1, TimeUnit.SECONDS);
+ cacheStats.hit(false, false, BlockType.DATA);
+ cacheStats.hit(false, false, BlockType.DATA);
+ cacheStats.hit(false, false, BlockType.DATA);
+ cacheStats.miss(false, false, BlockType.DATA);
+ // first period should have a 75% hit ratio, 25% miss ratio
+ Thread.sleep(1001);
+ cacheStats.hit(false, false, BlockType.DATA);
+ cacheStats.hit(false, false, BlockType.DATA);
+ cacheStats.miss(false, false, BlockType.DATA);
+ cacheStats.miss(false, false, BlockType.DATA);
+ Thread.sleep(1001);
+ cacheStats.hit(false, false, BlockType.DATA);
+ cacheStats.miss(false, false, BlockType.DATA);
+ cacheStats.miss(false, false, BlockType.DATA);
+ cacheStats.miss(false, false, BlockType.DATA);
+ Thread.sleep(1001);
+ cacheStats.hit(false, false, BlockType.DATA);
+ cacheStats.hit(false, false, BlockType.DATA);
+ cacheStats.hit(false, false, BlockType.DATA);
+ cacheStats.hit(false, false, BlockType.DATA);
+ Thread.sleep(1001);
+ cacheStats.miss(false, false, BlockType.DATA);
+ cacheStats.miss(false, false, BlockType.DATA);
+ cacheStats.miss(false, false, BlockType.DATA);
+ cacheStats.miss(false, false, BlockType.DATA);
+ Thread.sleep(1001);
+ cacheStats.getMetricsRollerScheduler().shutdownNow();
+ long[] hitCounts = cacheStats.getHitCounts();
+ long[] requestCounts = cacheStats.getRequestCounts();
+ assertEquals(5, hitCounts.length);
+ assertEquals(5, requestCounts.length);
+ assertEquals(3, hitCounts[0]);
+ assertEquals(2, hitCounts[1]);
+ assertEquals(1, hitCounts[2]);
+ assertEquals(4, hitCounts[3]);
+ assertEquals(0, hitCounts[4]);
+ assertEquals(10, cacheStats.getHitCount());
+ assertEquals(0.5, cacheStats.getHitRatioPastNPeriods(), 0.01);
+ }
+}