This is an automated email from the ASF dual-hosted git repository.
bbeaudreault pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/master by this push:
new ebd189157e1 HBASE-27225 Add BucketAllocator bucket size statistic
logging (#4637)
ebd189157e1 is described below
commit ebd189157e18daab8446cd679829c5864b1a5c29
Author: Bryan Beaudreault <[email protected]>
AuthorDate: Mon Jul 25 20:54:40 2022 -0400
HBASE-27225 Add BucketAllocator bucket size statistic logging (#4637)
Signed-off-by: Wellington Chevreuil <[email protected]>
---
.../hbase/io/hfile/bucket/BucketAllocator.java | 174 ++++++++++++++++++---
.../hadoop/hbase/io/hfile/bucket/BucketCache.java | 11 +-
.../hbase/io/hfile/bucket/TestBucketCache.java | 21 ++-
3 files changed, 171 insertions(+), 35 deletions(-)
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java
index 5d89f0cbdd3..54032e79c6f 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketAllocator.java
@@ -168,12 +168,15 @@ public final class BucketAllocator {
// Free bucket means it has space to allocate a block;
// Completely free bucket means it has no block.
private LinkedMap bucketList, freeBuckets, completelyFreeBuckets;
+ // only modified under synchronization, but also read outside it.
+ private volatile long fragmentationBytes;
private int sizeIndex;
BucketSizeInfo(int sizeIndex) {
bucketList = new LinkedMap();
freeBuckets = new LinkedMap();
completelyFreeBuckets = new LinkedMap();
+ fragmentationBytes = 0;
this.sizeIndex = sizeIndex;
}
@@ -193,7 +196,7 @@ public final class BucketAllocator {
* Find a bucket to allocate a block
* @return the offset in the IOEngine
*/
- public long allocateBlock() {
+ public long allocateBlock(int blockSize) {
Bucket b = null;
if (freeBuckets.size() > 0) {
// Use up an existing one first...
@@ -206,6 +209,9 @@ public final class BucketAllocator {
if (b == null) return -1;
long result = b.allocate();
blockAllocated(b);
+ if (blockSize < b.getItemAllocationSize()) {
+ fragmentationBytes += b.getItemAllocationSize() - blockSize;
+ }
return result;
}
@@ -236,23 +242,38 @@ public final class BucketAllocator {
completelyFreeBuckets.remove(b);
}
- public void freeBlock(Bucket b, long offset) {
+ public void freeBlock(Bucket b, long offset, int length) {
assert bucketList.containsKey(b);
// else we shouldn't have anything to free...
assert (!completelyFreeBuckets.containsKey(b));
b.free(offset);
+ if (length < b.getItemAllocationSize()) {
+ fragmentationBytes -= b.getItemAllocationSize() - length;
+ }
if (!freeBuckets.containsKey(b)) freeBuckets.put(b, b);
if (b.isCompletelyFree()) completelyFreeBuckets.put(b, b);
}
public synchronized IndexStatistics statistics() {
long free = 0, used = 0;
+ int full = 0;
for (Object obj : bucketList.keySet()) {
Bucket b = (Bucket) obj;
free += b.freeCount();
used += b.usedCount();
+ if (!b.hasFreeSpace()) {
+ full++;
+ }
}
- return new IndexStatistics(free, used, bucketSizes[sizeIndex]);
+ int bucketObjectSize = bucketSizes[sizeIndex];
+ // this is most likely to always be 1 or 0
+ int fillingBuckets = Math.max(0, freeBuckets.size() -
completelyFreeBuckets.size());
+ // if bucket capacity is not perfectly divisible by a bucket's object
size, there will
+ // be some left over per bucket. for some object sizes this may be large
enough to be
+ // non-trivial and worth tuning by choosing a more divisible object size.
+ long wastedBytes = (bucketCapacity % bucketObjectSize) * (full +
fillingBuckets);
+ return new IndexStatistics(free, used, bucketObjectSize, full,
completelyFreeBuckets.size(),
+ wastedBytes, fragmentationBytes);
}
@Override
@@ -434,7 +455,7 @@ public final class BucketAllocator {
+ "; adjust BucketCache sizes " +
BlockCacheFactory.BUCKET_CACHE_BUCKETS_KEY
+ " to accomodate if size seems reasonable and you want it cached.");
}
- long offset = bsi.allocateBlock();
+ long offset = bsi.allocateBlock(blockSize);
// Ask caller to free up space and try again!
if (offset < 0) throw new CacheFullException(blockSize, bsi.sizeIndex());
@@ -455,11 +476,11 @@ public final class BucketAllocator {
* @param offset block's offset
* @return size freed
*/
- public synchronized int freeBlock(long offset) {
+ public synchronized int freeBlock(long offset, int length) {
int bucketNo = (int) (offset / bucketCapacity);
assert bucketNo >= 0 && bucketNo < buckets.length;
Bucket targetBucket = buckets[bucketNo];
- bucketSizeInfos[targetBucket.sizeIndex()].freeBlock(targetBucket, offset);
+ bucketSizeInfos[targetBucket.sizeIndex()].freeBlock(targetBucket, offset,
length);
usedSize -= targetBucket.getItemAllocationSize();
return targetBucket.getItemAllocationSize();
}
@@ -478,50 +499,141 @@ public final class BucketAllocator {
return targetBucket.getItemAllocationSize();
}
+ /**
+ * Statistics to give a glimpse into the distribution of BucketCache
objects. Each configured
+ * bucket size, denoted by {@link BucketSizeInfo}, gets an IndexStatistic. A
BucketSizeInfo
+ * allocates blocks of a configured size from claimed buckets. If you have a
bucket size of 512k,
+ * the corresponding BucketSizeInfo will always allocate chunks of 512k at a
time regardless of
+ * actual request.
+ * <p>
+ * Over time, as a BucketSizeInfo gets more allocations, it will claim more
buckets from the total
+ * pool of completelyFreeBuckets. As blocks are freed from a BucketSizeInfo,
those buckets may be
+ * returned to the completelyFreeBuckets pool.
+ * <p>
+ * The IndexStatistics help visualize how these buckets are currently
distributed, through counts
+ * of items, bytes, and fullBuckets. Additionally, mismatches between block
sizes and bucket sizes
+ * can manifest in inefficient cache usage. These typically manifest in
three ways:
+ * <p>
+ * 1. Allocation failures, because block size is larger than max bucket
size. These show up in
+ * logs and can be alleviated by adding larger bucket sizes if
appropriate.<br>
+ * 2. Memory fragmentation, because blocks are typically smaller than the
bucket size. See
+ * {@link #fragmentationBytes()} for details.<br>
+ * 3. Memory waste, because a bucket's itemSize is not a perfect divisor of
bucketCapacity. see
+ * {@link #wastedBytes()} for details.<br>
+ */
static class IndexStatistics {
- private long freeCount, usedCount, itemSize, totalCount;
+ private long freeCount, usedCount, itemSize, totalCount, wastedBytes,
fragmentationBytes;
+ private int fullBuckets, completelyFreeBuckets;
+ /**
+ * How many more items can be allocated from the currently claimed blocks
of this bucket size
+ */
public long freeCount() {
return freeCount;
}
+ /**
+ * How many items are currently taking up space in this bucket size's
buckets
+ */
public long usedCount() {
return usedCount;
}
+ /**
+ * Combined {@link #freeCount()} + {@link #usedCount()}
+ */
public long totalCount() {
return totalCount;
}
+ /**
+ * How many more bytes can be allocated from the currently claimed blocks
of this bucket size
+ */
public long freeBytes() {
return freeCount * itemSize;
}
+ /**
+ * How many bytes are currently taking up space in this bucket size's
buckets Note: If your
+ * items are less than the bucket size of this bucket, the actual used
bytes by items will be
+ * lower than this value. But since a bucket size can only allocate items
of a single size, this
+ * value is the true number of used bytes. The difference will be counted
in
+ * {@link #fragmentationBytes()}.
+ */
public long usedBytes() {
return usedCount * itemSize;
}
+ /**
+ * Combined {@link #totalCount()} * {@link #itemSize()}
+ */
public long totalBytes() {
return totalCount * itemSize;
}
+ /**
+ * This bucket size can only allocate items of this size, even if the
requested allocation size
+ * is smaller. The rest goes towards {@link #fragmentationBytes()}.
+ */
public long itemSize() {
return itemSize;
}
- public IndexStatistics(long free, long used, long itemSize) {
- setTo(free, used, itemSize);
+ /**
+ * How many buckets have been completely filled by blocks for this bucket
size. These buckets
+ * can't accept any more blocks unless some existing are freed.
+ */
+ public int fullBuckets() {
+ return fullBuckets;
+ }
+
+ /**
+ * How many buckets are currently claimed by this bucket size but as yet
totally unused. These
+ * buckets are available for reallocation to other bucket sizes if those
fill up.
+ */
+ public int completelyFreeBuckets() {
+ return completelyFreeBuckets;
+ }
+
+ /**
+ * If {@link #bucketCapacity} is not perfectly divisible by this {@link
#itemSize()}, the
+ * remainder will be unusable by in buckets of this size. A high value
here may be optimized by
+ * trying to choose bucket sizes which can better divide {@link
#bucketCapacity}.
+ */
+ public long wastedBytes() {
+ return wastedBytes;
+ }
+
+ /**
+ * Every time you allocate blocks in these buckets where the block size is
less than the bucket
+ * size, fragmentation increases by that difference. You can reduce
fragmentation by lowering
+ * the bucket size so that it is closer to the typical block size. This
may have the consequence
+ * of bumping some blocks to the next larger bucket size, so
experimentation may be needed.
+ */
+ public long fragmentationBytes() {
+ return fragmentationBytes;
+ }
+
+ public IndexStatistics(long free, long used, long itemSize, int
fullBuckets,
+ int completelyFreeBuckets, long wastedBytes, long fragmentationBytes) {
+ setTo(free, used, itemSize, fullBuckets, completelyFreeBuckets,
wastedBytes,
+ fragmentationBytes);
}
public IndexStatistics() {
- setTo(-1, -1, 0);
+ setTo(-1, -1, 0, 0, 0, 0, 0);
}
- public void setTo(long free, long used, long itemSize) {
+ public void setTo(long free, long used, long itemSize, int fullBuckets,
+ int completelyFreeBuckets, long wastedBytes, long fragmentationBytes) {
this.itemSize = itemSize;
this.freeCount = free;
this.usedCount = used;
this.totalCount = free + used;
+ this.fullBuckets = fullBuckets;
+ this.completelyFreeBuckets = completelyFreeBuckets;
+ this.wastedBytes = wastedBytes;
+ this.fragmentationBytes = fragmentationBytes;
}
}
@@ -529,26 +641,43 @@ public final class BucketAllocator {
return this.buckets;
}
- void logStatistics() {
+ void logDebugStatistics() {
+ if (!LOG.isDebugEnabled()) {
+ return;
+ }
+
IndexStatistics total = new IndexStatistics();
IndexStatistics[] stats = getIndexStatistics(total);
- LOG.info("Bucket allocator statistics follow:\n");
- LOG.info(" Free bytes=" + total.freeBytes() + "+; used bytes=" +
total.usedBytes()
- + "; total bytes=" + total.totalBytes());
+ LOG.debug("Bucket allocator statistics follow:");
+ LOG.debug(
+ " Free bytes={}; used bytes={}; total bytes={}; wasted bytes={};
fragmentation bytes={}; "
+ + "completelyFreeBuckets={}",
+ total.freeBytes(), total.usedBytes(), total.totalBytes(),
total.wastedBytes(),
+ total.fragmentationBytes(), total.completelyFreeBuckets());
for (IndexStatistics s : stats) {
- LOG.info(" Object size " + s.itemSize() + " used=" + s.usedCount() + ";
free="
- + s.freeCount() + "; total=" + s.totalCount());
+ LOG.debug(
+ " Object size {}; used={}; free={}; total={}; wasted bytes={};
fragmentation bytes={}, "
+ + "full buckets={}",
+ s.itemSize(), s.usedCount(), s.freeCount(), s.totalCount(),
s.wastedBytes(),
+ s.fragmentationBytes(), s.fullBuckets());
}
}
IndexStatistics[] getIndexStatistics(IndexStatistics grandTotal) {
IndexStatistics[] stats = getIndexStatistics();
- long totalfree = 0, totalused = 0;
+ long totalfree = 0, totalused = 0, totalWasted = 0, totalFragmented = 0;
+ int fullBuckets = 0, completelyFreeBuckets = 0;
+
for (IndexStatistics stat : stats) {
totalfree += stat.freeBytes();
totalused += stat.usedBytes();
+ totalWasted += stat.wastedBytes();
+ totalFragmented += stat.fragmentationBytes();
+ fullBuckets += stat.fullBuckets();
+ completelyFreeBuckets += stat.completelyFreeBuckets();
}
- grandTotal.setTo(totalfree, totalused, 1);
+ grandTotal.setTo(totalfree, totalused, 1, fullBuckets,
completelyFreeBuckets, totalWasted,
+ totalFragmented);
return stats;
}
@@ -559,13 +688,6 @@ public final class BucketAllocator {
return stats;
}
- public long freeBlock(long freeList[]) {
- long sz = 0;
- for (int i = 0; i < freeList.length; ++i)
- sz += freeBlock(freeList[i]);
- return sz;
- }
-
public int getBucketIndex(long offset) {
return (int) (offset / bucketCapacity);
}
diff --git
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index c9a940768ab..b30efd53fa7 100644
---
a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++
b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -580,7 +580,7 @@ public class BucketCache implements BlockCache, HeapSize {
* {@link BucketEntry#refCnt} becoming 0.
*/
void freeBucketEntry(BucketEntry bucketEntry) {
- bucketAllocator.freeBlock(bucketEntry.offset());
+ bucketAllocator.freeBlock(bucketEntry.offset(), bucketEntry.getLength());
realCacheSize.add(-1 * bucketEntry.getLength());
}
@@ -738,6 +738,8 @@ public class BucketCache implements BlockCache, HeapSize {
+ cacheStats.getEvictedCount() + ", " + "evictedPerRun=" +
cacheStats.evictedPerEviction()
+ ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount());
cacheStats.reset();
+
+ bucketAllocator.logDebugStatistics();
}
public long getRealCacheSize() {
@@ -1119,8 +1121,9 @@ public class BucketCache implements BlockCache, HeapSize {
checkIOErrorIsTolerated();
// Since we failed sync, free the blocks in bucket allocator
for (int i = 0; i < entries.size(); ++i) {
- if (bucketEntries[i] != null) {
- bucketAllocator.freeBlock(bucketEntries[i].offset());
+ BucketEntry bucketEntry = bucketEntries[i];
+ if (bucketEntry != null) {
+ bucketAllocator.freeBlock(bucketEntry.offset(),
bucketEntry.getLength());
bucketEntries[i] = null;
}
}
@@ -1538,7 +1541,7 @@ public class BucketCache implements BlockCache, HeapSize {
succ = true;
} finally {
if (!succ) {
- alloc.freeBlock(offset);
+ alloc.freeBlock(offset, len);
}
}
realCacheSize.add(len);
diff --git
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
index 2379bb51605..66962bcd7bb 100644
---
a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
+++
b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestBucketCache.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import static org.mockito.Mockito.when;
import java.io.File;
import java.io.IOException;
@@ -56,6 +57,7 @@ import
org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.RAMQueueEntry;
import org.apache.hadoop.hbase.nio.ByteBuff;
import org.apache.hadoop.hbase.testclassification.IOTests;
import org.apache.hadoop.hbase.testclassification.LargeTests;
+import org.apache.hadoop.hbase.util.Pair;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
@@ -169,7 +171,7 @@ public class TestBucketCache {
final List<Integer> BLOCKSIZES = Arrays.asList(4 * 1024, 8 * 1024, 64 *
1024, 96 * 1024);
boolean full = false;
- ArrayList<Long> allocations = new ArrayList<>();
+ ArrayList<Pair<Long, Integer>> allocations = new ArrayList<>();
// Fill the allocated extents by choosing a random blocksize. Continues
selecting blocks until
// the cache is completely filled.
List<Integer> tmp = new ArrayList<>(BLOCKSIZES);
@@ -177,7 +179,7 @@ public class TestBucketCache {
Integer blockSize = null;
try {
blockSize = randFrom(tmp);
- allocations.add(mAllocator.allocateBlock(blockSize));
+ allocations.add(new Pair<>(mAllocator.allocateBlock(blockSize),
blockSize));
} catch (CacheFullException cfe) {
tmp.remove(blockSize);
if (tmp.isEmpty()) full = true;
@@ -188,10 +190,19 @@ public class TestBucketCache {
BucketSizeInfo bucketSizeInfo =
mAllocator.roundUpToBucketSizeInfo(blockSize);
IndexStatistics indexStatistics = bucketSizeInfo.statistics();
assertEquals("unexpected freeCount for " + bucketSizeInfo, 0,
indexStatistics.freeCount());
+
+ // we know the block sizes above are multiples of 1024, but default
bucket sizes give an
+ // additional 1024 on top of that so this counts towards fragmentation
in our test
+ // real life may have worse fragmentation because blocks may not be
perfectly sized to block
+ // size, given encoding/compression and large rows
+ assertEquals(1024 * indexStatistics.totalCount(),
indexStatistics.fragmentationBytes());
}
- for (long offset : allocations) {
- assertEquals(mAllocator.sizeOfAllocation(offset),
mAllocator.freeBlock(offset));
+ mAllocator.logDebugStatistics();
+
+ for (Pair<Long, Integer> allocation : allocations) {
+ assertEquals(mAllocator.sizeOfAllocation(allocation.getFirst()),
+ mAllocator.freeBlock(allocation.getFirst(), allocation.getSecond()));
}
assertEquals(0, mAllocator.getUsedSize());
}
@@ -674,7 +685,7 @@ public class TestBucketCache {
// initialize an mocked ioengine.
IOEngine ioEngine = Mockito.mock(IOEngine.class);
- Mockito.when(ioEngine.usesSharedMemory()).thenReturn(false);
+ when(ioEngine.usesSharedMemory()).thenReturn(false);
// Mockito.doNothing().when(ioEngine).write(Mockito.any(ByteBuffer.class),
Mockito.anyLong());
Mockito.doThrow(RuntimeException.class).when(ioEngine).write(Mockito.any(ByteBuffer.class),
Mockito.anyLong());