This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git


The following commit(s) were added to refs/heads/branch-2.6 by this push:
     new fb30ecbf2ac HBASE-28839: Handle all types of exceptions during retrieval of bucket-cache from persistence. (#7579)
fb30ecbf2ac is described below

commit fb30ecbf2ac5466943c4913cc2cfabaee2527316
Author: Wellington Ramos Chevreuil <[email protected]>
AuthorDate: Tue Jan 6 10:18:25 2026 +0000

    HBASE-28839: Handle all types of exceptions during retrieval of bucket-cache from persistence. (#7579)
    
    (cherry picked from commit 0411e38cd161a42d19bfa1977037799018337759 by [email protected])
    Signed-off-by: Duo Zhang <[email protected]>
---
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  | 107 ++++++++-------------
 .../hbase/io/hfile/bucket/BucketProtoUtils.java    |  57 ++++++-----
 .../io/hfile/bucket/TestVerifyBucketCacheFile.java |   9 +-
 3 files changed, 79 insertions(+), 94 deletions(-)
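
The first BucketCache.java hunk below widens the recovery catch from IOException to Throwable, so any failure while restoring the persisted cache index clears the partially loaded state and the cache still comes back online empty. A minimal, generic Java sketch of that fallback pattern (the PersistentIndexRecovery class, loadIndex helper, and file layout here are illustrative assumptions, not HBase code):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

// Hedged sketch of the recovery fallback; names and file format are illustrative, not HBase's.
class PersistentIndexRecovery {
  private final Map<String, Long> index = new ConcurrentHashMap<>();
  private volatile boolean enabled = false;

  void recover(Path persistencePath) {
    try {
      // Anything can go wrong here: IOException, corrupt-data parse errors, runtime exceptions.
      loadIndex(persistencePath);
    } catch (Throwable t) {        // previously only IOException was handled
      index.clear();               // drop whatever was partially restored and start clean
    } finally {
      enabled = true;              // the cache is always re-enabled, even after a failed restore
    }
  }

  private void loadIndex(Path path) throws IOException {
    for (String line : Files.readAllLines(path)) {
      String[] parts = line.split("=", 2);
      index.put(parts[0], Long.parseLong(parts[1])); // may also throw unchecked exceptions
    }
  }
}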

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index c0745ca8f89..f8bab749b7d 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -394,17 +394,18 @@ public class BucketCache implements BlockCache, HeapSize {
       try {
         retrieveFromFile(bucketSizes);
         LOG.info("Persistent bucket cache recovery from {} is complete.", persistencePath);
-      } catch (IOException ioex) {
-        LOG.error("Can't restore from file[{}] because of ", persistencePath, ioex);
+      } catch (Throwable ex) {
+        LOG.warn("Can't restore from file[{}]. The bucket cache will be reset and rebuilt."
+          + " Exception seen: ", persistencePath, ex);
         backingMap.clear();
         fullyCachedFiles.clear();
         backingMapValidated.set(true);
+        regionCachedSize.clear();
         try {
           bucketAllocator = new BucketAllocator(capacity, bucketSizes);
-        } catch (BucketAllocatorException ex) {
-          LOG.error("Exception during Bucket Allocation", ex);
+        } catch (BucketAllocatorException allocatorException) {
+          LOG.error("Exception during Bucket Allocation", allocatorException);
         }
-        regionCachedSize.clear();
       } finally {
         this.cacheState = CacheState.ENABLED;
         startWriterThreads();
@@ -990,7 +991,8 @@ public class BucketCache implements BlockCache, HeapSize {
         : (StringUtils.formatPercent(cacheStats.getHitCachingRatio(), 2) + ", "))
       + "evictions=" + cacheStats.getEvictionCount() + ", " + "evicted="
       + cacheStats.getEvictedCount() + ", " + "evictedPerRun=" + cacheStats.evictedPerEviction()
-      + ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount());
+      + ", " + "allocationFailCount=" + cacheStats.getAllocationFailCount() + ", blocksCount="
+      + backingMap.size());
     cacheStats.reset();
 
     bucketAllocator.logDebugStatistics();
@@ -1599,7 +1601,7 @@ public class BucketCache implements BlockCache, HeapSize {
       } else if (Arrays.equals(pbuf, BucketProtoUtils.PB_MAGIC_V2)) {
         // The new persistence format of chunked persistence.
         LOG.info("Reading new chunked format of persistence.");
-        retrieveChunkedBackingMap(in, bucketSizes);
+        retrieveChunkedBackingMap(in);
       } else {
         // In 3.0 we have enough flexibility to dump the old cache data.
         // TODO: In 2.x line, this might need to be filled in to support reading the old format
@@ -1693,31 +1695,12 @@ public class BucketCache implements BlockCache, HeapSize {
     }
   }
 
-  private void parsePB(BucketCacheProtos.BucketCacheEntry firstChunk,
-    List<BucketCacheProtos.BackingMap> chunks) throws IOException {
-    fullyCachedFiles.clear();
-    Pair<ConcurrentHashMap<BlockCacheKey, BucketEntry>, NavigableSet<BlockCacheKey>> pair =
-      BucketProtoUtils.fromPB(firstChunk.getDeserializersMap(), firstChunk.getBackingMap(),
-        this::createRecycler);
-    backingMap.putAll(pair.getFirst());
-    blocksByHFile.addAll(pair.getSecond());
-    fullyCachedFiles.putAll(BucketProtoUtils.fromPB(firstChunk.getCachedFilesMap()));
-
-    LOG.debug("Number of blocks after first chunk: {}, blocksByHFile: {}", backingMap.size(),
-      fullyCachedFiles.size());
-    int i = 1;
-    for (BucketCacheProtos.BackingMap chunk : chunks) {
-      Pair<ConcurrentHashMap<BlockCacheKey, BucketEntry>, NavigableSet<BlockCacheKey>> pair2 =
-        BucketProtoUtils.fromPB(firstChunk.getDeserializersMap(), chunk, this::createRecycler);
-      backingMap.putAll(pair2.getFirst());
-      blocksByHFile.addAll(pair2.getSecond());
-      LOG.debug("Number of blocks after {} reading chunk: {}, blocksByHFile: {}", ++i,
-        backingMap.size(), fullyCachedFiles.size());
-    }
-    verifyFileIntegrity(firstChunk);
-    verifyCapacityAndClasses(firstChunk.getCacheCapacity(), firstChunk.getIoClass(),
-      firstChunk.getMapClass());
-    updateRegionSizeMapWhileRetrievingFromFile();
+  private void updateCacheIndex(BucketCacheProtos.BackingMap chunk,
+    java.util.Map<java.lang.Integer, java.lang.String> deserializer) throws IOException {
+      Pair<ConcurrentHashMap<BlockCacheKey, BucketEntry>, NavigableSet<BlockCacheKey>> pair2 =
+      BucketProtoUtils.fromPB(deserializer, chunk, this::createRecycler);
+    backingMap.putAll(pair2.getFirst());
+    blocksByHFile.addAll(pair2.getSecond());
   }
 
   private void parsePB(BucketCacheProtos.BucketCacheEntry proto) throws IOException {
@@ -1738,52 +1721,42 @@ public class BucketCache implements BlockCache, HeapSize {
   }
 
   private void persistChunkedBackingMap(FileOutputStream fos) throws IOException {
-    long numChunks = backingMap.size() / persistenceChunkSize;
-    if (backingMap.size() % persistenceChunkSize != 0) {
-      numChunks += 1;
-    }
-
     LOG.debug(
       "persistToFile: before persisting backing map size: {}, "
-        + "fullycachedFiles size: {}, chunkSize: {}, numberofChunks: {}",
-      backingMap.size(), fullyCachedFiles.size(), persistenceChunkSize, numChunks);
+        + "fullycachedFiles size: {}, chunkSize: {}",
+      backingMap.size(), fullyCachedFiles.size(), persistenceChunkSize);
 
-    BucketProtoUtils.serializeAsPB(this, fos, persistenceChunkSize, numChunks);
+    BucketProtoUtils.serializeAsPB(this, fos, persistenceChunkSize);
 
     LOG.debug(
-      "persistToFile: after persisting backing map size: {}, "
-        + "fullycachedFiles size: {}, numChunksPersisteed: {}",
-      backingMap.size(), fullyCachedFiles.size(), numChunks);
+      "persistToFile: after persisting backing map size: {}, " + "fullycachedFiles size: {}",
+      backingMap.size(), fullyCachedFiles.size());
   }
 
-  private void retrieveChunkedBackingMap(FileInputStream in, int[] bucketSizes) throws IOException {
-    byte[] bytes = new byte[Long.BYTES];
-    int readSize = in.read(bytes);
-    if (readSize != Long.BYTES) {
-      throw new IOException("Invalid size of chunk-size read from persistence: " + readSize);
-    }
-    long batchSize = Bytes.toLong(bytes, 0);
-
-    readSize = in.read(bytes);
-    if (readSize != Long.BYTES) {
-      throw new IOException("Invalid size for number of chunks read from persistence: " + readSize);
-    }
-    long numChunks = Bytes.toLong(bytes, 0);
-
-    LOG.info("Number of chunks: {}, chunk size: {}", numChunks, batchSize);
-
-    ArrayList<BucketCacheProtos.BackingMap> bucketCacheMaps = new ArrayList<>();
+  private void retrieveChunkedBackingMap(FileInputStream in) throws IOException {
     // Read the first chunk that has all the details.
-    BucketCacheProtos.BucketCacheEntry firstChunk =
+    BucketCacheProtos.BucketCacheEntry cacheEntry =
       BucketCacheProtos.BucketCacheEntry.parseDelimitedFrom(in);
 
-    // Subsequent chunks have the backingMap entries.
-    for (int i = 1; i < numChunks; i++) {
-      LOG.info("Reading chunk no: {}", i + 1);
-      bucketCacheMaps.add(BucketCacheProtos.BackingMap.parseDelimitedFrom(in));
-      LOG.info("Retrieved chunk: {}", i + 1);
+    fullyCachedFiles.clear();
+    fullyCachedFiles.putAll(BucketProtoUtils.fromPB(cacheEntry.getCachedFilesMap()));
+
+    backingMap.clear();
+    blocksByHFile.clear();
+
+    // Read the backing map entries in batches.
+    int numChunks = 0;
+    while (in.available() > 0) {
+      updateCacheIndex(BucketCacheProtos.BackingMap.parseDelimitedFrom(in),
+        cacheEntry.getDeserializersMap());
+      numChunks++;
     }
-    parsePB(firstChunk, bucketCacheMaps);
+
+    LOG.info("Retrieved {} of chunks with blockCount = {}.", numChunks, backingMap.size());
+    verifyFileIntegrity(cacheEntry);
+    verifyCapacityAndClasses(cacheEntry.getCacheCapacity(), cacheEntry.getIoClass(),
+      cacheEntry.getMapClass());
+    updateRegionSizeMapWhileRetrievingFromFile();
   }
 
   /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
index 4618200325c..eb9c2cb5de8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketProtoUtils.java
@@ -33,7 +33,6 @@ import org.apache.hadoop.hbase.io.hfile.BlockPriority;
 import org.apache.hadoop.hbase.io.hfile.BlockType;
 import org.apache.hadoop.hbase.io.hfile.CacheableDeserializerIdManager;
 import org.apache.hadoop.hbase.io.hfile.HFileBlock;
-import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.yetus.audience.InterfaceAudience;
 
@@ -51,47 +50,53 @@ final class BucketProtoUtils {
   }
 
   static BucketCacheProtos.BucketCacheEntry toPB(BucketCache cache,
-    BucketCacheProtos.BackingMap backingMap) {
+    BucketCacheProtos.BackingMap.Builder backingMapBuilder) {
     return BucketCacheProtos.BucketCacheEntry.newBuilder().setCacheCapacity(cache.getMaxSize())
       .setIoClass(cache.ioEngine.getClass().getName())
       .setMapClass(cache.backingMap.getClass().getName())
       .putAllDeserializers(CacheableDeserializerIdManager.save())
-      .putAllCachedFiles(toCachedPB(cache.fullyCachedFiles)).setBackingMap(backingMap)
+      .putAllCachedFiles(toCachedPB(cache.fullyCachedFiles))
+      .setBackingMap(backingMapBuilder.build())
       .setChecksum(ByteString
         .copyFrom(((PersistentIOEngine) cache.ioEngine).calculateChecksum(cache.getAlgorithm())))
       .build();
   }
 
-  public static void serializeAsPB(BucketCache cache, FileOutputStream fos, long chunkSize,
-    long numChunks) throws IOException {
-    int blockCount = 0;
-    int chunkCount = 0;
-    int backingMapSize = cache.backingMap.size();
+  public static void serializeAsPB(BucketCache cache, FileOutputStream fos, long chunkSize)
+    throws IOException {
+    // Write the new version of magic number.
+    fos.write(PB_MAGIC_V2);
+
     BucketCacheProtos.BackingMap.Builder builder = BucketCacheProtos.BackingMap.newBuilder();
+    BucketCacheProtos.BackingMapEntry.Builder entryBuilder =
+      BucketCacheProtos.BackingMapEntry.newBuilder();
 
-    fos.write(PB_MAGIC_V2);
-    fos.write(Bytes.toBytes(chunkSize));
-    fos.write(Bytes.toBytes(numChunks));
+    // Persist the metadata first.
+    toPB(cache, builder).writeDelimitedTo(fos);
 
+    int blockCount = 0;
+    // Persist backing map entries in chunks of size 'chunkSize'.
     for (Map.Entry<BlockCacheKey, BucketEntry> entry : cache.backingMap.entrySet()) {
       blockCount++;
-      builder.addEntry(
-        BucketCacheProtos.BackingMapEntry.newBuilder().setKey(BucketProtoUtils.toPB(entry.getKey()))
-          .setValue(BucketProtoUtils.toPB(entry.getValue())).build());
-      if (blockCount % chunkSize == 0 || (blockCount == backingMapSize)) {
-        chunkCount++;
-        if (chunkCount == 1) {
-          // Persist all details along with the first chunk into BucketCacheEntry
-          BucketProtoUtils.toPB(cache, builder.build()).writeDelimitedTo(fos);
-        } else {
-          // Directly persist subsequent backing-map chunks.
-          builder.build().writeDelimitedTo(fos);
-        }
-        if (blockCount < backingMapSize) {
-          builder = BucketCacheProtos.BackingMap.newBuilder();
-        }
+      addEntryToBuilder(entry, entryBuilder, builder);
+      if (blockCount % chunkSize == 0) {
+        builder.build().writeDelimitedTo(fos);
+        builder.clear();
       }
     }
+    // Persist the last chunk.
+    if (builder.getEntryList().size() > 0) {
+      builder.build().writeDelimitedTo(fos);
+    }
+  }
+
+  private static void addEntryToBuilder(Map.Entry<BlockCacheKey, BucketEntry> entry,
+    BucketCacheProtos.BackingMapEntry.Builder entryBuilder,
+    BucketCacheProtos.BackingMap.Builder builder) {
+    entryBuilder.clear();
+    entryBuilder.setKey(BucketProtoUtils.toPB(entry.getKey()));
+    entryBuilder.setValue(BucketProtoUtils.toPB(entry.getValue()));
+    builder.addEntry(entryBuilder.build());
   }
 
   private static BucketCacheProtos.BlockCacheKey toPB(BlockCacheKey key) {
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestVerifyBucketCacheFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestVerifyBucketCacheFile.java
index 58e347188c3..1e4ee38cc8e 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestVerifyBucketCacheFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/io/hfile/bucket/TestVerifyBucketCacheFile.java
@@ -415,10 +415,17 @@ public class TestVerifyBucketCacheFile {
   }
 
   @Test
-  public void testMultipleChunks() throws Exception {
+  public void testCompletelyFilledChunks() throws Exception {
+    // Test where the all the chunks are complete with chunkSize entries
     testChunkedBackingMapRecovery(5, 10);
   }
 
+  @Test
+  public void testPartiallyFilledChunks() throws Exception {
+    // Test where the last chunk is not completely filled.
+    testChunkedBackingMapRecovery(5, 13);
+  }
+
   private void testChunkedBackingMapRecovery(int chunkSize, int numBlocks) throws Exception {
     HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
     Path testDir = TEST_UTIL.getDataTestDir();
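
The BucketCache/BucketProtoUtils hunks above also drop the chunk-size/number-of-chunks header from the persistence file: the writer emits the metadata record followed by length-delimited backing-map chunks, and the reader simply parses chunks until the stream is exhausted, which is why a partially filled last chunk (the new testPartiallyFilledChunks case) needs no special handling. A self-contained sketch of that count-free, read-until-EOF chunk framing, using plain DataOutputStream/DataInputStream in place of the generated protobuf classes (an assumption for illustration only):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;

// Hedged sketch of count-free chunked persistence; not the actual HBase on-disk format.
public class ChunkedPersistenceSketch {

  // Writer: flush a chunk every 'chunkSize' entries, plus one final partial chunk if needed.
  static void write(List<byte[]> entries, int chunkSize, OutputStream out) throws IOException {
    DataOutputStream dos = new DataOutputStream(out);
    ByteArrayOutputStream chunk = new ByteArrayOutputStream();
    int count = 0;
    for (byte[] entry : entries) {
      chunk.write(entry);                  // entry encoding kept trivial for brevity
      if (++count % chunkSize == 0) {
        flushChunk(dos, chunk);
      }
    }
    if (chunk.size() > 0) {                // last, possibly partial chunk; no chunk count up front
      flushChunk(dos, chunk);
    }
    dos.flush();
  }

  private static void flushChunk(DataOutputStream dos, ByteArrayOutputStream chunk)
    throws IOException {
    dos.writeInt(chunk.size());            // length-delimited, analogous to protobuf writeDelimitedTo()
    chunk.writeTo(dos);
    chunk.reset();
  }

  // Reader: keep parsing chunks until the stream runs out, mirroring "while (in.available() > 0)".
  static List<byte[]> readChunks(InputStream in) throws IOException {
    DataInputStream dis = new DataInputStream(in);
    List<byte[]> chunks = new ArrayList<>();
    while (dis.available() > 0) {
      byte[] buf = new byte[dis.readInt()];
      dis.readFully(buf);
      chunks.add(buf);
    }
    return chunks;
  }

  public static void main(String[] args) throws IOException {
    List<byte[]> entries = new ArrayList<>();
    for (int i = 0; i < 13; i++) {         // 13 entries with chunkSize 5 -> one partial last chunk
      entries.add(("entry-" + i).getBytes());
    }
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    write(entries, 5, out);
    System.out.println("chunks read back: "
      + readChunks(new ByteArrayInputStream(out.toByteArray())).size()); // prints 3
  }
}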
