This is an automated email from the ASF dual-hosted git repository. mmerli pushed a commit to branch branch-4.16 in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit 52d779ae4a6b28a1c3c7221ef21c04878257c342 Author: Yong Zhang <[email protected]> AuthorDate: Fri May 30 10:10:16 2025 +0800 Fix the data loss issue that caused by the wrong entry log header (#4607) * Fix the data loss issue that caused by the wrong entry log header --- # Motivation We observed numerous errors in the broker, which failed to read ledgers from the bookkeeper: although the ledger metadata still existed, the data could not be read from the bookkeeper. After checking the data, we found that the entry log containing the ledger had been deleted by the bookkeeper, i.e. we have a data loss issue in the bookkeeper. The entry log file was deleted by the garbage collector because a wrong file header had been written to the entry log file. Here is an example showing that the header is wrong: ``` Failed to get ledgers map index from: 82.log : Not all ledgers were found in ledgers map index. expected: -1932430239 -- found: 0 -- entryLogId: 82 ``` * Add test --- .../bookkeeper/bookie/DefaultEntryLogger.java | 4 +++ .../bookkeeper/bookie/EntryLoggerAllocator.java | 1 + .../bookkeeper/bookie/GarbageCollectorThread.java | 3 +- .../bookkeeper/bookie/DefaultEntryLogTest.java | 40 ++++++++++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java index ab200b7ca0..cfe7aac793 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java @@ -1152,6 +1152,10 @@ public class DefaultEntryLogger implements EntryLogger { + " -- found: " + meta.getLedgersMap().size() + " -- entryLogId: " + entryLogId); } + if (header.ledgersCount == 0) { + throw new IOException("No ledgers map found in entryLogId " + entryLogId + ", do scan to double confirm"); + } + return meta; } diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java index 68fc1eb3ca..e9ff5030d1 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java @@ -81,6 +81,7 @@ class EntryLoggerAllocator { // within the same JVM. All of these Bookie instances access this header // so there can be race conditions when entry logs are rolled over and // this header buffer is cleared before writing it into the new logChannel. + logfileHeader.setZero(0, DefaultEntryLogger.LOGFILE_HEADER_SIZE); logfileHeader.writeBytes("BKLO".getBytes(UTF_8)); logfileHeader.writeInt(DefaultEntryLogger.HEADER_CURRENT_VERSION); logfileHeader.writerIndex(DefaultEntryLogger.LOGFILE_HEADER_SIZE); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java index 5abf79b2e6..d70082256f 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java @@ -791,11 +791,12 @@ public class GarbageCollectorThread implements Runnable { continue; } - LOG.info("Extracting entry log meta from entryLogId: {}", entryLogId); try { // Read through the entry log file and extract the entry log meta EntryLogMetadata entryLogMeta = entryLogger.getEntryLogMetadata(entryLogId, throttler); + LOG.info("Extracted entry log meta from entryLogId: {}, ledgers {}", + entryLogId, entryLogMeta.getLedgersMap().keys()); removeIfLedgerNotExists(entryLogMeta); if (entryLogMeta.isEmpty()) { // This means the entry log is not associated with any active diff --git 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java index 38a9ebaf21..cbf1251112 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java @@ -366,6 +366,46 @@ public class DefaultEntryLogTest { assertEquals(120, meta.getRemainingSize()); } + @Test + public void testLedgersMapIsEmpty() throws Exception { + // create some entries + entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); + entryLogger.addEntry(3L, generateEntry(3, 1).nioBuffer()); + entryLogger.addEntry(2L, generateEntry(2, 1).nioBuffer()); + entryLogger.addEntry(1L, generateEntry(1, 2).nioBuffer()); + ((EntryLogManagerBase) entryLogger.getEntryLogManager()).createNewLog(DefaultEntryLogger.UNASSIGNED_LEDGERID); + entryLogger.close(); + + // Rewrite the entry log header to be on V0 format + File f = new File(curDir, "0.log"); + RandomAccessFile raf = new RandomAccessFile(f, "rw"); + raf.seek(8); + // Mock that there is a ledgers map offset but the ledgers count is 0 + raf.writeLong(40); + raf.writeInt(0); + raf.close(); + + // now see which ledgers are in the log + entryLogger = new DefaultEntryLogger(conf, dirsMgr); + + try { + entryLogger.extractEntryLogMetadataFromIndex(0L); + fail("Should not be possible to recover from ledgers map index"); + } catch (IOException e) { + assertEquals("No ledgers map found in entryLogId 0, do scan to double confirm", e.getMessage()); + } + + // Public method should succeed by falling back to scanning the file + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(0L); + LOG.info("Extracted Meta From Entry Log {}", meta); + assertEquals(60, meta.getLedgersMap().get(1L)); + assertEquals(30, meta.getLedgersMap().get(2L)); + assertEquals(30, meta.getLedgersMap().get(3L)); + 
assertFalse(meta.getLedgersMap().containsKey(4L)); + assertEquals(120, meta.getTotalSize()); + assertEquals(120, meta.getRemainingSize()); + } + /** * Explicitly try to recover using the ledgers map index at the end of the entry log. */
