This is an automated email from the ASF dual-hosted git repository. mmerli pushed a commit to branch branch-4.17 in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit b7fae1a20f8e049563c3396c2bc5c43615c0009a Author: Yong Zhang <[email protected]> AuthorDate: Fri May 30 10:10:16 2025 +0800 Fix the data loss issue that caused by the wrong entry log header (#4607) * Fix the data loss issue that caused by the wrong entry log header --- # Motivation We observed numerous errors in the broker, which failed to read a ledger from the bookkeeper: although the ledger metadata still existed, the ledger could not be read from the bookkeeper. After checking the data, we found that the entry log in which the ledger was located had been deleted by the bookkeeper, so this is a data loss issue in the bookkeeper. The entry log file was deleted by the garbage collector because a wrong file header had been written to the entry log file. Here is an example showing that the header is wrong: ``` Failed to get ledgers map index from: 82.log : Not all ledgers were found in ledgers map index. expected: -1932430239 -- found: 0 -- entryLogId: 82 ``` * Add test --- .../bookkeeper/bookie/DefaultEntryLogger.java | 4 +++ .../bookkeeper/bookie/EntryLoggerAllocator.java | 1 + .../bookkeeper/bookie/GarbageCollectorThread.java | 3 +- .../bookkeeper/bookie/DefaultEntryLogTest.java | 40 ++++++++++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java index c47c0411c2..34c9f8e7e3 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/DefaultEntryLogger.java @@ -1151,6 +1151,10 @@ public class DefaultEntryLogger implements EntryLogger { + " -- found: " + meta.getLedgersMap().size() + " -- entryLogId: " + entryLogId); } + if (header.ledgersCount == 0) { + throw new IOException("No ledgers map found in entryLogId " + entryLogId + ", do scan to double confirm"); + } + return meta; } diff --git 
a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java index aec2fb1cd0..d65a31d87c 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/EntryLoggerAllocator.java @@ -83,6 +83,7 @@ class EntryLoggerAllocator { // within the same JVM. All of these Bookie instances access this header // so there can be race conditions when entry logs are rolled over and // this header buffer is cleared before writing it into the new logChannel. + logfileHeader.setZero(0, DefaultEntryLogger.LOGFILE_HEADER_SIZE); logfileHeader.writeBytes("BKLO".getBytes(UTF_8)); logfileHeader.writeInt(DefaultEntryLogger.HEADER_CURRENT_VERSION); logfileHeader.writerIndex(DefaultEntryLogger.LOGFILE_HEADER_SIZE); diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java index be2da2d544..f76294b3d9 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java @@ -801,11 +801,12 @@ public class GarbageCollectorThread implements Runnable { continue; } - LOG.info("Extracting entry log meta from entryLogId: {}", entryLogId); try { // Read through the entry log file and extract the entry log meta EntryLogMetadata entryLogMeta = entryLogger.getEntryLogMetadata(entryLogId, throttler); + LOG.info("Extracted entry log meta from entryLogId: {}, ledgers {}", + entryLogId, entryLogMeta.getLedgersMap().keys()); removeIfLedgerNotExists(entryLogMeta); if (entryLogMeta.isEmpty()) { // This means the entry log is not associated with any active diff --git 
a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java index 3048ef33a8..e648638156 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/DefaultEntryLogTest.java @@ -414,6 +414,46 @@ public class DefaultEntryLogTest { assertEquals(120, meta.getRemainingSize()); } + @Test + public void testLedgersMapIsEmpty() throws Exception { + // create some entries + entryLogger.addEntry(1L, generateEntry(1, 1).nioBuffer()); + entryLogger.addEntry(3L, generateEntry(3, 1).nioBuffer()); + entryLogger.addEntry(2L, generateEntry(2, 1).nioBuffer()); + entryLogger.addEntry(1L, generateEntry(1, 2).nioBuffer()); + ((EntryLogManagerBase) entryLogger.getEntryLogManager()).createNewLog(DefaultEntryLogger.UNASSIGNED_LEDGERID); + entryLogger.close(); + + // Rewrite the entry log header to be on V0 format + File f = new File(curDir, "0.log"); + RandomAccessFile raf = new RandomAccessFile(f, "rw"); + raf.seek(8); + // Mock that there is a ledgers map offset but the ledgers count is 0 + raf.writeLong(40); + raf.writeInt(0); + raf.close(); + + // now see which ledgers are in the log + entryLogger = new DefaultEntryLogger(conf, dirsMgr); + + try { + entryLogger.extractEntryLogMetadataFromIndex(0L); + fail("Should not be possible to recover from ledgers map index"); + } catch (IOException e) { + assertEquals("No ledgers map found in entryLogId 0, do scan to double confirm", e.getMessage()); + } + + // Public method should succeed by falling back to scanning the file + EntryLogMetadata meta = entryLogger.getEntryLogMetadata(0L); + LOG.info("Extracted Meta From Entry Log {}", meta); + assertEquals(60, meta.getLedgersMap().get(1L)); + assertEquals(30, meta.getLedgersMap().get(2L)); + assertEquals(30, meta.getLedgersMap().get(3L)); + 
assertFalse(meta.getLedgersMap().containsKey(4L)); + assertEquals(120, meta.getTotalSize()); + assertEquals(120, meta.getRemainingSize()); + } + /** * Explicitly try to recover using the ledgers map index at the end of the entry log. */
