This is an automated email from the ASF dual-hosted git repository. chenhang pushed a commit to branch branch-4.14 in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit 10740fd5b83af488a4f20b6f8fab5722838a69ba Author: Matteo Merli <[email protected]> AuthorDate: Mon May 22 12:21:46 2023 -0700 Avoid compaction to trigger extra flushes DbLedgerStorage (#3959) * Avoid compaction to trigger extra flushes DbLedgerStorage * Expanded comment * Fixed test * Fixed direct io test (cherry picked from commit c924cfeb509c4e65e33a82ae88ad423017edb669) --- .../ldb/SingleDirectoryDbLedgerStorage.java | 25 ++++++++++++++++++++-- .../bookie/storage/ldb/DbLedgerStorageTest.java | 2 ++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java index dd2027e47b..23013e0139 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/storage/ldb/SingleDirectoryDbLedgerStorage.java @@ -755,9 +755,30 @@ public class SingleDirectoryDbLedgerStorage implements CompactableLedgerStorage @Override public void updateEntriesLocations(Iterable<EntryLocation> locations) throws IOException { - // Trigger a flush to have all the entries being compacted in the db storage - flush(); + // Before updating the DB with the new location for the compacted entries, we need to + // make sure that there is no ongoing flush() operation. + // If there were a flush, we could have the following situation, which is highly + // unlikely though possible: + // 1. Flush operation has written the write-cache content into entry-log files + // 2. The DB location index is not yet updated + // 3. Compaction is triggered and starts compacting some of the recent files + // 4. Compaction will write the "new location" into the DB + // 5. 
The pending flush() will overwrite the DB with the "old location", pointing + // to a file that no longer exists + // + // To avoid this race condition, we need all the entries that are potentially + // included in the compaction round to have all their indexes already flushed into + // the DB. + // The easiest lightweight way to achieve this is to wait for any pending + // flush operation to be completed before updating the index with the compacted + // entries, by blocking on the flushMutex. + flushMutex.lock(); + flushMutex.unlock(); + // We don't need to keep the flush mutex locked here while updating the DB. + // It's fine to have a concurrent flush operation at this point, because we + // know that none of the entries being flushed was included in the compaction + // round that we are dealing with. entryLocationIndex.updateLocations(locations); } diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java index f5d3cefe6c..a39d380e6d 100644 --- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java +++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/bookie/storage/ldb/DbLedgerStorageTest.java @@ -222,6 +222,7 @@ public class DbLedgerStorageTest { entry3.writeBytes("entry-3".getBytes()); storage.addEntry(entry3); + // Simulate bookie compaction SingleDirectoryDbLedgerStorage singleDirStorage = ((DbLedgerStorage) storage).getLedgerStorageList().get(0); EntryLogger entryLogger = singleDirStorage.getEntryLogger(); @@ -232,6 +233,7 @@ public class DbLedgerStorageTest { newEntry3.writeBytes("new-entry-3".getBytes()); long location = entryLogger.addEntry(4L, newEntry3, false); + storage.flush(); List<EntryLocation> locations = Lists.newArrayList(new EntryLocation(4, 3, location)); singleDirStorage.updateEntriesLocations(locations);
