This is an automated email from the ASF dual-hosted git repository. yong pushed a commit to branch branch-4.14 in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit e86635c1f26c82e082cfe07f9ff92e52bfeeb541 Author: shustsud <[email protected]> AuthorDate: Mon Oct 25 10:40:14 2021 +0900 Add error handling to readLedgerMetadata in over-replicated ledger GC (#2844) ### Motivation For each ledger whose metadata is not in ZK, the following stack trace is logged: ``` 15:30:17.925 [GarbageCollectorThread-11-1] ERROR o.a.b.b.ScanAndCompareGarbageCollector - Exception when iterating through the ledgers to check for over-replication java.util.concurrent.ExecutionException: org.apache.bookkeeper.client.BKException$BKNoSuchLedgerExistsException: No such ledger exists at java.util.concurrent.CompletableFuture.reportGet(CompletableFuture.java:357) at java.util.concurrent.CompletableFuture.get(CompletableFuture.java:1908) at org.apache.bookkeeper.bookie.ScanAndCompareGarbageCollector.removeOverReplicatedledgers(ScanAndCompareGarbageCollector.java:199) at org.apache.bookkeeper.bookie.ScanAndCompareGarbageCollector.gc(ScanAndCompareGarbageCollector.java:120) at org.apache.bookkeeper.bookie.GarbageCollectorThread.doGcLedgers(GarbageCollectorThread.java:372) at org.apache.bookkeeper.bookie.GarbageCollectorThread.runWithFlags(GarbageCollectorThread.java:323) at org.apache.bookkeeper.bookie.GarbageCollectorThread.safeRun(GarbageCollectorThread.java:301) at org.apache.bookkeeper.common.util.SafeRunnable.run(SafeRunnable.java:36) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at 
io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) at java.lang.Thread.run(Thread.java:748) Caused by: org.apache.bookkeeper.client.BKException$BKNoSuchLedgerExistsException: No such ledger exists at org.apache.bookkeeper.meta.AbstractZkLedgerManager$3.processResult(AbstractZkLedgerManager.java:397) at org.apache.bookkeeper.zookeeper.ZooKeeperClient$19$1.processResult(ZooKeeperClient.java:994) at org.apache.zookeeper.ClientCnxn$EventThread.processEvent(ClientCnxn.java:575) at org.apache.zookeeper.ClientCnxn$EventThread.run(ClientCnxn.java:508) ``` This output is noisy, inflates the log files, and eventually causes an OOM during log rotation, so the stack trace should be suppressed. (This problem is due to [#2813](https://github.com/apache/bookkeeper/pull/2813).) ### Changes Add error handling around readLedgerMetadata in the over-replicated ledger GC so that the stack trace is suppressed. (cherry picked from commit bd5c50bf331c28e6a9db2b8d2b186b86342dbd6b) --- .../bookkeeper/bookie/ScanAndCompareGarbageCollector.java | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java index 72fd797..e99a407 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java @@ -234,9 +234,19 @@ public class ScanAndCompareGarbageCollector implements GarbageCollector { // check ledger ensembles before creating lock nodes. // this is to reduce the number of lock node creations and deletions in ZK. // the ensemble check is done again after the lock node is created. 
- // also, check if the ledger is being replicated already by the replication worker Versioned<LedgerMetadata> preCheckMetadata = ledgerManager.readLedgerMetadata(ledgerId).get(); - if (!isNotBookieIncludedInLedgerEnsembles(preCheckMetadata) || lum.isLedgerBeingReplicated(ledgerId)) { + if (!isNotBookieIncludedInLedgerEnsembles(preCheckMetadata)) { + latch.countDown(); + continue; + } + } catch (Throwable t) { + latch.countDown(); + continue; + } + + try { + // check if the ledger is being replicated already by the replication worker + if (lum.isLedgerBeingReplicated(ledgerId)) { latch.countDown(); continue; }
