This is an automated email from the ASF dual-hosted git repository.
lhotari pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
The following commit(s) were added to refs/heads/master by this push:
new 417ff1651f add rate limit for zk read rate in gc. (#4645)
417ff1651f is described below
commit 417ff1651fed3d37abc4b3d4e57f8eac950dce9e
Author: Wenzhi Feng <[email protected]>
AuthorDate: Tue Aug 12 16:37:25 2025 +0800
add rate limit for zk read rate in gc. (#4645)
* add rate limit for gc.
* fix checkstyle.
* fix conf name and acquire.
* rename gcZkOpRateLimit to gcMetadataOpRateLimit.
* add return label.
* add test code.
* document conf.
---------
Co-authored-by: fengwenzhi <[email protected]>
---
.../bookie/ScanAndCompareGarbageCollector.java | 7 ++++
.../bookkeeper/conf/ServerConfiguration.java | 19 +++++++++
.../org/apache/bookkeeper/meta/GcLedgersTest.java | 48 ++++++++++++++++++++++
conf/bk_server.conf | 3 ++
site3/website/docs/reference/config.md | 1 +
5 files changed, 78 insertions(+)
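For orientation before the diff: the new knob caps how many metadata (ZooKeeper) reads the GC thread issues per second. A minimal sketch of how an operator might set it programmatically, using the setter added in this patch (the value 500 is arbitrary, chosen only for illustration):

    import org.apache.bookkeeper.conf.ServerConfiguration;

    ServerConfiguration conf = new ServerConfiguration();
    // Cap GC metadata reads at 500 ops/sec (the default is 1000).
    conf.setGcMetadataOpRateLimit(500);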
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
index 34a54c3e4e..4cdedb36b2 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/ScanAndCompareGarbageCollector.java
@@ -24,6 +24,7 @@ package org.apache.bookkeeper.bookie;
import static org.apache.bookkeeper.common.concurrent.FutureUtils.result;
import com.google.common.collect.Sets;
+import com.google.common.util.concurrent.RateLimiter;
import java.io.IOException;
import java.net.URI;
import java.util.List;
@@ -84,6 +85,7 @@ public class ScanAndCompareGarbageCollector implements GarbageCollector {
private int activeLedgerCounter;
private StatsLogger statsLogger;
private final int maxConcurrentRequests;
+ private final RateLimiter gcMetadataOpRateLimiter;
public ScanAndCompareGarbageCollector(LedgerManager ledgerManager, CompactableLedgerStorage ledgerStorage,
        ServerConfiguration conf, StatsLogger statsLogger) throws IOException {
@@ -103,6 +105,7 @@ public class ScanAndCompareGarbageCollector implements GarbageCollector {
enableGcOverReplicatedLedger, gcOverReplicatedLedgerIntervalMillis, maxConcurrentRequests);
verifyMetadataOnGc = conf.getVerifyMetadataOnGC();
+ this.gcMetadataOpRateLimiter = RateLimiter.create(conf.getGcMetadataOpRateLimit());
this.activeLedgerCounter = 0;
}
@@ -153,6 +156,7 @@ public class ScanAndCompareGarbageCollector implements GarbageCollector {
Versioned<LedgerMetadata> metadata = null;
while (!done) {
start = end + 1;
+ gcMetadataOpRateLimiter.acquire();
if (ledgerRangeIterator.hasNext()) {
LedgerRange lRange = ledgerRangeIterator.next();
ledgersInMetadata = lRange.getLedgers();
@@ -175,6 +179,7 @@ public class ScanAndCompareGarbageCollector implements GarbageCollector {
metadata = null;
int rc = BKException.Code.OK;
try {
+ gcMetadataOpRateLimiter.acquire();
metadata = result(ledgerManager.readLedgerMetadata(bkLid), zkOpTimeoutMs, TimeUnit.MILLISECONDS);
} catch (BKException | TimeoutException e) {
@@ -236,6 +241,7 @@ public class ScanAndCompareGarbageCollector implements GarbageCollector {
// check ledger ensembles before creating lock nodes.
// this is to reduce the number of lock node creations and deletions in ZK.
// the ensemble check is done again after the lock node is created.
+ gcMetadataOpRateLimiter.acquire();
Versioned<LedgerMetadata> preCheckMetadata = ledgerManager.readLedgerMetadata(ledgerId).get();
if (!isNotBookieIncludedInLedgerEnsembles(preCheckMetadata)) {
latch.countDown();
@@ -261,6 +267,7 @@ public class ScanAndCompareGarbageCollector implements GarbageCollector {
// current bookie again and, in that case, we cannot remove the ledger from local storage
lum.acquireUnderreplicatedLedger(ledgerId);
semaphore.acquire();
+ gcMetadataOpRateLimiter.acquire();
ledgerManager.readLedgerMetadata(ledgerId)
.whenComplete((metadata, exception) -> {
try {
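For context on the pattern above: Guava's RateLimiter hands out permits at a fixed steady rate, and acquire() blocks the calling thread until the next permit is available, so the GC scan is smoothed to at most the configured number of metadata reads per second rather than bursting against ZooKeeper. A minimal sketch of the same pattern in isolation (readLedgerMetadataFromStore is a hypothetical stand-in, not a method from this patch):

    import com.google.common.util.concurrent.RateLimiter;

    // Allow at most 200 metadata operations per second.
    RateLimiter limiter = RateLimiter.create(200.0);
    for (long ledgerId = 0; ledgerId < 1000; ledgerId++) {
        limiter.acquire(); // blocks until a permit is available
        readLedgerMetadataFromStore(ledgerId); // hypothetical metadata read
    }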
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
index 4da2ee8db8..79daeb3f5d 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java
@@ -114,6 +114,7 @@ public class ServerConfiguration extends AbstractConfiguration<ServerConfigurati
protected static final String GC_OVERREPLICATED_LEDGER_WAIT_TIME = "gcOverreplicatedLedgerWaitTime";
protected static final String GC_OVERREPLICATED_LEDGER_MAX_CONCURRENT_REQUESTS = "gcOverreplicatedLedgerMaxConcurrentRequests";
+ protected static final String GC_METADATA_OP_RATE_LIMIT = "gcMetadataOpRateLimit";
protected static final String USE_TRANSACTIONAL_COMPACTION = "useTransactionalCompaction";
protected static final String VERIFY_METADATA_ON_GC = "verifyMetadataOnGC";
protected static final String GC_ENTRYLOGMETADATA_CACHE_ENABLED = "gcEntryLogMetadataCacheEnabled";
@@ -481,6 +482,24 @@ public class ServerConfiguration extends AbstractConfiguration<ServerConfigurati
return this;
}
+ /**
+ * Get the rate limit of metadata operations in garbage collection.
+ * @return rate limit of metadata operations in garbage collection
+ */
+ public int getGcMetadataOpRateLimit() {
+ return this.getInt(GC_METADATA_OP_RATE_LIMIT, 1000);
+ }
+
+ /**
+ * Set the rate limit of metadata operations in garbage collection.
+ * @param gcRateLimit rate limit of metadata operations in garbage collection, in operations per second
+ * @return server configuration
+ */
+ public ServerConfiguration setGcMetadataOpRateLimit(int gcRateLimit) {
+ this.setProperty(GC_METADATA_OP_RATE_LIMIT, Integer.toString(gcRateLimit));
+ return this;
+ }
+
/**
* Get whether to use transactional compaction and using a separate log for compaction or not.
*
diff --git a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java
index fec74a8202..058dc8ce34 100644
--- a/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java
+++ b/bookkeeper-server/src/test/java/org/apache/bookkeeper/meta/GcLedgersTest.java
@@ -36,6 +36,7 @@ import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
+import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
@@ -342,6 +343,53 @@ public class GcLedgersTest extends LedgerManagerTestCase {
assertEquals("Should have cleaned first ledger" + first, (long) first,
(long) cleaned.get(0));
}
+
+ /**
+ * Verifies that the garbage collector respects the configured rate limit for metadata operations.
+ * @throws Exception
+ */
+ @Test
+ public void testGcMetadataOpRateLimit() throws Exception {
+ int numLedgers = 2000;
+ int numRemovedLedgers = 800;
+ final Set<Long> createdLedgers = new HashSet<Long>();
+ createLedgers(numLedgers, createdLedgers);
+
+ ServerConfiguration conf = new ServerConfiguration(baseConf);
+ int customRateLimit = 200;
+ conf.setGcMetadataOpRateLimit(customRateLimit);
+ // set true to verify metadata on gc
+ conf.setVerifyMetadataOnGc(true);
+
+ final GarbageCollector garbageCollector = new ScanAndCompareGarbageCollector(
+         getLedgerManager(), new MockLedgerStorage(), conf, NullStatsLogger.INSTANCE);
+
+ // delete created ledgers to simulate the garbage collection scenario
+ Iterator<Long> createdLedgersIterator = createdLedgers.iterator();
+ for (int i = 0; i < numRemovedLedgers && createdLedgersIterator.hasNext(); i++) {
+ long ledgerId = createdLedgersIterator.next();
+ try {
+ removeLedger(ledgerId);
+ } catch (Exception e) {
+ LOG.error("Failed to remove ledger {}", ledgerId, e);
+ }
+ }
+
+ long startTime = System.currentTimeMillis();
+ garbageCollector.gc(new GarbageCollector.GarbageCleaner() {
+ @Override
+ public void clean(long ledgerId) {
+ }
+ });
+ long endTime = System.currentTimeMillis();
+ long duration = endTime - startTime;
+ long minExpectedTime = (numRemovedLedgers * 1000L) / customRateLimit;
+
+ LOG.info("GC operation with rate limit {} took {} ms, theoretical
minimum time: {} ms",
+ customRateLimit, duration, minExpectedTime);
+ assertTrue("GC operation should be rate limited", duration >=
minExpectedTime * 0.7);
+ }
+
/*
* in this scenario no ledger is created, so ledgeriterator's hasNext call would return false and next would be
* null. GarbageCollector.gc is expected to behave normally
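A note on the timing assertion in the test above: with numRemovedLedgers = 800 and customRateLimit = 200 permits/sec, the theoretical floor is 800 * 1000 / 200 = 4000 ms, and the test only requires 0.7 of that (2800 ms), presumably leaving headroom for RateLimiter's leniency around the first permits and for scheduler jitter.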
diff --git a/conf/bk_server.conf b/conf/bk_server.conf
index 98b5264aca..d680e750cd 100644
--- a/conf/bk_server.conf
+++ b/conf/bk_server.conf
@@ -601,6 +601,9 @@ ledgerDirectories=/tmp/bk-data
# interval if there is enough disk capacity.
# gcWaitTime=1000
+# The rate limit of metadata operations in garbage collection.
+#gcMetadataOpRateLimit=1000
+
# How long the interval to trigger next garbage collection of overreplicated
# ledgers, in milliseconds [Default: 1 day]. This should not be run very frequently
# since we read the metadata for all the ledgers on the bookie from zk
diff --git a/site3/website/docs/reference/config.md b/site3/website/docs/reference/config.md
index 9fbe6ac636..d9508c3488 100644
--- a/site3/website/docs/reference/config.md
+++ b/site3/website/docs/reference/config.md
@@ -191,6 +191,7 @@ The table below lists parameters that you can set to configure bookies. All conf
| Parameter | Description | Default |
| --------- | ----------- | ------- |
| gcWaitTime | How long the interval to trigger next garbage collection, in milliseconds. Since garbage collection is running in background, too frequent gc will hurt performance. It is better to give a higher number of gc interval if there is enough disk capacity. | 1000 |
+| gcMetadataOpRateLimit | Rate limit for metadata operations in garbage collection, in operations per second. This is used to limit the rate of metadata operations during garbage collection to avoid overwhelming the metadata service. | 1000 |
| gcOverreplicatedLedgerWaitTime | How long the interval to trigger next garbage collection of overreplicated ledgers, in milliseconds. This should not be run very frequently since we read the metadata for all the ledgers on the bookie from zk. | 86400000 |
| gcOverreplicatedLedgerMaxConcurrentRequests | Max number of concurrent requests in garbage collection of overreplicated ledgers. | 1000 |
| isForceGCAllowWhenNoSpace | Whether force compaction is allowed when the disk is full or almost full. Forcing GC may get some space back, but may also fill up disk space more quickly. This is because new log files are created before GC, while old garbage log files are deleted after GC. | false |