This is an automated email from the ASF dual-hosted git repository. chenhang pushed a commit to branch branch-4.14 in repository https://gitbox.apache.org/repos/asf/bookkeeper.git
commit 9f89a395fb4e9d74f6e7f5660b0125c5170f0ed7 Author: Hang Chen <[email protected]> AuthorDate: Thu Mar 16 11:28:48 2023 +0800 Add small files check in garbage collection (#3631) When we use `TransactionalEntryLogCompactor` to compact the entry log files, it generates a lot of small entry log files. The usage of those files is usually greater than 90%, so they cannot be compacted until their usage decreases.  We introduce an entry log file size check during compaction, and the checker is controlled by `gcEntryLogSizeRatio`. If the total entry log file size is less than `gcEntryLogSizeRatio * logSizeLimit`, the entry log file will be compacted even though the file usage is greater than 90%. This feature is disabled by default and the `gcEntryLogSizeRatio` default value is `0.0`. Note: in the diff below, this check is exposed as the boolean setting `useTargetEntryLogSizeForGc` (default `false`) rather than as `gcEntryLogSizeRatio`. (cherry picked from commit 2fad33bfcf24a72f7fdf103969ed4b0aa26778a2) --- .../org/apache/bookkeeper/bookie/GarbageCollectorThread.java | 12 +++++++++--- .../java/org/apache/bookkeeper/conf/ServerConfiguration.java | 10 ++++++++++ conf/bk_server.conf | 8 ++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java index 60a6cbdceb..6afc7e4bfa 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java @@ -466,15 +466,21 @@ public class GarbageCollectorThread extends SafeRunnable { long timeDiff = 0; for (EntryLogMetadata meta : logsToCompact) { - int bucketIndex = calculateUsageIndex(numBuckets, meta.getUsage()); + double usage = meta.getUsage(); + if (conf.isUseTargetEntryLogSizeForGc() && usage < 1.0d) { + usage = (double) meta.getRemainingSize() / Math.max(meta.getTotalSize(), conf.getEntryLogSizeLimit()); + } + int bucketIndex = 
calculateUsageIndex(numBuckets, usage); entryLogUsageBuckets[bucketIndex]++; if (timeDiff < maxTimeMillis) { end = System.currentTimeMillis(); timeDiff = end - start; } - if (meta.getUsage() >= threshold || (maxTimeMillis > 0 && timeDiff > maxTimeMillis) || !running) { - // We allow the usage limit calculation to continue so that we get a accurate + if ((usage >= threshold + || (maxTimeMillis > 0 && timeDiff >= maxTimeMillis) + || !running)) { + // We allow the usage limit calculation to continue so that we get an accurate // report of where the usage was prior to running compaction. continue; } diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java index 18c7e32112..427812aeec 100644 --- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java +++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/conf/ServerConfiguration.java @@ -113,6 +113,7 @@ public class ServerConfiguration extends AbstractConfiguration<ServerConfigurati protected static final String GC_OVERREPLICATED_LEDGER_WAIT_TIME = "gcOverreplicatedLedgerWaitTime"; protected static final String USE_TRANSACTIONAL_COMPACTION = "useTransactionalCompaction"; protected static final String VERIFY_METADATA_ON_GC = "verifyMetadataOnGC"; + protected static final String USE_TARGET_ENTRYLOG_SIZE_FOR_GC = "useTargetEntryLogSizeForGc"; // Scrub Parameters protected static final String LOCAL_SCRUB_PERIOD = "localScrubInterval"; protected static final String LOCAL_SCRUB_RATE_LIMIT = "localScrubRateLimit"; @@ -459,6 +460,15 @@ public class ServerConfiguration extends AbstractConfiguration<ServerConfigurati return this; } + public boolean isUseTargetEntryLogSizeForGc() { + return getBoolean(USE_TARGET_ENTRYLOG_SIZE_FOR_GC, false); + } + + public ServerConfiguration setUseTargetEntryLogSizeForGc(boolean useTargetEntryLogSizeForGc) { + 
this.setProperty(USE_TARGET_ENTRYLOG_SIZE_FOR_GC, useTargetEntryLogSizeForGc); + return this; + } + /** * Get whether local scrub is enabled. * diff --git a/conf/bk_server.conf b/conf/bk_server.conf index 1d275586fa..f786a175c0 100755 --- a/conf/bk_server.conf +++ b/conf/bk_server.conf @@ -582,6 +582,14 @@ ledgerDirectories=/tmp/bk-data # True if the bookie should double check readMetadata prior to gc # verifyMetadataOnGC=false +# When judging whether an entry log file need to be compacted, we calculate the usage rate of the entry log file based +# on the actual size of the entry log file. However, if an entry log file is 1MB in size and 0.9MB of data is +# being used, this entry log file won't be compacted by garbage collector due to the high usage ratio, +# which will result in many small entry log files. +# We introduced the parameter `useTargetEntryLogSizeForGc` to determine whether to calculate entry log file usage +# based on the configured target entry log file size, which is configured by `logSizeLimit`. +# Default: useTargetEntryLogSizeForGc is false. +# useTargetEntryLogSizeForGc=false ############################################################################# ## Disk utilization #############################################################################
