This is an automated email from the ASF dual-hosted git repository.

shoothzj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/bookkeeper.git


The following commit(s) were added to refs/heads/master by this push:
     new d0adb60fb7 improve: Enrich GC metrics to better analyze GC behavior and the time consumption of each phase. (#4384)
d0adb60fb7 is described below

commit d0adb60fb718b75f7935933952b5ccb7a5ff20c1
Author: YingQun Zhong <[email protected]>
AuthorDate: Thu Sep 12 15:18:00 2024 +0800

    improve: Enrich GC metrics to better analyze GC behavior and the time consumption of each phase. (#4384)
    
    ### Motivation
    Enrich GC metrics to better analyze GC behavior and the time consumption of each phase.

    In our online environment, we found that some clusters spend a lot of time scanning metadata during GC, so the newly added metrics make it easier to analyze a cluster's GC behavior.

    This PR adds 4 new metrics:
    bookie_GC_LEDGER_RUNTIME
    bookie_EXTRACT_META_RUNTIME
    bookie_COMPACT_RUNTIME
    bookie_ENTRY_LOG_COMPACT_RATIO
    
    ### Changes
    - bookie_GC_LEDGER_RUNTIME
    Operation stats of garbage-collecting ledgers based on the metadata store: the time spent comparing ledger metadata between the local bookie and the metadata store (ZooKeeper).

    - bookie_EXTRACT_META_RUNTIME
    Time spent extracting metadata from entry logs.

    - bookie_COMPACT_RUNTIME
    Time spent on entry log compaction.

    - bookie_ENTRY_LOG_COMPACT_RATIO
    Proportion of entry log files that were compacted in the current compaction run (gives users a reference for setting more reasonable compaction threshold values); see the sketch below.
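
    A minimal sketch of how the new ratio is derived (the variable names mirror the change to GarbageCollectorThread below; the bucket values are made up for illustration only):

    ```java
    import java.util.Arrays;

    public class CompactRatioSketch {
        public static void main(String[] args) {
            // Entry logs counted per 10% usage decile during doCompactEntryLogs
            // (hypothetical sample values).
            int[] entryLogUsageBuckets = {3, 1, 0, 2, 0, 0, 0, 0, 0, 4};
            // Entry logs that were actually compacted, in the same buckets.
            int[] compactedBuckets = {3, 1, 0, 1, 0, 0, 0, 0, 0, 0};

            int totalEntryLogNum = Arrays.stream(entryLogUsageBuckets).sum();
            int compactedEntryLogNum = Arrays.stream(compactedBuckets).sum();
            // 0 when no entry logs were scanned, otherwise compacted / total.
            double entryLogCompactRatio =
                    totalEntryLogNum == 0 ? 0 : (double) compactedEntryLogNum / totalEntryLogNum;
            System.out.println(entryLogCompactRatio); // 5 / 10 = 0.5 for the sample above
        }
    }
    ```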
    
    <img width="904" alt="image" 
src="https://github.com/apache/bookkeeper/assets/16517186/79870ed5-5191-429c-b6e3-e8a08054bcd1";>
    
    Co-authored-by: qunzhong <[email protected]>
---
 .../bookkeeper/bookie/BookKeeperServerStats.java   |  4 ++
 .../bookkeeper/bookie/GarbageCollectorThread.java  | 74 +++++++++++++++++-----
 .../bookie/stats/GarbageCollectorStats.java        | 73 ++++++++++++++++++++-
 3 files changed, 134 insertions(+), 17 deletions(-)

diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookKeeperServerStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookKeeperServerStats.java
index 59ffd99e0d..d9505b44a8 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookKeeperServerStats.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/BookKeeperServerStats.java
@@ -154,6 +154,10 @@ public interface BookKeeperServerStats {
     String MINOR_COMPACTION_COUNT = "MINOR_COMPACTION_TOTAL";
     String ACTIVE_LEDGER_COUNT = "ACTIVE_LEDGER_TOTAL";
     String DELETED_LEDGER_COUNT = "DELETED_LEDGER_TOTAL";
+    String GC_LEDGER_RUNTIME = "GC_LEDGER_RUNTIME";
+    String COMPACT_RUNTIME = "COMPACT_RUNTIME";
+    String EXTRACT_META_RUNTIME = "EXTRACT_META_RUNTIME";
+    String ENTRY_LOG_COMPACT_RATIO = "ENTRY_LOG_COMPACT_RATIO";
 
     // Index Related Counters
     String INDEX_INMEM_ILLEGAL_STATE_RESET = "INDEX_INMEM_ILLEGAL_STATE_RESET";
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java
index e58064cfd9..5126e79802 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/GarbageCollectorThread.java
@@ -29,6 +29,7 @@ import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
 import io.netty.util.concurrent.DefaultThreadFactory;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.LinkedList;
 import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
@@ -59,6 +60,7 @@ import org.slf4j.LoggerFactory;
 public class GarbageCollectorThread implements Runnable {
     private static final Logger LOG = LoggerFactory.getLogger(GarbageCollectorThread.class);
     private static final int SECOND = 1000;
+    private static final int ENTRY_LOG_USAGE_SEGMENT_COUNT = 10;
     private static final long MINUTE = TimeUnit.MINUTES.toMillis(1);
 
     // Maps entry log files to the set of ledgers that comprise the file and the size usage per ledger
@@ -97,6 +99,8 @@ public class GarbageCollectorThread implements Runnable {
 
     private volatile long totalEntryLogSize;
     private volatile int numActiveEntryLogs;
+    private volatile double entryLogCompactRatio;
+    private volatile int[] currentEntryLogUsageBuckets;
 
     final CompactableLedgerStorage ledgerStorage;
 
@@ -172,12 +176,16 @@ public class GarbageCollectorThread implements Runnable {
 
         this.numActiveEntryLogs = 0;
         this.totalEntryLogSize = 0L;
+        this.entryLogCompactRatio = 0.0;
+        this.currentEntryLogUsageBuckets = new int[ENTRY_LOG_USAGE_SEGMENT_COUNT];
         this.garbageCollector = new ScanAndCompareGarbageCollector(ledgerManager, ledgerStorage, conf, statsLogger);
         this.gcStats = new GarbageCollectorStats(
             statsLogger,
             () -> numActiveEntryLogs,
             () -> totalEntryLogSize,
-            () -> garbageCollector.getNumActiveLedgers()
+            () -> garbageCollector.getNumActiveLedgers(),
+            () -> entryLogCompactRatio,
+            () -> currentEntryLogUsageBuckets
         );
 
         this.garbageCleaner = ledgerId -> {
@@ -413,12 +421,22 @@ public class GarbageCollectorThread implements Runnable {
             // this is used in extractMetaFromEntryLogs to calculate the usage of entry log
             doGcLedgers();
 
-            // Extract all of the ledger ID's that comprise all of the entry logs
-            // (except for the current new one which is still being written to).
-            extractMetaFromEntryLogs();
 
-            // gc entry logs
-            doGcEntryLogs();
+            long extractMetaStart = MathUtils.nowInNano();
+            try {
+                // Extract all of the ledger ID's that comprise all of the entry logs
+                // (except for the current new one which is still being written to).
+                extractMetaFromEntryLogs();
+
+                // gc entry logs
+                doGcEntryLogs();
+                gcStats.getExtractMetaRuntime()
+                        .registerSuccessfulEvent(MathUtils.elapsedNanos(extractMetaStart), TimeUnit.NANOSECONDS);
+            } catch (EntryLogMetadataMapException e) {
+                gcStats.getExtractMetaRuntime()
+                        .registerFailedEvent(MathUtils.elapsedNanos(extractMetaStart), TimeUnit.NANOSECONDS);
+                throw e;
+            }
 
             if (suspendMajor) {
                 LOG.info("Disk almost full, suspend major compaction to slow 
down filling disk.");
@@ -428,14 +446,20 @@ public class GarbageCollectorThread implements Runnable {
             }
 
             long curTime = System.currentTimeMillis();
+            long compactStart = MathUtils.nowInNano();
             if (((isForceMajorCompactionAllow && force) || (enableMajorCompaction
                     && (force || curTime - lastMajorCompactionTime > majorCompactionInterval)))
                     && (!suspendMajor)) {
                 // enter major compaction
-                LOG.info("Enter major compaction, suspendMajor {}", suspendMajor);
+                LOG.info("Enter major compaction, suspendMajor {}, lastMajorCompactionTime {}", suspendMajor,
+                        lastMajorCompactionTime);
                 majorCompacting.set(true);
                 try {
                     doCompactEntryLogs(majorCompactionThreshold, majorCompactionMaxTimeMillis);
+                } catch (EntryLogMetadataMapException e) {
+                    gcStats.getCompactRuntime()
+                            .registerFailedEvent(MathUtils.elapsedNanos(compactStart), TimeUnit.NANOSECONDS);
+                    throw e;
                 } finally {
                     lastMajorCompactionTime = System.currentTimeMillis();
                     // and also move minor compaction time
@@ -447,23 +471,29 @@ public class GarbageCollectorThread implements Runnable {
                     && (force || curTime - lastMinorCompactionTime > minorCompactionInterval)))
                     && (!suspendMinor)) {
                 // enter minor compaction
-                LOG.info("Enter minor compaction, suspendMinor {}", 
suspendMinor);
+                LOG.info("Enter minor compaction, suspendMinor {}, 
lastMinorCompactionTime {}", suspendMinor,
+                        lastMinorCompactionTime);
                 minorCompacting.set(true);
                 try {
                     doCompactEntryLogs(minorCompactionThreshold, minorCompactionMaxTimeMillis);
+                }  catch (EntryLogMetadataMapException e) {
+                    gcStats.getCompactRuntime()
+                            .registerFailedEvent(MathUtils.elapsedNanos(compactStart), TimeUnit.NANOSECONDS);
+                    throw e;
                 } finally {
                     lastMinorCompactionTime = System.currentTimeMillis();
                     gcStats.getMinorCompactionCounter().inc();
                     minorCompacting.set(false);
                 }
             }
+            gcStats.getCompactRuntime()
+                    .registerSuccessfulEvent(MathUtils.elapsedNanos(compactStart), TimeUnit.NANOSECONDS);
             gcStats.getGcThreadRuntime().registerSuccessfulEvent(
                     MathUtils.nowInNano() - threadStart, TimeUnit.NANOSECONDS);
         } catch (EntryLogMetadataMapException e) {
             LOG.error("Error in entryLog-metadatamap, Failed to complete 
GC/Compaction due to entry-log {}",
                     e.getMessage(), e);
-            gcStats.getGcThreadRuntime().registerFailedEvent(
-                    MathUtils.nowInNano() - threadStart, TimeUnit.NANOSECONDS);
+            gcStats.getGcThreadRuntime().registerFailedEvent(MathUtils.elapsedNanos(threadStart), TimeUnit.NANOSECONDS);
         } finally {
             if (force && forceGarbageCollection.compareAndSet(true, false)) {
                 LOG.info("{} Set forceGarbageCollection to false after force 
GC to make it forceGC-able again.",
@@ -477,7 +507,16 @@ public class GarbageCollectorThread implements Runnable {
      * Do garbage collection ledger index files.
      */
     private void doGcLedgers() {
-        garbageCollector.gc(garbageCleaner);
+        long gcLedgersStart = MathUtils.nowInNano();
+        try {
+            garbageCollector.gc(garbageCleaner);
+            gcStats.getGcLedgerRuntime()
+                    .registerSuccessfulEvent(MathUtils.elapsedNanos(gcLedgersStart), TimeUnit.NANOSECONDS);
+        } catch (Throwable t) {
+            LOG.warn("Exception when doing gc ledger.", t);
+            gcStats.getGcLedgerRuntime()
+                    .registerFailedEvent(MathUtils.elapsedNanos(gcLedgersStart), TimeUnit.NANOSECONDS);
+        }
     }
 
     /**
@@ -550,7 +589,7 @@ public class GarbageCollectorThread implements Runnable {
     void doCompactEntryLogs(double threshold, long maxTimeMillis) throws EntryLogMetadataMapException {
         LOG.info("Do compaction to compact those files lower than {}", threshold);
 
-        final int numBuckets = 10;
+        final int numBuckets = ENTRY_LOG_USAGE_SEGMENT_COUNT;
         int[] entryLogUsageBuckets = new int[numBuckets];
         int[] compactedBuckets = new int[numBuckets];
 
@@ -585,7 +624,8 @@ public class GarbageCollectorThread implements Runnable {
 
             compactableBuckets.get(bucketIndex).add(meta.getEntryLogId());
         });
-
+        currentEntryLogUsageBuckets = entryLogUsageBuckets;
+        gcStats.setEntryLogUsageBuckets(currentEntryLogUsageBuckets);
         LOG.info(
                 "Compaction: entry log usage buckets before compaction [10% 
20% 30% 40% 50% 60% 70% 80% 90% 100%] = {}",
                 entryLogUsageBuckets);
@@ -649,9 +689,11 @@ public class GarbageCollectorThread implements Runnable {
                 LOG.debug("Compaction ran for {}ms but was limited by {}ms", 
timeDiff, maxTimeMillis);
             }
         }
-        LOG.info(
-                "Compaction: entry log usage buckets[10% 20% 30% 40% 50% 60% 
70% 80% 90% 100%] = {}, compacted {}",
-                entryLogUsageBuckets, compactedBuckets);
+        int totalEntryLogNum = Arrays.stream(entryLogUsageBuckets).sum();
+        int compactedEntryLogNum = Arrays.stream(compactedBuckets).sum();
+        this.entryLogCompactRatio = totalEntryLogNum == 0 ? 0 : (double) compactedEntryLogNum / totalEntryLogNum;
+        LOG.info("Compaction: entry log usage buckets[10% 20% 30% 40% 50% 60% 70% 80% 90% 100%] = {}, compacted {}, "
+                + "compacted entry log ratio {}", entryLogUsageBuckets, compactedBuckets, entryLogCompactRatio);
     }
 
     /**
diff --git a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/GarbageCollectorStats.java b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/GarbageCollectorStats.java
index f9f1e31fee..0b88a5effe 100644
--- a/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/GarbageCollectorStats.java
+++ b/bookkeeper-server/src/main/java/org/apache/bookkeeper/bookie/stats/GarbageCollectorStats.java
@@ -24,7 +24,11 @@ import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ACTIVE_ENTRY_LO
 import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ACTIVE_LEDGER_COUNT;
 import static org.apache.bookkeeper.bookie.BookKeeperServerStats.BOOKIE_SCOPE;
 import static org.apache.bookkeeper.bookie.BookKeeperServerStats.CATEGORY_SERVER;
+import static org.apache.bookkeeper.bookie.BookKeeperServerStats.COMPACT_RUNTIME;
 import static org.apache.bookkeeper.bookie.BookKeeperServerStats.DELETED_LEDGER_COUNT;
+import static org.apache.bookkeeper.bookie.BookKeeperServerStats.ENTRY_LOG_COMPACT_RATIO;
+import static org.apache.bookkeeper.bookie.BookKeeperServerStats.EXTRACT_META_RUNTIME;
+import static org.apache.bookkeeper.bookie.BookKeeperServerStats.GC_LEDGER_RUNTIME;
 import static org.apache.bookkeeper.bookie.BookKeeperServerStats.MAJOR_COMPACTION_COUNT;
 import static org.apache.bookkeeper.bookie.BookKeeperServerStats.MINOR_COMPACTION_COUNT;
 import static org.apache.bookkeeper.bookie.BookKeeperServerStats.RECLAIMED_COMPACTION_SPACE_BYTES;
@@ -102,11 +106,36 @@ public class GarbageCollectorStats {
         help = "Current number of active ledgers"
     )
     private final Gauge<Integer> activeLedgerCountGauge;
+    @StatsDoc(
+            name = GC_LEDGER_RUNTIME,
+            help = "Operation stats of doing gc ledgers base on metaStore"
+    )
+    private final OpStatsLogger gcLedgerRuntime;
+    @StatsDoc(
+            name = COMPACT_RUNTIME,
+            help = "Operation stats of entry log compaction"
+    )
+    private final OpStatsLogger compactRuntime;
+    @StatsDoc(
+            name = EXTRACT_META_RUNTIME,
+            help = "Operation stats of extracting Meta from entryLogs"
+    )
+    private final OpStatsLogger extractMetaRuntime;
+    @StatsDoc(
+        name = ENTRY_LOG_COMPACT_RATIO,
+        help = "Current proportion of compacted entry log files that have been 
executed"
+    )
+    private final Gauge<Double> entryLogCompactRatioGauge;
+    private volatile int[] entryLogUsageBuckets;
+    private final Gauge<Integer>[] entryLogUsageBucketsLeGauges;
+
 
     public GarbageCollectorStats(StatsLogger statsLogger,
                                  Supplier<Integer> activeEntryLogCountSupplier,
                                  Supplier<Long> activeEntryLogSpaceBytesSupplier,
-                                 Supplier<Integer> activeLedgerCountSupplier) {
+                                 Supplier<Integer> activeLedgerCountSupplier,
+                                 Supplier<Double> entryLogCompactRatioSupplier,
+                                 Supplier<int[]> usageBuckets) {
         this.statsLogger = statsLogger;
 
         this.minorCompactionCounter = statsLogger.getCounter(MINOR_COMPACTION_COUNT);
@@ -116,6 +145,10 @@ public class GarbageCollectorStats {
         this.reclaimFailedToDelete = statsLogger.getCounter(RECLAIM_FAILED_TO_DELETE);
         this.gcThreadRuntime = statsLogger.getOpStatsLogger(THREAD_RUNTIME);
         this.deletedLedgerCounter = statsLogger.getCounter(DELETED_LEDGER_COUNT);
+        this.gcLedgerRuntime = statsLogger.getOpStatsLogger(GC_LEDGER_RUNTIME);
+        this.compactRuntime = statsLogger.getOpStatsLogger(COMPACT_RUNTIME);
+        this.extractMetaRuntime = statsLogger.getOpStatsLogger(EXTRACT_META_RUNTIME);
+        this.entryLogUsageBuckets = usageBuckets.get();
 
         this.activeEntryLogCountGauge = new Gauge<Integer>() {
             @Override
@@ -153,6 +186,44 @@ public class GarbageCollectorStats {
             }
         };
         statsLogger.registerGauge(ACTIVE_LEDGER_COUNT, activeLedgerCountGauge);
+        this.entryLogCompactRatioGauge = new Gauge<Double>() {
+            @Override
+            public Double getDefaultValue() {
+                return 0.0;
+            }
+
+            @Override
+            public Double getSample() {
+                return entryLogCompactRatioSupplier.get();
+            }
+        };
+        statsLogger.registerGauge(ENTRY_LOG_COMPACT_RATIO, entryLogCompactRatioGauge);
+
+        this.entryLogUsageBucketsLeGauges = new Gauge[entryLogUsageBuckets.length];
+        for (int i = 0; i < entryLogUsageBucketsLeGauges.length; i++) {
+            entryLogUsageBucketsLeGauges[i] =
+                    registerEntryLogUsageBucketsLeGauge("entry_log_usage_buckets_le_" + (i + 1) * 10, i);
+        }
+    }
+
+    private Gauge<Integer> registerEntryLogUsageBucketsLeGauge(String name, int index) {
+        Gauge<Integer> gauge = new Gauge<Integer>() {
+            @Override
+            public Integer getDefaultValue() {
+                return 0;
+            }
+
+            @Override
+            public Integer getSample() {
+                return entryLogUsageBuckets[index];
+            }
+        };
+        statsLogger.registerGauge(name, gauge);
+        return gauge;
     }
 
+
+    public void setEntryLogUsageBuckets(int[] usageBuckets) {
+        entryLogUsageBuckets = usageBuckets;
+    }
 }
