This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 31d282aa458 [feature](journal) Add log and metric to improve the 
observability of journal batch (#30401)
31d282aa458 is described below

commit 31d282aa45835fac45280bb6b191326f13cc724d
Author: walter <[email protected]>
AuthorDate: Fri Jan 26 21:14:59 2024 +0800

    [feature](journal) Add log and metric to improve the observability of 
journal batch (#30401)
---
 .../java/org/apache/doris/journal/JournalBatch.java |  3 +++
 .../apache/doris/journal/bdbje/BDBJEJournal.java    | 21 +++++++++++++++++++++
 .../java/org/apache/doris/metric/MetricRepo.java    |  8 +++++++-
 3 files changed, 31 insertions(+), 1 deletion(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalBatch.java 
b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalBatch.java
index 0fc0ccf9355..12d62b68717 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalBatch.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalBatch.java
@@ -41,6 +41,9 @@ public class JournalBatch {
     //
     // The writable data will be serialized and saved in the journal batch 
with an internal
     // representation, so it is safe to update the data object once this 
function has returned.
+    //
+    // If the batch is too large, it may cause a latency spike. Generally, we 
recommend controlling
+    // the number of batch entities to less than 32 and the batch data size to 
less than 640KB.
     public void addJournal(short op, Writable data) throws IOException {
         if (op == OperationType.OP_TIMESTAMP) {
             // OP_TIMESTAMP is not supported, see `BDBJEJournal.write` for 
details.
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java 
b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
index 95efbfa3780..9f90a3e2a11 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java
@@ -49,6 +49,7 @@ import com.sleepycat.je.rep.ReplicaWriteException;
 import com.sleepycat.je.rep.ReplicatedEnvironment;
 import com.sleepycat.je.rep.RollbackException;
 import com.sleepycat.je.rep.TimeConsistencyPolicy;
+import org.apache.commons.lang3.time.StopWatch;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 
@@ -60,6 +61,7 @@ import java.util.List;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 
+
 /*
  * This is the bdb implementation of Journal interface.
  * First, we open() this journal, then read from or write to the bdb 
environment
@@ -132,9 +134,11 @@ public class BDBJEJournal implements Journal { // 
CHECKSTYLE IGNORE THIS LINE: B
         // Write the journals to bdb.
         for (int i = 0; i < RETRY_TIME; i++) {
             Transaction txn = null;
+            StopWatch watch = StopWatch.createStarted();
             try {
                 // The default config is constructed from the configs of 
environment.
                 txn = 
bdbEnvironment.getReplicatedEnvironment().beginTransaction(null, null);
+                dataSize = 0;
                 for (int j = 0; j < entitySize; ++j) {
                     JournalBatch.Entity entity = entities.get(j);
                     DatabaseEntry theKey = idToKey(firstId + j);
@@ -152,6 +156,18 @@ public class BDBJEJournal implements Journal { // 
CHECKSTYLE IGNORE THIS LINE: B
                 if (MetricRepo.isInit) {
                     MetricRepo.COUNTER_EDIT_LOG_SIZE_BYTES.increase(dataSize);
                     
MetricRepo.COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.increase(dataSize);
+                    MetricRepo.HISTO_JOURNAL_BATCH_SIZE.update(entitySize);
+                    MetricRepo.HISTO_JOURNAL_BATCH_DATA_SIZE.update(dataSize);
+                }
+
+                if (entitySize > 32) {
+                    LOG.warn("write bdb journal batch is too large, batch size 
{}, the first journal id {}, "
+                            + "data size {}", entitySize, firstId, dataSize);
+                }
+
+                if (dataSize > 640 * 1024) {  // 640KB
+                    LOG.warn("write bdb journal batch data is too large, data 
size {}, the first journal id {}, "
+                            + "batch size {}", dataSize, firstId, entitySize);
                 }
 
                 return firstId;
@@ -188,6 +204,11 @@ public class BDBJEJournal implements Journal { // 
CHECKSTYLE IGNORE THIS LINE: B
                 if (txn != null) {
                     txn.abort();
                 }
+                watch.stop();
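+                if (watch.getTime() > 100000) {  // NOTE(review): getTime() is in milliseconds, so this threshold is 100s, not the 100ms originally noted here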
+                    LOG.warn("write bdb is too slow, cost {}ms, the first 
journal id, batch size {}, data size{}",
+                            watch.getTime(), firstId, entitySize, dataSize);
+                }
             }
         }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java 
b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
index 667290132aa..91e38b8cdf1 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java
@@ -101,6 +101,8 @@ public final class MetricRepo {
     public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_SUCCESS;
     public static LongCounterMetric COUNTER_EDIT_LOG_CLEAN_FAILED;
     public static Histogram HISTO_EDIT_LOG_WRITE_LATENCY;
+    public static Histogram HISTO_JOURNAL_BATCH_SIZE;
+    public static Histogram HISTO_JOURNAL_BATCH_DATA_SIZE;
 
     public static LongCounterMetric COUNTER_IMAGE_WRITE_SUCCESS;
     public static LongCounterMetric COUNTER_IMAGE_WRITE_FAILED;
@@ -486,7 +488,11 @@ public final class MetricRepo {
         COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES.addLabel(new MetricLabel("type", 
"current_bytes"));
         DORIS_METRIC_REGISTER.addMetrics(COUNTER_CURRENT_EDIT_LOG_SIZE_BYTES);
         HISTO_EDIT_LOG_WRITE_LATENCY = METRIC_REGISTER.histogram(
-            MetricRegistry.name("editlog", "write", "latency", "ms"));
+                MetricRegistry.name("editlog", "write", "latency", "ms"));
+        HISTO_JOURNAL_BATCH_SIZE = METRIC_REGISTER.histogram(
+                MetricRegistry.name("journal", "write", "batch_size"));
+        HISTO_JOURNAL_BATCH_DATA_SIZE = METRIC_REGISTER.histogram(
+                MetricRegistry.name("journal", "write", "batch_data_size"));
 
         // edit log clean
         COUNTER_EDIT_LOG_CLEAN_SUCCESS = new 
LongCounterMetric("edit_log_clean", MetricUnit.OPERATIONS,


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to