This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new cd7f1657ff7 branch-4.1: [fix](cloud) Skip skewed warmup rowset latency samples #62941 (#63084)
cd7f1657ff7 is described below

commit cd7f1657ff70427dee6491af3d98802ed2577841
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Sat May 9 15:33:48 2026 +0800

    branch-4.1: [fix](cloud) Skip skewed warmup rowset latency samples #62941 (#63084)
    
    Cherry-picked from #62941
    
    Co-authored-by: bobhan1 <[email protected]>
---
 be/src/cloud/cloud_internal_service.cpp | 60 +++++++++++++++++++++++----------
 1 file changed, 42 insertions(+), 18 deletions(-)

diff --git a/be/src/cloud/cloud_internal_service.cpp b/be/src/cloud/cloud_internal_service.cpp
index 47166c7ae32..b17d32cb9a1 100644
--- a/be/src/cloud/cloud_internal_service.cpp
+++ b/be/src/cloud/cloud_internal_service.cpp
@@ -20,6 +20,7 @@
 #include <bthread/countdown_event.h>
 
 #include <algorithm>
+#include <optional>
 #include <thread>
 
 #include "cloud/cloud_storage_engine.h"
@@ -88,6 +89,22 @@ FileCacheType cache_type_to_pb(io::FileCacheType type) {
     return FileCacheType::NORMAL;
 }
 
+static int64_t current_unix_time_us() {
+    return std::chrono::duration_cast<std::chrono::microseconds>(
+                   std::chrono::system_clock::now().time_since_epoch())
+            .count();
+}
+
+static std::optional<int64_t> warm_up_rowset_cross_host_latency_us(int64_t 
start_unix_ts_us,
+                                                                   int64_t 
end_unix_ts_us) {
+    // The start timestamp is generated by the caller BE. Mixed-version callers may omit it, and
+    // system clocks across BEs are not guaranteed to be ordered.
+    if (start_unix_ts_us <= 0 || end_unix_ts_us < start_unix_ts_us) {
+        return std::nullopt;
+    }
+    return end_unix_ts_us - start_unix_ts_us;
+}
+
 static void add_file_cache_block_meta_to_response(
         PGetFileCacheMetaResponse* resp, int64_t tablet_id, const std::string& 
rowset_id,
         int32_t segment_id, const std::string& file_name,
@@ -414,15 +431,17 @@ void handle_segment_download_done(Status st, int64_t tablet_id, const RowsetId&
     if (st.ok()) {
         g_file_cache_event_driven_warm_up_finished_segment_num << 1;
         g_file_cache_event_driven_warm_up_finished_segment_size << 
segment_size;
-        int64_t now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
-                                 
std::chrono::system_clock::now().time_since_epoch())
-                                 .count();
+        int64_t now_ts = current_unix_time_us();
         g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts);
-        g_file_cache_warm_up_rowset_latency << (now_ts - request_ts);
+        auto rowset_latency_us = 
warm_up_rowset_cross_host_latency_us(request_ts, now_ts);
+        if (rowset_latency_us.has_value()) {
+            g_file_cache_warm_up_rowset_latency << *rowset_latency_us;
+        }
         g_file_cache_warm_up_rowset_handle_to_finish_latency << (now_ts - 
handle_ts);
-        if (request_ts > 0 && now_ts - request_ts > 
config::warm_up_rowset_slow_log_ms * 1000) {
+        if (rowset_latency_us.has_value() &&
+            *rowset_latency_us > config::warm_up_rowset_slow_log_ms * 1000) {
             g_file_cache_warm_up_rowset_slow_count << 1;
-            LOG(INFO) << "warm up rowset took " << now_ts - request_ts
+            LOG(INFO) << "warm up rowset took " << *rowset_latency_us
                       << " us, tablet_id: " << tablet_id << ", rowset_id: " << 
rowset_id.to_string()
                       << ", segment_id: " << segment_id;
         }
@@ -466,15 +485,17 @@ void handle_inverted_index_download_done(Status st, int64_t tablet_id, const Row
     if (st.ok()) {
         g_file_cache_event_driven_warm_up_finished_index_num << 1;
         g_file_cache_event_driven_warm_up_finished_index_size << idx_size;
-        int64_t now_ts = std::chrono::duration_cast<std::chrono::microseconds>(
-                                 
std::chrono::system_clock::now().time_since_epoch())
-                                 .count();
+        int64_t now_ts = current_unix_time_us();
         g_file_cache_warm_up_rowset_last_finish_unix_ts.set_value(now_ts);
-        g_file_cache_warm_up_rowset_latency << (now_ts - request_ts);
+        auto rowset_latency_us = 
warm_up_rowset_cross_host_latency_us(request_ts, now_ts);
+        if (rowset_latency_us.has_value()) {
+            g_file_cache_warm_up_rowset_latency << *rowset_latency_us;
+        }
         g_file_cache_warm_up_rowset_handle_to_finish_latency << (now_ts - 
handle_ts);
-        if (request_ts > 0 && now_ts - request_ts > 
config::warm_up_rowset_slow_log_ms * 1000) {
+        if (rowset_latency_us.has_value() &&
+            *rowset_latency_us > config::warm_up_rowset_slow_log_ms * 1000) {
             g_file_cache_warm_up_rowset_slow_count << 1;
-            LOG(INFO) << "warm up rowset took " << now_ts - request_ts
+            LOG(INFO) << "warm up rowset took " << *rowset_latency_us
                       << " us, tablet_id: " << tablet_id << ", rowset_id: " << 
rowset_id.to_string()
                       << ", segment_id: " << segment_id;
         }
@@ -543,15 +564,18 @@ void CloudInternalServiceImpl::warm_up_rowset(google::protobuf::RpcController* c
         auto tablet = res.value();
         auto tablet_meta = tablet->tablet_meta();
 
-        int64_t handle_ts = 
std::chrono::duration_cast<std::chrono::microseconds>(
-                                    
std::chrono::system_clock::now().time_since_epoch())
-                                    .count();
+        int64_t handle_ts = current_unix_time_us();
         g_file_cache_warm_up_rowset_last_handle_unix_ts.set_value(handle_ts);
         int64_t request_ts = request->has_unix_ts_us() ? request->unix_ts_us() 
: 0;
-        g_file_cache_warm_up_rowset_request_to_handle_latency << (handle_ts - 
request_ts);
-        if (request_ts > 0 && handle_ts - request_ts > 
config::warm_up_rowset_slow_log_ms * 1000) {
+        auto request_to_handle_latency_us =
+                warm_up_rowset_cross_host_latency_us(request_ts, handle_ts);
+        if (request_to_handle_latency_us.has_value()) {
+            g_file_cache_warm_up_rowset_request_to_handle_latency << 
*request_to_handle_latency_us;
+        }
+        if (request_to_handle_latency_us.has_value() &&
+            *request_to_handle_latency_us > config::warm_up_rowset_slow_log_ms 
* 1000) {
             g_file_cache_warm_up_rowset_request_to_handle_slow_count << 1;
-            LOG(INFO) << "warm up rowset (request to handle) took " << 
handle_ts - request_ts
+            LOG(INFO) << "warm up rowset (request to handle) took " << 
*request_to_handle_latency_us
                       << " us, tablet_id: " << rs_meta.tablet_id()
                       << ", rowset_id: " << rowset_id.to_string();
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to