This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new a8ae171bdf2 [enhancement](filecache) add more debug info via 
VLOG_DEBUG (#58043) (#61194)
a8ae171bdf2 is described below

commit a8ae171bdf2689ecf0120dfaf6d3a761d347fae7
Author: zhengyu <[email protected]>
AuthorDate: Tue May 26 21:53:45 2026 +0800

    [enhancement](filecache) add more debug info via VLOG_DEBUG (#58043) 
(#61194)
    
    ### What problem does this PR solve?
    
    Issue Number: close #xxx
    
    Related PR: #xxx
    
    Problem Summary:
    
    ### Release note
    
    None
    
    ### Check List (For Author)
    
    - Test <!-- At least one of them must be included. -->
        - [ ] Regression test
        - [ ] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
        - [ ] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
        - [ ] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
    
    ---------
    
    Signed-off-by: zhengyu <[email protected]>
---
 be/src/cloud/cloud_backend_service.cpp |  8 +++++++-
 be/src/cloud/cloud_tablet.cpp          |  3 +++
 be/src/cloud/cloud_tablet_mgr.cpp      |  3 +++
 be/src/cloud/cloud_warm_up_manager.cpp |  2 ++
 be/src/io/cache/block_file_cache.cpp   | 37 +++++++++++++++++++++++++++-------
 be/src/io/cache/block_file_cache.h     |  3 ++-
 be/src/io/fs/s3_file_reader.cpp        |  4 ++++
 7 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/be/src/cloud/cloud_backend_service.cpp 
b/be/src/cloud/cloud_backend_service.cpp
index 4865a4ae06d..92c7a2b3871 100644
--- a/be/src/cloud/cloud_backend_service.cpp
+++ b/be/src/cloud/cloud_backend_service.cpp
@@ -32,6 +32,7 @@
 #include "runtime/stream_load/stream_load_context.h"
 #include "runtime/stream_load/stream_load_recorder.h"
 #include "util/brpc_client_cache.h" // BrpcClientCache
+#include "util/stack_util.h"
 #include "util/thrift_server.h"
 
 namespace doris {
@@ -213,8 +214,13 @@ void 
CloudBackendService::warm_up_cache_async(TWarmUpCacheAsyncResponse& respons
             return;
         }
         PGetFileCacheMetaRequest brpc_request;
+        std::stringstream ss;
         std::for_each(request.tablet_ids.cbegin(), request.tablet_ids.cend(),
-                      [&](int64_t tablet_id) { 
brpc_request.add_tablet_ids(tablet_id); });
+                      [&](int64_t tablet_id) {
+                          brpc_request.add_tablet_ids(tablet_id);
+                          ss << tablet_id << ",";
+                      });
+        VLOG_DEBUG << "tablets set: " << ss.str() << " stack: " << 
get_stack_trace();
 
         auto run_rpc = [this, brpc_stub,
                         brpc_addr](PGetFileCacheMetaRequest request_copy) -> 
Status {
diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp
index db620468fcb..f16e155c446 100644
--- a/be/src/cloud/cloud_tablet.cpp
+++ b/be/src/cloud/cloud_tablet.cpp
@@ -63,6 +63,7 @@
 #include "olap/tablet_schema.h"
 #include "olap/txn_manager.h"
 #include "util/debug_points.h"
+#include "util/stack_util.h"
 #include "vec/common/schema_util.h"
 
 namespace doris {
@@ -421,6 +422,8 @@ void CloudTablet::add_rowsets(std::vector<RowsetSharedPtr> 
to_add, bool version_
         return;
     }
 
+    VLOG_DEBUG << "add_rowsets tablet_id=" << tablet_id() << " stack: " << 
get_stack_trace();
+
     auto add_rowsets_directly = [=, this](std::vector<RowsetSharedPtr>& 
rowsets) {
         for (auto& rs : rowsets) {
             if (warmup_delta_data) {
diff --git a/be/src/cloud/cloud_tablet_mgr.cpp 
b/be/src/cloud/cloud_tablet_mgr.cpp
index 7c1a1518fe6..bb186adc51d 100644
--- a/be/src/cloud/cloud_tablet_mgr.cpp
+++ b/be/src/cloud/cloud_tablet_mgr.cpp
@@ -29,6 +29,7 @@
 #include "common/status.h"
 #include "olap/lru_cache.h"
 #include "runtime/memory/cache_policy.h"
+#include "util/stack_util.h"
 
 namespace doris {
 uint64_t g_tablet_report_inactive_duration_ms = 0;
@@ -176,6 +177,8 @@ Result<std::shared_ptr<CloudTablet>> 
CloudTabletMgr::get_tablet(int64_t tablet_i
         TabletMap& tablet_map;
     };
 
+    VLOG_DEBUG << "get_tablet tablet_id=" << tablet_id << " stack: " << 
get_stack_trace();
+
     auto tablet_id_str = std::to_string(tablet_id);
     CacheKey key(tablet_id_str);
     auto* handle = _cache->lookup(key);
diff --git a/be/src/cloud/cloud_warm_up_manager.cpp 
b/be/src/cloud/cloud_warm_up_manager.cpp
index b63af0592d2..62fd6735365 100644
--- a/be/src/cloud/cloud_warm_up_manager.cpp
+++ b/be/src/cloud/cloud_warm_up_manager.cpp
@@ -42,6 +42,7 @@
 #include "runtime/client_cache.h"
 #include "runtime/exec_env.h"
 #include "util/brpc_client_cache.h" // BrpcClientCache
+#include "util/stack_util.h"
 #include "util/thrift_rpc_helper.h"
 #include "util/time.h"
 
@@ -216,6 +217,7 @@ void CloudWarmUpManager::handle_jobs() {
                 std::make_shared<bthread::CountdownEvent>(0);
 
         for (int64_t tablet_id : cur_job->tablet_ids) {
+            VLOG_DEBUG << "Warm up tablet " << tablet_id << " stack: " << 
get_stack_trace();
             if (_cur_job_id == 0) { // The job is canceled
                 break;
             }
diff --git a/be/src/io/cache/block_file_cache.cpp 
b/be/src/io/cache/block_file_cache.cpp
index 50ad7b98370..620603ffed7 100644
--- a/be/src/io/cache/block_file_cache.cpp
+++ b/be/src/io/cache/block_file_cache.cpp
@@ -49,6 +49,7 @@
 #include "io/cache/mem_file_cache_storage.h"
 #include "util/concurrency_stats.h"
 #include "util/runtime_profile.h"
+#include "util/stack_util.h"
 #include "util/stopwatch.hpp"
 #include "util/thread.h"
 #include "util/time.h"
@@ -1016,6 +1017,9 @@ size_t BlockFileCache::try_release() {
         std::lock_guard lc(cell->file_block->_mutex);
         remove_size += file_block->range().size();
         remove(file_block, cache_lock, lc);
+        VLOG_DEBUG << "try_release " << _cache_base_path
+                   << " hash=" << file_block->get_hash_value().to_string()
+                   << " offset=" << file_block->offset();
     }
     *_evict_by_try_release << remove_size;
     LOG(INFO) << "Released " << trash.size() << " blocks in file cache " << 
_cache_base_path;
@@ -1055,12 +1059,16 @@ const LRUQueue& BlockFileCache::get_queue(FileCacheType 
type) const {
 }
 
 void BlockFileCache::remove_file_blocks(std::vector<FileBlockCell*>& to_evict,
-                                        std::lock_guard<std::mutex>& 
cache_lock, bool sync) {
+                                        std::lock_guard<std::mutex>& 
cache_lock, bool sync,
+                                        std::string& reason) {
     auto remove_file_block_if = [&](FileBlockCell* cell) {
         FileBlockSPtr file_block = cell->file_block;
         if (file_block) {
             std::lock_guard block_lock(file_block->_mutex);
             remove(file_block, cache_lock, block_lock, sync);
+            VLOG_DEBUG << "remove_file_blocks"
+                       << " hash=" << file_block->get_hash_value().to_string()
+                       << " offset=" << file_block->offset() << " reason=" << 
reason;
         }
     };
     std::for_each(to_evict.begin(), to_evict.end(), remove_file_block_if);
@@ -1323,6 +1331,7 @@ bool BlockFileCache::remove_if_ttl_file_blocks(const 
UInt128Wrapper& file_key, b
 // remove specific cache synchronously, for critical operations
 // if in use, cache meta will be deleted after use and the block file is then 
deleted asynchronously
 void BlockFileCache::remove_if_cached(const UInt128Wrapper& file_key) {
+    std::string reason = "remove_if_cached";
     SCOPED_CACHE_LOCK(_mutex, this);
     bool is_ttl_file = remove_if_ttl_file_blocks(file_key, true, cache_lock, 
true);
     if (!is_ttl_file) {
@@ -1337,7 +1346,7 @@ void BlockFileCache::remove_if_cached(const 
UInt128Wrapper& file_key) {
                 }
             }
         }
-        remove_file_blocks(to_remove, cache_lock, true);
+        remove_file_blocks(to_remove, cache_lock, true, reason);
     }
 }
 
@@ -1345,6 +1354,7 @@ void BlockFileCache::remove_if_cached(const 
UInt128Wrapper& file_key) {
 // cache meta is deleted synchronously if not in use, and the block file is 
deleted asynchronously
 // if in use, cache meta will be deleted after use and the block file is then 
deleted asynchronously
 void BlockFileCache::remove_if_cached_async(const UInt128Wrapper& file_key) {
+    std::string reason = "remove_if_cached_async";
     SCOPED_CACHE_LOCK(_mutex, this);
     bool is_ttl_file = remove_if_ttl_file_blocks(file_key, true, cache_lock, 
/*sync*/ false);
     if (!is_ttl_file) {
@@ -1361,7 +1371,7 @@ void BlockFileCache::remove_if_cached_async(const 
UInt128Wrapper& file_key) {
                 }
             }
         }
-        remove_file_blocks(to_remove, cache_lock, false);
+        remove_file_blocks(to_remove, cache_lock, false, reason);
     }
 }
 
@@ -1458,7 +1468,9 @@ bool 
BlockFileCache::try_reserve_from_other_queue_by_time_interval(
         *(_evict_by_time_metrics_matrix[cache_type][cur_type]) << 
remove_size_per_type;
     }
     bool is_sync_removal = !evict_in_advance;
-    remove_file_blocks(to_evict, cache_lock, is_sync_removal);
+    std::string reason = std::string("try_reserve_by_time ") +
+                         " evict_in_advance=" + (evict_in_advance ? "true" : 
"false");
+    remove_file_blocks(to_evict, cache_lock, is_sync_removal, reason);
 
     return !is_overflow(removed_size, size, cur_cache_size, evict_in_advance);
 }
@@ -1501,7 +1513,9 @@ bool BlockFileCache::try_reserve_from_other_queue_by_size(
         *(_evict_by_size_metrics_matrix[cache_type][cur_type]) << 
cur_removed_size;
     }
     bool is_sync_removal = !evict_in_advance;
-    remove_file_blocks(to_evict, cache_lock, is_sync_removal);
+    std::string reason = std::string("try_reserve_by_size") +
+                         " evict_in_advance=" + (evict_in_advance ? "true" : 
"false");
+    remove_file_blocks(to_evict, cache_lock, is_sync_removal, reason);
     return !is_overflow(removed_size, size, cur_cache_size, evict_in_advance);
 }
 
@@ -1548,7 +1562,10 @@ bool BlockFileCache::try_reserve_for_lru(const 
UInt128Wrapper& hash,
         find_evict_candidates(queue, size, cur_cache_size, removed_size, 
to_evict, cache_lock,
                               cur_removed_size, evict_in_advance);
         bool is_sync_removal = !evict_in_advance;
-        remove_file_blocks(to_evict, cache_lock, is_sync_removal);
+        std::string reason = std::string("try_reserve for cache type ") +
+                             cache_type_to_string(context.cache_type) +
+                             " evict_in_advance=" + (evict_in_advance ? "true" 
: "false");
+        remove_file_blocks(to_evict, cache_lock, is_sync_removal, reason);
         *(_evict_by_self_lru_metrics_matrix[context.cache_type]) << 
cur_removed_size;
 
         if (is_overflow(removed_size, size, cur_cache_size, evict_in_advance)) 
{
@@ -1583,6 +1600,11 @@ void BlockFileCache::remove(FileBlockSPtr file_block, T& 
cache_lock, U& block_lo
     *_queue_evict_size_metrics[static_cast<int>(file_block->cache_type())]
             << file_block->range().size();
     *_total_evict_size_metrics << file_block->range().size();
+
+    VLOG_DEBUG << "Removing file block from cache. hash: " << hash.to_string()
+               << ", offset: " << offset << ", size: " << 
file_block->range().size()
+               << ", type: " << cache_type_to_string(type);
+
     if (file_block->state_unlock(block_lock) == FileBlock::State::DOWNLOADED) {
         FileCacheKey key;
         key.hash = hash;
@@ -2379,7 +2401,8 @@ bool BlockFileCache::try_reserve_during_async_load(size_t 
size,
     if (index_queue_size != 0) {
         collect_eliminate_fragments(get_queue(FileCacheType::INDEX));
     }
-    remove_file_blocks(to_evict, cache_lock, true);
+    std::string reason = "async load";
+    remove_file_blocks(to_evict, cache_lock, true, reason);
 
     return !_disk_resource_limit_mode || removed_size >= size;
 }
diff --git a/be/src/io/cache/block_file_cache.h 
b/be/src/io/cache/block_file_cache.h
index f73e593c57e..742ec7aaae4 100644
--- a/be/src/io/cache/block_file_cache.h
+++ b/be/src/io/cache/block_file_cache.h
@@ -478,7 +478,8 @@ private:
     bool is_overflow(size_t removed_size, size_t need_size, size_t 
cur_cache_size,
                      bool evict_in_advance) const;
 
-    void remove_file_blocks(std::vector<FileBlockCell*>&, 
std::lock_guard<std::mutex>&, bool sync);
+    void remove_file_blocks(std::vector<FileBlockCell*>&, 
std::lock_guard<std::mutex>&, bool sync,
+                            std::string& reason);
 
     void remove_file_blocks_and_clean_time_maps(std::vector<FileBlockCell*>&,
                                                 std::lock_guard<std::mutex>&);
diff --git a/be/src/io/fs/s3_file_reader.cpp b/be/src/io/fs/s3_file_reader.cpp
index 091768532b5..a20d448fa6d 100644
--- a/be/src/io/fs/s3_file_reader.cpp
+++ b/be/src/io/fs/s3_file_reader.cpp
@@ -32,6 +32,7 @@
 #include <utility>
 
 #include "common/compiler_util.h" // IWYU pragma: keep
+#include "io/cache/block_file_cache.h"
 #include "io/fs/err_utils.h"
 #include "io/fs/obj_storage_client.h"
 #include "io/fs/s3_common.h"
@@ -116,6 +117,9 @@ Status S3FileReader::read_at_impl(size_t offset, Slice 
result, size_t* bytes_rea
     size_t bytes_req = result.size;
     char* to = result.data;
     bytes_req = std::min(bytes_req, _file_size - offset);
+    VLOG_DEBUG << fmt::format("S3FileReader::read_at_impl offset={} size={} 
path={} hash={}",
+                              offset, result.size, _path.native(),
+                              
io::BlockFileCache::hash(_path.native()).to_string());
     VLOG_DEBUG << "enter s3 read_at_impl, off=" << offset << " n=" << bytes_req
                << " req=" << result.size << " file size=" << _file_size;
     if (UNLIKELY(bytes_req == 0)) {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to