This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new ed6e36df57c branch-4.1: [fix](filecache) add async lru update 
mechanism and fix partial hit in cache reader(pick#61083) (#64234)
ed6e36df57c is described below

commit ed6e36df57c0bdf90f7f06ba5f2477e11529902b
Author: zhengyu <[email protected]>
AuthorDate: Tue Jun 9 10:20:31 2026 +0800

    branch-4.1: [fix](filecache) add async lru update mechanism and fix partial 
hit in cache reader(pick#61083) (#64234)
    
    Original PR: https://github.com/apache/doris/pull/61083
    Picked to: branch-4.1
    Pick branch: freemandealer:pick-branch-4.1-pr-61083
    
    Validation:
    - git diff --check
    - build-support/check-format.sh with clang-format 16
    
    Notes:
    The first two original commits were already present in branch-4.1 and
    were skipped as empty; picked the remaining commits.
---
 be/src/common/config.cpp                           |  2 +-
 be/src/io/cache/block_file_cache.h                 |  3 +++
 .../cached_remote_file_reader_lock_wait_test.cpp   | 29 +++++++++++++++++-----
 .../io/cache/cached_remote_file_reader_test.cpp    |  1 +
 4 files changed, 28 insertions(+), 7 deletions(-)

diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 37f47066640..90618889956 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1181,7 +1181,7 @@ DEFINE_mInt32(file_cache_evict_in_advance_interval_ms, 
"1000");
 DEFINE_mInt64(file_cache_evict_in_advance_batch_bytes, "31457280"); // 30MB
 DEFINE_mInt64(file_cache_evict_in_advance_recycle_keys_num_threshold, "1000");
 
-DEFINE_mBool(enable_read_cache_file_directly, "false");
+DEFINE_mBool(enable_read_cache_file_directly, "true");
 DEFINE_mBool(file_cache_enable_evict_from_other_queue_by_size, "true");
 // If true, evict the ttl cache using LRU when full.
 // Otherwise, only expiration can evict ttl and new data won't add to cache 
when full.
diff --git a/be/src/io/cache/block_file_cache.h 
b/be/src/io/cache/block_file_cache.h
index dae25bfaa52..50924101817 100644
--- a/be/src/io/cache/block_file_cache.h
+++ b/be/src/io/cache/block_file_cache.h
@@ -316,6 +316,9 @@ public:
 
     // for be UTs
     std::map<std::string, double> get_stats_unsafe();
+    [[nodiscard]] size_t need_update_lru_blocks_size_unsafe() const {
+        return _need_update_lru_blocks.size();
+    }
 
     using AccessRecord =
             std::unordered_map<AccessKeyAndOffset, LRUQueue::Iterator, 
KeyAndOffsetHash>;
diff --git a/be/test/io/cache/cached_remote_file_reader_lock_wait_test.cpp 
b/be/test/io/cache/cached_remote_file_reader_lock_wait_test.cpp
index 6b788534167..97b846a1db2 100644
--- a/be/test/io/cache/cached_remote_file_reader_lock_wait_test.cpp
+++ b/be/test/io/cache/cached_remote_file_reader_lock_wait_test.cpp
@@ -86,6 +86,7 @@ private:
 
 struct LockWaitSummary {
     int64_t total_ns {0};
+    int64_t trimmed_total_ns {0};
     int64_t max_ns {0};
     int64_t p50_ns {0};
     int64_t p95_ns {0};
@@ -109,6 +110,9 @@ LockWaitSummary summarize_lock_wait(std::vector<int64_t>* 
values) {
     }
     std::sort(values->begin(), values->end());
     summary.total_ns = std::accumulate(values->begin(), values->end(), int64_t 
{0});
+    const size_t trimmed_count = std::max<size_t>(1, values->size() * 95 / 
100);
+    summary.trimmed_total_ns =
+            std::accumulate(values->begin(), values->begin() + trimmed_count, 
int64_t {0});
     summary.max_ns = values->back();
     summary.p50_ns = get_percentile_value(*values, 0.50);
     summary.p95_ns = get_percentile_value(*values, 0.95);
@@ -136,6 +140,7 @@ struct LockWaitWorkloadResult {
     size_t warmup_failed_reads {0};
     size_t failed_reads {0};
     size_t sample_count {0};
+    size_t pending_lru_update_blocks {0};
 };
 
 size_t calc_thread_count() {
@@ -233,6 +238,7 @@ protected:
         FileReaderOptions opts;
         opts.cache_type = FileCachePolicy::FILE_BLOCK_CACHE;
         opts.is_doris_table = true;
+        opts.tablet_id = 10086;
 
         for (size_t i = 0; i < config.file_count; ++i) {
             std::string path =
@@ -326,6 +332,7 @@ protected:
 
         result.sample_count = merged_samples.size();
         result.summary = summarize_lock_wait(&merged_samples);
+        result.pending_lru_update_blocks = 
_cache->need_update_lru_blocks_size_unsafe();
         return result;
     }
 
@@ -370,15 +377,19 @@ TEST_F(CachedRemoteFileReaderLockWaitTest,
     EXPECT_GT(result.summary.non_zero_samples, 0);
 }
 
-TEST_F(CachedRemoteFileReaderLockWaitTest, 
AsyncTouchOnGetOrSetReducesLockWait) {
+TEST_F(CachedRemoteFileReaderLockWaitTest, 
AsyncTouchOnGetOrSetDefersLruUpdate) {
     const bool original_direct_read = config::enable_read_cache_file_directly;
     const bool original_async_touch = 
config::enable_file_cache_async_touch_on_get_or_set;
-    Defer defer {[original_direct_read, original_async_touch] {
+    const auto original_update_interval_ms =
+            config::file_cache_background_block_lru_update_interval_ms;
+    Defer defer {[original_direct_read, original_async_touch, 
original_update_interval_ms] {
         config::enable_read_cache_file_directly = original_direct_read;
         config::enable_file_cache_async_touch_on_get_or_set = 
original_async_touch;
+        config::file_cache_background_block_lru_update_interval_ms = 
original_update_interval_ms;
     }};
 
     config::enable_read_cache_file_directly = false;
+    config::file_cache_background_block_lru_update_interval_ms = 60 * 60 * 
1000;
 
     LockWaitWorkloadConfig workload;
     workload.file_count = 1536;
@@ -406,22 +417,28 @@ TEST_F(CachedRemoteFileReaderLockWaitTest, 
AsyncTouchOnGetOrSetReducesLockWait)
     EXPECT_EQ(async_result.sample_count, workload.thread_count * 
workload.ops_per_thread);
 
     LOG(INFO) << "sync_touch lock wait: total_ns=" << 
sync_result.summary.total_ns
+              << " trimmed_total_ns=" << sync_result.summary.trimmed_total_ns
               << " avg_ns=" << sync_result.summary.avg_ns
               << " p95_ns=" << sync_result.summary.p95_ns
               << " p99_ns=" << sync_result.summary.p99_ns
-              << " non_zero_samples=" << sync_result.summary.non_zero_samples;
+              << " non_zero_samples=" << sync_result.summary.non_zero_samples
+              << " pending_lru_update_blocks=" << 
sync_result.pending_lru_update_blocks;
     LOG(INFO) << "async_touch lock wait: total_ns=" << 
async_result.summary.total_ns
+              << " trimmed_total_ns=" << async_result.summary.trimmed_total_ns
               << " avg_ns=" << async_result.summary.avg_ns
               << " p95_ns=" << async_result.summary.p95_ns
               << " p99_ns=" << async_result.summary.p99_ns
-              << " non_zero_samples=" << async_result.summary.non_zero_samples;
+              << " non_zero_samples=" << async_result.summary.non_zero_samples
+              << " pending_lru_update_blocks=" << 
async_result.pending_lru_update_blocks;
 
     EXPECT_GT(sync_result.summary.total_ns, 0);
     EXPECT_GT(async_result.summary.total_ns, 0);
     EXPECT_GT(sync_result.summary.non_zero_samples, 0);
     EXPECT_GT(async_result.summary.non_zero_samples, 0);
-    EXPECT_LT(async_result.summary.total_ns, sync_result.summary.total_ns);
-    EXPECT_LT(async_result.summary.p95_ns, sync_result.summary.p95_ns);
+    // Aggregate timing comparisons are scheduler-sensitive on busy CI hosts. 
Verify the
+    // deterministic behavior instead: async touch defers hit-block LRU 
updates out of get_or_set.
+    EXPECT_EQ(sync_result.pending_lru_update_blocks, 0);
+    EXPECT_GT(async_result.pending_lru_update_blocks, 0);
 }
 
 } // namespace doris::io
diff --git a/be/test/io/cache/cached_remote_file_reader_test.cpp 
b/be/test/io/cache/cached_remote_file_reader_test.cpp
index 39da1bfb6fd..84cce46c998 100644
--- a/be/test/io/cache/cached_remote_file_reader_test.cpp
+++ b/be/test/io/cache/cached_remote_file_reader_test.cpp
@@ -54,6 +54,7 @@ TEST_F(BlockFileCacheTest,
     io::FileReaderOptions opts;
     opts.cache_type = io::cache_type_from_string("file_block_cache");
     opts.is_doris_table = true;
+    opts.tablet_id = 10086;
 
     {
         FileReaderSPtr local_reader;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to