This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.1 by this push:
     new 93100ef475a branch-4.1: [fix](filecache) avoid crash when late holder 
cleanup sees removed cache cell #62437 (#63956)
93100ef475a is described below

commit 93100ef475a03b850e23bb51d03bdac404c531ca
Author: github-actions[bot] 
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Jun 2 19:18:55 2026 +0800

    branch-4.1: [fix](filecache) avoid crash when late holder cleanup sees 
removed cache cell #62437 (#63956)
    
    Cherry-picked from #62437
    
    Co-authored-by: zhengyu <[email protected]>
---
 be/src/io/cache/block_file_cache.cpp            |  32 ++++-
 be/test/io/cache/block_file_cache_test.cpp      | 173 ++++++++++++++++++++++++
 be/test/io/cache/block_file_cache_test_common.h |  12 +-
 3 files changed, 215 insertions(+), 2 deletions(-)

diff --git a/be/src/io/cache/block_file_cache.cpp 
b/be/src/io/cache/block_file_cache.cpp
index 1b49a592440..49d5a0ae0c4 100644
--- a/be/src/io/cache/block_file_cache.cpp
+++ b/be/src/io/cache/block_file_cache.cpp
@@ -1419,7 +1419,37 @@ void BlockFileCache::remove(FileBlockSPtr file_block, T& 
cache_lock, U& block_lo
     auto tablet_id = file_block->tablet_id();
     auto* cell = get_cell(hash, offset, cache_lock);
     file_block->cell = nullptr;
-    DCHECK(cell);
+    // Holder cleanup can race with prior cache metadata cleanup. In that case,
+    // skip the duplicate remove instead of touching a detached or replaced 
cell.
+    if (cell == nullptr) {
+        LOG(WARNING) << "remove skipped because cache cell is missing. hash=" 
<< hash.to_string()
+                     << " offset=" << offset << " size=" << 
file_block->range().size()
+                     << " type=" << cache_type_to_string(type)
+                     << " state=" << 
FileBlock::state_to_string(file_block->state_unsafe())
+                     << " expiration_time=" << expiration_time << " sync=" << 
sync;
+        return;
+    }
+    if (cell->file_block.get() != file_block.get()) {
+        auto* cell_file_block = cell->file_block.get();
+        LOG(WARNING)
+                << "remove skipped because cache cell points to a different 
file block. hash="
+                << hash.to_string() << " offset=" << offset
+                << " size=" << file_block->range().size() << " type=" << 
cache_type_to_string(type)
+                << " state=" << 
FileBlock::state_to_string(file_block->state_unsafe())
+                << " expiration_time=" << expiration_time << " sync=" << sync 
<< " cell_block_hash="
+                << (cell_file_block ? 
cell_file_block->get_hash_value().to_string() : "<null>")
+                << " cell_block_offset="
+                << (cell_file_block ? 
std::to_string(cell_file_block->offset()) : "<null>")
+                << " cell_block_size="
+                << (cell_file_block ? 
std::to_string(cell_file_block->range().size()) : "<null>")
+                << " cell_block_type="
+                << (cell_file_block ? 
cache_type_to_string(cell_file_block->cache_type())
+                                    : "<null>")
+                << " cell_block_state="
+                << (cell_file_block ? 
FileBlock::state_to_string(cell_file_block->state_unsafe())
+                                    : "<null>");
+        return;
+    }
     DCHECK(cell->queue_iterator);
     if (cell->queue_iterator) {
         auto& queue = get_queue(file_block->cache_type());
diff --git a/be/test/io/cache/block_file_cache_test.cpp 
b/be/test/io/cache/block_file_cache_test.cpp
index d31c6fa3cbf..091d2792b89 100644
--- a/be/test/io/cache/block_file_cache_test.cpp
+++ b/be/test/io/cache/block_file_cache_test.cpp
@@ -2723,6 +2723,179 @@ TEST_F(BlockFileCacheTest, remove_directly) {
     }
 }
 
+TEST_F(BlockFileCacheTest, late_holder_remove_skips_missing_cache_cell) {
+    if (fs::exists(cache_base_path)) {
+        fs::remove_all(cache_base_path);
+    }
+    fs::create_directories(cache_base_path);
+
+    io::FileCacheSettings settings;
+    settings.query_queue_size = 30;
+    settings.query_queue_elements = 5;
+    settings.index_queue_size = 30;
+    settings.index_queue_elements = 5;
+    settings.disposable_queue_size = 30;
+    settings.disposable_queue_elements = 5;
+    settings.capacity = 90;
+    settings.max_file_block_size = 30;
+    settings.max_query_cache_size = 0;
+
+    io::BlockFileCache cache(cache_base_path, settings);
+    ASSERT_TRUE(cache.initialize());
+    wait_until_cache_ready(cache);
+
+    io::CacheContext context;
+    ReadStatistics rstats;
+    context.stats = &rstats;
+    context.cache_type = io::FileCacheType::NORMAL;
+
+    auto key = 
io::BlockFileCache::hash("late-holder-remove-skips-missing-cache-cell");
+    auto holder = std::make_unique<FileBlocksHolder>(cache.get_or_set(key, 0, 
5, context));
+    auto blocks = fromHolder(*holder);
+    ASSERT_EQ(blocks.size(), 1);
+
+    auto file_block = blocks[0];
+    ASSERT_EQ(file_block->get_or_set_downloader(), 
io::FileBlock::get_caller_id());
+    download(file_block);
+    file_block->set_deleting();
+
+    ASSERT_EQ(cache._cur_cache_size, 5);
+    {
+        auto* cache_ptr = &cache;
+        SCOPED_CACHE_LOCK(cache_ptr->_mutex, cache_ptr);
+        auto file_it = cache._files.find(key);
+        ASSERT_NE(file_it, cache._files.end());
+        auto cell_it = file_it->second.find(0);
+        ASSERT_NE(cell_it, file_it->second.end());
+        auto& cell = cell_it->second;
+        ASSERT_TRUE(cell.queue_iterator.has_value());
+
+        auto& queue = cache.get_queue(file_block->cache_type());
+        queue.remove(*cell.queue_iterator, cache_lock);
+        cache._cur_cache_size -= file_block->range().size();
+
+        file_it->second.erase(cell_it);
+        if (file_it->second.empty()) {
+            cache._files.erase(file_it);
+        }
+    }
+
+    blocks.clear();
+    ASSERT_EQ(file_block.use_count(), 2);
+
+    holder.reset();
+
+    EXPECT_EQ(cache._cur_cache_size, 0);
+    EXPECT_EQ(cache.get_file_blocks_num(io::FileCacheType::NORMAL), 0);
+
+    file_block.reset();
+    if (fs::exists(cache_base_path)) {
+        fs::remove_all(cache_base_path);
+    }
+}
+
+TEST_F(BlockFileCacheTest, late_holder_remove_skips_replaced_cache_cell) {
+    if (fs::exists(cache_base_path)) {
+        fs::remove_all(cache_base_path);
+    }
+    fs::create_directories(cache_base_path);
+
+    io::FileCacheSettings settings;
+    settings.query_queue_size = 30;
+    settings.query_queue_elements = 5;
+    settings.index_queue_size = 30;
+    settings.index_queue_elements = 5;
+    settings.disposable_queue_size = 30;
+    settings.disposable_queue_elements = 5;
+    settings.capacity = 90;
+    settings.max_file_block_size = 30;
+    settings.max_query_cache_size = 0;
+
+    io::BlockFileCache cache(cache_base_path, settings);
+    ASSERT_TRUE(cache.initialize());
+    wait_until_cache_ready(cache);
+
+    io::CacheContext context;
+    ReadStatistics rstats;
+    context.stats = &rstats;
+    context.cache_type = io::FileCacheType::NORMAL;
+
+    auto key = 
io::BlockFileCache::hash("late-holder-remove-skips-replaced-cache-cell");
+    auto old_holder = std::make_unique<FileBlocksHolder>(cache.get_or_set(key, 
0, 5, context));
+    auto old_blocks = fromHolder(*old_holder);
+    ASSERT_EQ(old_blocks.size(), 1);
+
+    auto old_file_block = old_blocks[0];
+    ASSERT_EQ(old_file_block->get_or_set_downloader(), 
io::FileBlock::get_caller_id());
+    download(old_file_block);
+    old_file_block->set_deleting();
+
+    ASSERT_EQ(cache._cur_cache_size, 5);
+    {
+        auto* cache_ptr = &cache;
+        SCOPED_CACHE_LOCK(cache_ptr->_mutex, cache_ptr);
+        auto file_it = cache._files.find(key);
+        ASSERT_NE(file_it, cache._files.end());
+        auto cell_it = file_it->second.find(0);
+        ASSERT_NE(cell_it, file_it->second.end());
+        auto& cell = cell_it->second;
+        ASSERT_TRUE(cell.queue_iterator.has_value());
+
+        auto& queue = cache.get_queue(old_file_block->cache_type());
+        queue.remove(*cell.queue_iterator, cache_lock);
+        cache._cur_cache_size -= old_file_block->range().size();
+
+        file_it->second.erase(cell_it);
+        if (file_it->second.empty()) {
+            cache._files.erase(file_it);
+        }
+    }
+
+    old_blocks.clear();
+    ASSERT_EQ(old_file_block.use_count(), 2);
+
+    auto new_holder = std::make_unique<FileBlocksHolder>(cache.get_or_set(key, 
0, 5, context));
+    auto new_blocks = fromHolder(*new_holder);
+    ASSERT_EQ(new_blocks.size(), 1);
+
+    auto new_file_block = new_blocks[0];
+    ASSERT_NE(new_file_block.get(), old_file_block.get());
+    ASSERT_EQ(cache._cur_cache_size, 5);
+    {
+        auto* cache_ptr = &cache;
+        SCOPED_CACHE_LOCK(cache_ptr->_mutex, cache_ptr);
+        auto* cell = cache.get_cell(key, 0, cache_lock);
+        ASSERT_NE(cell, nullptr);
+        ASSERT_EQ(cell->file_block.get(), new_file_block.get());
+    }
+
+    old_holder.reset();
+
+    EXPECT_EQ(old_file_block->cell, nullptr);
+    EXPECT_EQ(cache._cur_cache_size, 5);
+    EXPECT_EQ(cache.get_file_blocks_num(io::FileCacheType::NORMAL), 1);
+    {
+        auto* cache_ptr = &cache;
+        SCOPED_CACHE_LOCK(cache_ptr->_mutex, cache_ptr);
+        auto* cell = cache.get_cell(key, 0, cache_lock);
+        ASSERT_NE(cell, nullptr);
+        EXPECT_EQ(cell->file_block.get(), new_file_block.get());
+    }
+
+    new_blocks.clear();
+    new_file_block->set_deleting();
+    new_file_block.reset();
+    new_holder.reset();
+    old_file_block.reset();
+
+    EXPECT_EQ(cache._cur_cache_size, 0);
+    EXPECT_EQ(cache.get_file_blocks_num(io::FileCacheType::NORMAL), 0);
+
+    if (fs::exists(cache_base_path)) {
+        fs::remove_all(cache_base_path);
+    }
+}
+
 TEST_F(BlockFileCacheTest, test_factory_1) {
     std::string cache_path2 = caches_dir / "cache2" / "";
     std::string cache_path3 = caches_dir / "cache3" / "";
diff --git a/be/test/io/cache/block_file_cache_test_common.h 
b/be/test/io/cache/block_file_cache_test_common.h
index de29f5543d7..17de4bb814b 100644
--- a/be/test/io/cache/block_file_cache_test_common.h
+++ b/be/test/io/cache/block_file_cache_test_common.h
@@ -94,6 +94,16 @@ extern void complete_into_memory(const io::FileBlocksHolder& 
holder);
 extern void test_file_cache(io::FileCacheType cache_type);
 extern void test_file_cache_memory_storage(io::FileCacheType cache_type);
 
+inline void wait_until_cache_ready(io::BlockFileCache& cache) {
+    for (int i = 0; i < 100; ++i) {
+        if (cache.get_async_open_success()) {
+            break;
+        }
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+    }
+    ASSERT_TRUE(cache.get_async_open_success());
+}
+
 class BlockFileCacheTest : public testing::Test {
 public:
     static void SetUpTestSuite() {
@@ -133,4 +143,4 @@ private:
     inline static std::unique_ptr<FileCacheFactory> factory = 
std::make_unique<FileCacheFactory>();
 };
 
-} // end of namespace doris::io
\ No newline at end of file
+} // end of namespace doris::io


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to