github-actions[bot] commented on code in PR #64186:
URL: https://github.com/apache/doris/pull/64186#discussion_r3369747439


##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -941,6 +870,129 @@ FileBlocksHolder BlockFileCache::get_or_set(const 
UInt128Wrapper& hash, size_t o
     return FileBlocksHolder(std::move(file_blocks));
 }
 
+void BlockFileCache::enqueue_hash_for_cleanup(const UInt128Wrapper& hash) {
+    if (_storage->get_type() != DISK) {
+        return;
+    }
+    bool ret = _recycle_hashes.enqueue(hash);
+    if (!ret) {
+        LOG_WARNING("Failed to push recycle hash to queue, hash={}", 
hash.to_string());
+    }
+}
+
+CacheContext BlockFileCache::make_cell_context(
+        const UInt128Wrapper& hash, const CacheContext& context,
+        std::lock_guard<std::mutex>& /* cache_lock */) const {
+    CacheContext result = context;
+    if (result.storage_expiration_time < 0) {
+        result.storage_expiration_time = result.expiration_time;
+    }
+
+    auto file_it = _files.find(hash);
+    if (file_it != _files.end() && !file_it->second.empty()) {
+        const auto& file_block = file_it->second.begin()->second.file_block;
+        result.cache_type = file_block->cache_type();
+        result.expiration_time = file_block->expiration_time();
+        if (context.storage_expiration_time < 0) {
+            result.storage_expiration_time = 
file_block->storage_expiration_time();
+        }
+    } else if (auto ttl_it = _key_to_time.find(hash); ttl_it != 
_key_to_time.end()) {
+        result.cache_type = FileCacheType::TTL;
+        result.expiration_time = ttl_it->second;
+    }
+
+    if (result.expiration_time == 0 && result.cache_type == 
FileCacheType::TTL) {
+        result.cache_type = FileCacheType::NORMAL;
+    } else if (result.expiration_time != 0 && result.cache_type != 
FileCacheType::TTL) {
+        result.cache_type = FileCacheType::TTL;
+    }
+    return result;
+}
+
+void BlockFileCache::erase_key_expiration_time(const UInt128Wrapper& hash, 
uint64_t expiration_time,
+                                               std::lock_guard<std::mutex>& /* 
cache_lock */) {
+    auto time_to_key_iter = _time_to_key.equal_range(expiration_time);
+    while (time_to_key_iter.first != time_to_key_iter.second) {
+        if (time_to_key_iter.first->second == hash) {
+            time_to_key_iter.first = 
_time_to_key.erase(time_to_key_iter.first);
+            break;
+        }
+        time_to_key_iter.first++;
+    }
+}
+
+Status BlockFileCache::update_cell_logical_meta(FileBlockCell& cell, 
FileCacheType new_type,
+                                                uint64_t new_expiration_time,
+                                                std::lock_guard<std::mutex>& 
cache_lock) {
+    auto origin_type = cell.file_block->cache_type();
+    
RETURN_IF_ERROR(cell.file_block->update_expiration_time(new_expiration_time));
+    if (origin_type == new_type) {
+        return Status::OK();
+    }
+    if (new_type == FileCacheType::TTL || origin_type == FileCacheType::TTL) {
+        
RETURN_IF_ERROR(cell.file_block->change_cache_type_between_ttl_and_others(new_type));
+    } else {
+        RETURN_IF_ERROR(cell.file_block->update_logical_cache_type(new_type));
+    }
+
+    if (cell.queue_iterator) {
+        auto& origin_queue = get_queue(origin_type);
+        origin_queue.remove(cell.queue_iterator.value(), cache_lock);
+        _lru_recorder->record_queue_event(origin_type, CacheLRULogType::REMOVE,
+                                          cell.file_block->get_hash_value(),
+                                          cell.file_block->offset(), 
cell.size());
+    }
+    auto& new_queue = get_queue(new_type);
+    cell.queue_iterator =
+            new_queue.add(cell.file_block->get_hash_value(), 
cell.file_block->offset(),
+                          cell.file_block->range().size(), cache_lock);
+    _lru_recorder->record_queue_event(new_type, CacheLRULogType::ADD,
+                                      cell.file_block->get_hash_value(), 
cell.file_block->offset(),
+                                      cell.size());
+    if (origin_type == FileCacheType::TTL) {
+        _cur_ttl_size -= cell.size();
+    }
+    if (new_type == FileCacheType::TTL) {
+        _cur_ttl_size += cell.size();
+    }
+    return Status::OK();
+}
+
+void BlockFileCache::update_hash_logical_meta(const UInt128Wrapper& hash, 
FileCacheType new_type,
+                                              uint64_t new_expiration_time,
+                                              std::lock_guard<std::mutex>& 
cache_lock) {
+    if (auto iter = _files.find(hash); iter != _files.end()) {
+        for (auto& [_, cell] : iter->second) {
+            auto st = update_cell_logical_meta(cell, new_type, 
new_expiration_time, cache_lock);
+            if (!st.ok()) {
+                LOG_WARNING("Failed to update logical file cache meta. 
hash={}, expiration_time={}",
+                            hash.to_string(), new_expiration_time)
+                        .error(st);
+            }
+        }
+    }
+
+    if (auto iter = _key_to_time.find(hash); iter != _key_to_time.end()) {
+        erase_key_expiration_time(hash, iter->second, cache_lock);
+        if (new_expiration_time == 0) {
+            _key_to_time.erase(iter);
+        } else {
+            iter->second = new_expiration_time;
+            _time_to_key.insert(std::make_pair(new_expiration_time, hash));
+        }
+    } else if (new_expiration_time != 0) {
+        _key_to_time[hash] = new_expiration_time;
+        _time_to_key.insert(std::make_pair(new_expiration_time, hash));

Review Comment:
   This now records TTL metadata even when `hash` has no cached blocks. The old 
`modify_expiration_time()` only inserted `_key_to_time` in the branch where 
`_files.find(hash)` succeeded; with this helper, the `else if` runs for any 
positive expiration. In production, `CloudTablet::sync_meta()` calls 
`modify_expiration_time()` for every segment/index key on TTL changes and 
`FileCacheFactory::get_by_path()` always returns a cache shard, so uncached 
rowset files can populate `_key_to_time` without any `_files` entry. Later 
`make_cell_context()` treats that stale entry as authoritative and new cache 
blocks for the hash are created as TTL with this expiration, even though no 
cached block existed when the metadata change was applied. Please keep the 
previous guard by only creating a new `_key_to_time` entry when `_files` 
contains the hash, or have `modify_expiration_time()` return without side 
effects for absent hashes.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to