freemandealer commented on code in PR #64496:
URL: https://github.com/apache/doris/pull/64496#discussion_r3412479792
##########
be/src/io/cache/block_file_cache.cpp:
##########
@@ -2189,6 +2140,154 @@ void BlockFileCache::run_background_ttl_gc() {
}
}
+size_t BlockFileCache::repair_duplicate_ttl_dirs_once() {
+ if (!config::enable_file_cache_ttl_repair_checker ||
+ _storage->get_type() != FileCacheStorageType::DISK ||
!_async_open_done) {
+ return 0;
+ }
+
+ const int64_t max_scan_prefix_dirs = std::max<int64_t>(
+ 0,
config::file_cache_ttl_repair_checker_max_scan_prefix_dirs_per_round);
+ if (max_scan_prefix_dirs == 0) {
+ return 0;
+ }
+
+ int64_t duration_ns = 0;
+ std::vector<FileCacheDuplicateKeyDirs> duplicate_dirs;
+ size_t next_prefix_index = _ttl_repair_checker_next_prefix_index;
+ size_t scanned_prefix_dirs = 0;
+ {
+ SCOPED_RAW_TIMER(&duration_ns);
+ auto st =
_storage->list_duplicate_key_dirs(_ttl_repair_checker_next_prefix_index,
+ max_scan_prefix_dirs,
&duplicate_dirs,
+ &next_prefix_index,
&scanned_prefix_dirs);
+ if (!st.ok()) {
+ LOG(WARNING) << "Failed to scan duplicate ttl dirs for file cache
" << _cache_base_path
+ << ", error=" << st;
+ *_ttl_repair_checker_latency_us << (duration_ns / 1000);
+ return 0;
+ }
+ _ttl_repair_checker_next_prefix_index = next_prefix_index;
+ }
+ *_ttl_repair_checker_scanned_prefix_dirs << scanned_prefix_dirs;
+ *_ttl_repair_checker_suspect_hashes << duplicate_dirs.size();
+
+ const int64_t max_repairs =
+ std::max<int64_t>(0,
config::file_cache_ttl_repair_checker_max_repairs_per_round);
+ const int64_t repair_sleep_ms =
+ std::max<int64_t>(0,
config::file_cache_ttl_repair_checker_repair_sleep_ms);
+ if (max_repairs == 0) {
+ *_ttl_repair_checker_latency_us << (duration_ns / 1000);
+ return 0;
+ }
+
+ const auto sleep_per_repair = std::chrono::milliseconds(repair_sleep_ms);
+ size_t repaired_dirs = 0;
+ size_t skipped_hashes = 0;
+ for (const auto& duplicate : duplicate_dirs) {
+ if (_close || repaired_dirs >= static_cast<size_t>(max_repairs)) {
+ break;
+ }
+
+ uint64_t canonical_expiration_time = 0;
+ bool should_repair = false;
+ {
+ SCOPED_CACHE_LOCK(_mutex, this);
+ auto file_iter = _files.find(duplicate.hash);
+ if (file_iter == _files.end() || file_iter->second.empty()) {
+ ++skipped_hashes;
+ continue;
+ }
+
+ bool has_canonical = false;
+ bool has_unstable_block = false;
+ bool has_multiple_expiration_times = false;
+ for (auto& [_, cell] : file_iter->second) {
+ std::lock_guard block_lock(cell.file_block->_mutex);
+ if (cell.file_block->state_unlock(block_lock) !=
FileBlock::State::DOWNLOADED) {
+ has_unstable_block = true;
+ break;
+ }
+
+ const auto block_expiration_time =
cell.file_block->expiration_time();
+ if (!has_canonical) {
+ canonical_expiration_time = block_expiration_time;
+ has_canonical = true;
+ } else if (canonical_expiration_time != block_expiration_time)
{
+ has_multiple_expiration_times = true;
+ break;
+ }
+ }
+
+ should_repair =
+ has_canonical && !has_unstable_block &&
!has_multiple_expiration_times &&
+ std::find(duplicate.expiration_times.begin(),
duplicate.expiration_times.end(),
+ canonical_expiration_time) !=
duplicate.expiration_times.end();
+ if (!should_repair) {
+ ++skipped_hashes;
+ }
+ }
+
+ if (!should_repair) {
+ continue;
+ }
+
+ for (auto expiration_time : duplicate.expiration_times) {
+ if (_close || repaired_dirs >= static_cast<size_t>(max_repairs)) {
+ break;
+ }
Review Comment:
this is a trade off for avoid lock overhead
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]