This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 88530bf9437 [improve](path gc) Execute path gc interval adaptive to disk size (#34538)
88530bf9437 is described below
commit 88530bf9437e60190f198252ab82f53fa53d4c10
Author: deardeng <[email protected]>
AuthorDate: Fri May 10 21:32:34 2024 +0800
[improve](path gc) Execute path gc interval adaptive to disk size (#34538)
---
be/src/olap/olap_server.cpp | 40 +++++++++++++++++++++++++++++++++++-----
be/src/olap/storage_engine.h | 2 ++
2 files changed, 37 insertions(+), 5 deletions(-)
diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp
index 8a687a64a92..f4eb28233a8 100644
--- a/be/src/olap/olap_server.cpp
+++ b/be/src/olap/olap_server.cpp
@@ -398,20 +398,50 @@ void StorageEngine::_unused_rowset_monitor_thread_callback() {
} while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval)));
}
+int32_t StorageEngine::_auto_get_interval_by_disk_capacity(DataDir* data_dir) {
+ double disk_used = data_dir->get_usage(0);
+ double remain_used = 1 - disk_used;
+ DCHECK(remain_used >= 0 && remain_used <= 1);
+ DCHECK(config::path_gc_check_interval_second >= 0);
+ int32_t ret = 0;
+ if (remain_used > 0.9) {
+ // if config::path_gc_check_interval_second == 24h
+ ret = config::path_gc_check_interval_second;
+ } else if (remain_used > 0.7) {
+ // 12h
+ ret = config::path_gc_check_interval_second / 2;
+ } else if (remain_used > 0.5) {
+ // 6h
+ ret = config::path_gc_check_interval_second / 4;
+ } else if (remain_used > 0.3) {
+ // 4h
+ ret = config::path_gc_check_interval_second / 6;
+ } else {
+ // 3h
+ ret = config::path_gc_check_interval_second / 8;
+ }
+ return ret;
+}
+
void StorageEngine::_path_gc_thread_callback(DataDir* data_dir) {
LOG(INFO) << "try to start path gc thread!";
- int32_t interval = config::path_gc_check_interval_second;
+ int32_t last_exec_time = 0;
do {
- LOG(INFO) << "try to perform path gc!";
- data_dir->perform_path_gc();
+ int32_t current_time = time(nullptr);
- interval = config::path_gc_check_interval_second;
+ int32_t interval = _auto_get_interval_by_disk_capacity(data_dir);
if (interval <= 0) {
LOG(WARNING) << "path gc thread check interval config is illegal:"
<< interval
<< "will be forced set to half hour";
interval = 1800; // 0.5 hour
}
- } while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval)));
+ if (current_time - last_exec_time >= interval) {
+ LOG(INFO) << "try to perform path gc! disk remain [" << 1 - data_dir->get_usage(0)
+ << "] internal [" << interval << "]";
+ data_dir->perform_path_gc();
+ last_exec_time = time(nullptr);
+ }
+ } while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(5)));
LOG(INFO) << "stop path gc thread!";
}
diff --git a/be/src/olap/storage_engine.h b/be/src/olap/storage_engine.h
index 60ef681c3c5..860886bc9b7 100644
--- a/be/src/olap/storage_engine.h
+++ b/be/src/olap/storage_engine.h
@@ -383,6 +383,8 @@ private:
int _get_and_set_next_disk_index(int64 partition_id, TStorageMedium::type storage_medium);
+ int32_t _auto_get_interval_by_disk_capacity(DataDir* data_dir);
+
private:
EngineOptions _options;
std::mutex _store_lock;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]