This is an automated email from the ASF dual-hosted git repository.
zouxinyi pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
new 15296fa35b [improvement](tcmalloc) add moderate mode and avoid oom
(#14650)
15296fa35b is described below
commit 15296fa35be75c700d6d872f9d0373355e6bb2fb
Author: Yongqiang YANG <[email protected]>
AuthorDate: Mon Nov 28 23:22:18 2022 +0800
[improvement](tcmalloc) add moderate mode and avoid oom (#14650)
ReleaseToSystem aggressively when there is little free memory.
From #14374.
---
be/src/common/daemon.cpp | 115 ++++++++++++++++++++++----
be/src/runtime/memory/mem_tracker_limiter.cpp | 2 +
be/src/runtime/memory/mem_tracker_limiter.h | 6 ++
be/src/service/doris_main.cpp | 21 ++---
4 files changed, 115 insertions(+), 29 deletions(-)
diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index ed5fe06ffa..fc49853bb5 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -70,28 +70,109 @@ bool k_doris_exit = false;
void Daemon::tcmalloc_gc_thread() {
// TODO All cache GC wish to be supported
- size_t tc_use_memory_min = MemInfo::mem_limit();
+ // Limit size of tcmalloc cache via release_rate and max_cache_percent.
+ // We adjust release_rate according to memory_pressure, which is usage
percent of memory.
+ int64_t max_cache_percent = 60;
+ double release_rates[10] = {1.0, 1.0, 1.0, 5.0, 5.0, 20.0, 50.0, 100.0,
500.0, 2000.0};
+ int64_t pressure_limit = 90;
+ bool is_performance_mode = false;
+ size_t physical_limit_bytes = std::min(MemInfo::hard_mem_limit(),
MemInfo::mem_limit());
+
if (config::memory_mode == std::string("performance")) {
- tc_use_memory_min = std::max(tc_use_memory_min / 10 * 9,
- tc_use_memory_min - size_t(10) * 1024 *
1024 * 1024);
- } else {
- tc_use_memory_min = tc_use_memory_min >> 1;
+ max_cache_percent = 100;
+ pressure_limit = 90;
+ is_performance_mode = true;
+ physical_limit_bytes = std::min(MemInfo::mem_limit(),
MemInfo::physical_mem());
+ } else if (config::memory_mode == std::string("compact")) {
+ max_cache_percent = 20;
+ pressure_limit = 80;
}
- while
(!_stop_background_threads_latch.wait_for(MonoDelta::FromSeconds(10))) {
- size_t used_size = 0;
- size_t free_size = 0;
+ int last_ms = 0;
+ const int kMaxLastMs = 30000;
+ const int kIntervalMs = 10;
+ size_t init_aggressive_decommit = 0;
+ size_t current_aggressive_decommit = 0;
+ size_t expected_aggressive_decommit = 0;
+ int64_t last_memory_pressure = 0;
+
+
MallocExtension::instance()->GetNumericProperty("tcmalloc.aggressive_memory_decommit",
+ &init_aggressive_decommit);
+ current_aggressive_decommit = init_aggressive_decommit;
+
+ while
(!_stop_background_threads_latch.wait_for(MonoDelta::FromMilliseconds(kIntervalMs)))
{
+ size_t tc_used_bytes = 0;
+ size_t tc_alloc_bytes = 0;
+ size_t rss = PerfCounters::get_vm_rss();
+
+
MallocExtension::instance()->GetNumericProperty("generic.total_physical_bytes",
+ &tc_alloc_bytes);
+
MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes",
+ &tc_used_bytes);
+ int64_t tc_cached_bytes = tc_alloc_bytes - tc_used_bytes;
+ int64_t to_free_bytes =
+ (int64_t)tc_cached_bytes - (tc_used_bytes * max_cache_percent
/ 100);
+
+ int64_t memory_pressure = 0;
+ int64_t alloc_bytes = std::max(rss, tc_alloc_bytes);
+ memory_pressure = alloc_bytes * 100 / physical_limit_bytes;
+
+ expected_aggressive_decommit = init_aggressive_decommit;
+ if (memory_pressure > pressure_limit) {
+ // We are reaching oom, so release cache aggressively.
+ // Ideally, we should reuse cache and not allocate from system any
more,
+ // however, it is hard to set limit on cache of tcmalloc and doris
+ // use mmap in vectorized mode.
+ if (last_memory_pressure <= pressure_limit) {
+ int64_t min_free_bytes = alloc_bytes - physical_limit_bytes *
9 / 10;
+ to_free_bytes = std::max(to_free_bytes, min_free_bytes);
+ to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 30 /
100);
+ to_free_bytes = std::min(to_free_bytes, tc_cached_bytes);
+ expected_aggressive_decommit = 1;
+ } else {
+ // release rate is enough.
+ to_free_bytes = 0;
+ }
+ last_ms = kMaxLastMs;
+ } else if (memory_pressure > (pressure_limit - 10)) {
+ if (last_memory_pressure <= (pressure_limit - 10)) {
+ to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 10 /
100);
+ } else {
+ to_free_bytes = 0;
+ }
+ }
+
+ int release_rate_index = memory_pressure / 10;
+ double release_rate = 1.0;
+ if (release_rate_index >= sizeof(release_rates)) {
+ release_rate = 2000.0;
+ } else {
+ release_rate = release_rates[release_rate_index];
+ }
+ MallocExtension::instance()->SetMemoryReleaseRate(release_rate);
-
MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes",
- &used_size);
-
MallocExtension::instance()->GetNumericProperty("tcmalloc.pageheap_free_bytes",
&free_size);
- size_t alloc_size = used_size + free_size;
+ if ((current_aggressive_decommit != expected_aggressive_decommit) &&
!is_performance_mode) {
+
MallocExtension::instance()->SetNumericProperty("tcmalloc.aggressive_memory_decommit",
+
expected_aggressive_decommit);
+ current_aggressive_decommit = expected_aggressive_decommit;
+ }
- if (alloc_size > tc_use_memory_min) {
- size_t max_free_size = alloc_size * 20 / 100;
- if (free_size > max_free_size) {
- MallocExtension::instance()->ReleaseToSystem(free_size -
max_free_size);
- }
+ last_memory_pressure = memory_pressure;
+ if (to_free_bytes > 0) {
+ last_ms += kIntervalMs;
+ if (last_ms >= kMaxLastMs) {
+ LOG(INFO) << "generic.current_allocated_bytes " <<
tc_used_bytes
+ << ", generic.total_physical_bytes " <<
tc_alloc_bytes << ", rss " << rss
+ << ", max_cache_percent " << max_cache_percent << ",
release_rate "
+ << release_rate << ", memory_pressure " <<
memory_pressure
+ << ", physical_limit_bytes " << physical_limit_bytes
<< ", to_free_bytes "
+ << to_free_bytes << ", current_aggressive_decommit "
+ << current_aggressive_decommit;
+ MallocExtension::instance()->ReleaseToSystem(to_free_bytes);
+ last_ms = 0;
+ }
+ } else {
+ last_ms = 0;
}
}
}
diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp
b/be/src/runtime/memory/mem_tracker_limiter.cpp
index 48a2102e22..35680fc17f 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.cpp
+++ b/be/src/runtime/memory/mem_tracker_limiter.cpp
@@ -29,6 +29,8 @@
namespace doris {
+bool MemTrackerLimiter::_oom_avoidance {true};
+
MemTrackerLimiter::MemTrackerLimiter(int64_t byte_limit, const std::string&
label,
const std::shared_ptr<MemTrackerLimiter>&
parent,
RuntimeProfile* profile) {
diff --git a/be/src/runtime/memory/mem_tracker_limiter.h
b/be/src/runtime/memory/mem_tracker_limiter.h
index 73aaa8e500..75261e6471 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.h
+++ b/be/src/runtime/memory/mem_tracker_limiter.h
@@ -67,6 +67,9 @@ public:
public:
static bool sys_mem_exceed_limit_check(int64_t bytes) {
+ if (!_oom_avoidance) {
+ return false;
+ }
// Limit process memory usage using the actual physical memory of the
process in `/proc/self/status`.
// This is independent of the consumption value of the mem tracker,
which counts the virtual memory
// of the process malloc.
@@ -116,6 +119,8 @@ public:
// Returns the lowest limit for this tracker limiter and its ancestors.
Returns -1 if there is no limit.
int64_t get_lowest_limit() const;
+ static void disable_oom_avoidance() { _oom_avoidance = false; }
+
public:
// up to (but not including) end_tracker.
// This happens when we want to update tracking on a particular mem
tracker but the consumption
@@ -263,6 +268,7 @@ private:
// In some cases, in order to avoid the cumulative error of the upper
global tracker,
// the consumption of the current tracker is reset to zero.
bool _reset_zero = false;
+ static bool _oom_avoidance;
};
inline void MemTrackerLimiter::consume(int64_t bytes) {
diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp
index 4556cf939b..90bcf91dca 100644
--- a/be/src/service/doris_main.cpp
+++ b/be/src/service/doris_main.cpp
@@ -322,20 +322,17 @@ int main(int argc, char** argv) {
#if !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) &&
!defined(LEAK_SANITIZER) && \
!defined(THREAD_SANITIZER) && !defined(USE_JEMALLOC)
// Change the total TCMalloc thread cache size if necessary.
- size_t total_thread_cache_bytes;
- if
(!MallocExtension::instance()->GetNumericProperty("tcmalloc.max_total_thread_cache_bytes",
-
&total_thread_cache_bytes)) {
- fprintf(stderr, "Failed to get TCMalloc total thread cache size.\n");
- }
const size_t kDefaultTotalThreadCacheBytes = 1024 * 1024 * 1024;
- if (total_thread_cache_bytes < kDefaultTotalThreadCacheBytes) {
- if (!MallocExtension::instance()->SetNumericProperty(
- "tcmalloc.max_total_thread_cache_bytes",
kDefaultTotalThreadCacheBytes)) {
- fprintf(stderr, "Failed to change TCMalloc total thread cache
size.\n");
- return -1;
- }
+ if
(!MallocExtension::instance()->SetNumericProperty("tcmalloc.max_total_thread_cache_bytes",
+
kDefaultTotalThreadCacheBytes)) {
+ fprintf(stderr, "Failed to change TCMalloc total thread cache
size.\n");
+ return -1;
+ }
+ #endif
+
+ if (doris::config::memory_mode == std::string("performance")) {
+ doris::MemTrackerLimiter::disable_oom_avoidance();
}
-#endif
std::vector<doris::StorePath> paths;
auto olap_res =
doris::parse_conf_store_paths(doris::config::storage_root_path, &paths);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]