[doris] branch branch-1.1-lts updated: [improvement](tcmalloc) add moderate mode and avoid oom (#14650)

zouxinyi Mon, 28 Nov 2022 07:22:31 -0800

This is an automated email from the ASF dual-hosted git repository.

zouxinyi pushed a commit to branch branch-1.1-lts
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-1.1-lts by this push:
     new 15296fa35b [improvement](tcmalloc) add moderate mode and avoid oom (#14650)
15296fa35b is described below

commit 15296fa35be75c700d6d872f9d0373355e6bb2fb
Author: Yongqiang YANG <[email protected]>
AuthorDate: Mon Nov 28 23:22:18 2022 +0800

    [improvement](tcmalloc) add moderate mode and avoid oom (#14650)
    
    Call ReleaseToSystem aggressively when there is little free memory.
    
    From #14374.
---
 be/src/common/daemon.cpp                      | 115 ++++++++++++++++++++++----
 be/src/runtime/memory/mem_tracker_limiter.cpp |   2 +
 be/src/runtime/memory/mem_tracker_limiter.h   |   6 ++
 be/src/service/doris_main.cpp                 |  21 ++---
 4 files changed, 115 insertions(+), 29 deletions(-)

diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp
index ed5fe06ffa..fc49853bb5 100644
--- a/be/src/common/daemon.cpp
+++ b/be/src/common/daemon.cpp
@@ -70,28 +70,109 @@ bool k_doris_exit = false;
 void Daemon::tcmalloc_gc_thread() {
     // TODO All cache GC wish to be supported
 
-    size_t tc_use_memory_min = MemInfo::mem_limit();
+    // Limit size of tcmalloc cache via release_rate and max_cache_percent.
+    // We adjust release_rate according to memory_pressure, which is usage 
percent of memory.
+    int64_t max_cache_percent = 60;
+    double release_rates[10] = {1.0, 1.0, 1.0, 5.0, 5.0, 20.0, 50.0, 100.0, 
500.0, 2000.0};
+    int64_t pressure_limit = 90;
+    bool is_performance_mode = false;
+    size_t physical_limit_bytes = std::min(MemInfo::hard_mem_limit(), 
MemInfo::mem_limit());
+
     if (config::memory_mode == std::string("performance")) {
-        tc_use_memory_min = std::max(tc_use_memory_min / 10 * 9,
-                                     tc_use_memory_min - size_t(10) * 1024 * 
1024 * 1024);
-    } else {
-        tc_use_memory_min = tc_use_memory_min >> 1;
+        max_cache_percent = 100;
+        pressure_limit = 90;
+        is_performance_mode = true;
+        physical_limit_bytes = std::min(MemInfo::mem_limit(), 
MemInfo::physical_mem());
+    } else if (config::memory_mode == std::string("compact")) {
+        max_cache_percent = 20;
+        pressure_limit = 80;
     }
 
-    while 
(!_stop_background_threads_latch.wait_for(MonoDelta::FromSeconds(10))) {
-        size_t used_size = 0;
-        size_t free_size = 0;
+    int last_ms = 0;
+    const int kMaxLastMs = 30000;
+    const int kIntervalMs = 10;
+    size_t init_aggressive_decommit = 0;
+    size_t current_aggressive_decommit = 0;
+    size_t expected_aggressive_decommit = 0;
+    int64_t last_memory_pressure = 0;
+
+    
MallocExtension::instance()->GetNumericProperty("tcmalloc.aggressive_memory_decommit",
+                                                    &init_aggressive_decommit);
+    current_aggressive_decommit = init_aggressive_decommit;
+
+    while 
(!_stop_background_threads_latch.wait_for(MonoDelta::FromMilliseconds(kIntervalMs)))
 {
+        size_t tc_used_bytes = 0;
+        size_t tc_alloc_bytes = 0;
+        size_t rss = PerfCounters::get_vm_rss();
+
+        
MallocExtension::instance()->GetNumericProperty("generic.total_physical_bytes",
+                                                        &tc_alloc_bytes);
+         
MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes",
+                                                        &tc_used_bytes);
+        int64_t tc_cached_bytes = tc_alloc_bytes - tc_used_bytes;
+        int64_t to_free_bytes =
+                (int64_t)tc_cached_bytes - (tc_used_bytes * max_cache_percent 
/ 100);
+
+        int64_t memory_pressure = 0;
+        int64_t alloc_bytes = std::max(rss, tc_alloc_bytes);
+        memory_pressure = alloc_bytes * 100 / physical_limit_bytes;
+
+        expected_aggressive_decommit = init_aggressive_decommit;
+        if (memory_pressure > pressure_limit) {
+            // We are reaching oom, so release cache aggressively.
+            // Ideally, we should reuse cache and not allocate from system any 
more,
+            // however, it is hard to set limit on cache of tcmalloc and doris
+            // use mmap in vectorized mode.
+            if (last_memory_pressure <= pressure_limit) {
+                int64_t min_free_bytes = alloc_bytes - physical_limit_bytes * 
9 / 10;
+                to_free_bytes = std::max(to_free_bytes, min_free_bytes);
+                to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 30 / 
100);
+                to_free_bytes = std::min(to_free_bytes, tc_cached_bytes);
+                expected_aggressive_decommit = 1;
+            } else {
+                // release rate is enough.
+                to_free_bytes = 0;
+            }
+            last_ms = kMaxLastMs;
+        } else if (memory_pressure > (pressure_limit - 10)) {
+            if (last_memory_pressure <= (pressure_limit - 10)) {
+                to_free_bytes = std::max(to_free_bytes, tc_cached_bytes * 10 / 
100);
+            } else {
+                to_free_bytes = 0;
+            }
+        }
+
+        int release_rate_index = memory_pressure / 10;
+        double release_rate = 1.0;
+        if (release_rate_index >= sizeof(release_rates)) {
+            release_rate = 2000.0;
+        } else {
+            release_rate = release_rates[release_rate_index];
+        }
+        MallocExtension::instance()->SetMemoryReleaseRate(release_rate);
 
-        
MallocExtension::instance()->GetNumericProperty("generic.current_allocated_bytes",
-                                                        &used_size);
-        
MallocExtension::instance()->GetNumericProperty("tcmalloc.pageheap_free_bytes", 
&free_size);
-        size_t alloc_size = used_size + free_size;
+        if ((current_aggressive_decommit != expected_aggressive_decommit) && 
!is_performance_mode) {
+            
MallocExtension::instance()->SetNumericProperty("tcmalloc.aggressive_memory_decommit",
+                                                            
expected_aggressive_decommit);
+            current_aggressive_decommit = expected_aggressive_decommit;
+        }
 
-        if (alloc_size > tc_use_memory_min) {
-            size_t max_free_size = alloc_size * 20 / 100;
-            if (free_size > max_free_size) {
-                MallocExtension::instance()->ReleaseToSystem(free_size - 
max_free_size);
-            }
+        last_memory_pressure = memory_pressure;
+        if (to_free_bytes > 0) {
+            last_ms += kIntervalMs;
+            if (last_ms >= kMaxLastMs) {
+                LOG(INFO) << "generic.current_allocated_bytes " << 
tc_used_bytes
+                          << ", generic.total_physical_bytes " << 
tc_alloc_bytes << ", rss " << rss
+                          << ", max_cache_percent " << max_cache_percent << ", 
release_rate "
+                          << release_rate << ", memory_pressure " << 
memory_pressure
+                          << ", physical_limit_bytes " << physical_limit_bytes 
<< ", to_free_bytes "
+                          << to_free_bytes << ", current_aggressive_decommit "
+                          << current_aggressive_decommit;
+                MallocExtension::instance()->ReleaseToSystem(to_free_bytes);
+                last_ms = 0;
+             }
+        } else {
+            last_ms = 0;
         }
     }
 }
diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp 
b/be/src/runtime/memory/mem_tracker_limiter.cpp
index 48a2102e22..35680fc17f 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.cpp
+++ b/be/src/runtime/memory/mem_tracker_limiter.cpp
@@ -29,6 +29,8 @@
 
 namespace doris {
 
+bool MemTrackerLimiter::_oom_avoidance {true};
+
 MemTrackerLimiter::MemTrackerLimiter(int64_t byte_limit, const std::string& 
label,
                                      const std::shared_ptr<MemTrackerLimiter>& 
parent,
                                      RuntimeProfile* profile) {
diff --git a/be/src/runtime/memory/mem_tracker_limiter.h 
b/be/src/runtime/memory/mem_tracker_limiter.h
index 73aaa8e500..75261e6471 100644
--- a/be/src/runtime/memory/mem_tracker_limiter.h
+++ b/be/src/runtime/memory/mem_tracker_limiter.h
@@ -67,6 +67,9 @@ public:
 
 public:
     static bool sys_mem_exceed_limit_check(int64_t bytes) {
+        if (!_oom_avoidance) {
+            return false;
+        }
         // Limit process memory usage using the actual physical memory of the 
process in `/proc/self/status`.
         // This is independent of the consumption value of the mem tracker, 
which counts the virtual memory
         // of the process malloc.
@@ -116,6 +119,8 @@ public:
     // Returns the lowest limit for this tracker limiter and its ancestors. 
Returns -1 if there is no limit.
     int64_t get_lowest_limit() const;
 
+    static void disable_oom_avoidance() { _oom_avoidance = false; }
+
 public:
     // up to (but not including) end_tracker.
     // This happens when we want to update tracking on a particular mem 
tracker but the consumption
@@ -263,6 +268,7 @@ private:
     // In some cases, in order to avoid the cumulative error of the upper 
global tracker,
     // the consumption of the current tracker is reset to zero.
     bool _reset_zero = false;
+    static bool _oom_avoidance;
 };
 
 inline void MemTrackerLimiter::consume(int64_t bytes) {
diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp
index 4556cf939b..90bcf91dca 100644
--- a/be/src/service/doris_main.cpp
+++ b/be/src/service/doris_main.cpp
@@ -322,20 +322,17 @@ int main(int argc, char** argv) {
 #if !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && 
!defined(LEAK_SANITIZER) && \
         !defined(THREAD_SANITIZER) && !defined(USE_JEMALLOC)
     // Change the total TCMalloc thread cache size if necessary.
-    size_t total_thread_cache_bytes;
-    if 
(!MallocExtension::instance()->GetNumericProperty("tcmalloc.max_total_thread_cache_bytes",
-                                                         
&total_thread_cache_bytes)) {
-        fprintf(stderr, "Failed to get TCMalloc total thread cache size.\n");
-    }
     const size_t kDefaultTotalThreadCacheBytes = 1024 * 1024 * 1024;
-    if (total_thread_cache_bytes < kDefaultTotalThreadCacheBytes) {
-        if (!MallocExtension::instance()->SetNumericProperty(
-                    "tcmalloc.max_total_thread_cache_bytes", 
kDefaultTotalThreadCacheBytes)) {
-            fprintf(stderr, "Failed to change TCMalloc total thread cache 
size.\n");
-            return -1;
-        }
+    if 
(!MallocExtension::instance()->SetNumericProperty("tcmalloc.max_total_thread_cache_bytes",
+                                                         
kDefaultTotalThreadCacheBytes)) {
+        fprintf(stderr, "Failed to change TCMalloc total thread cache 
size.\n");
+        return -1;
+    }
+ #endif
+ 
+    if (doris::config::memory_mode == std::string("performance")) {
+        doris::MemTrackerLimiter::disable_oom_avoidance();
     }
-#endif
  
     std::vector<doris::StorePath> paths;
     auto olap_res = 
doris::parse_conf_store_paths(doris::config::storage_root_path, &paths);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[doris] branch branch-1.1-lts updated: [improvement](tcmalloc) add moderate mode and avoid oom (#14650)

Reply via email to