This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 4c627a11290 branch-4.1: [improvement](cgroup) inactive_file should be
treated as available memory to avoid query be cancelled #64347 (#64411)
4c627a11290 is described below
commit 4c627a1129053fb028d9ecdadd90d832bed765c8
Author: github-actions[bot]
<41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Tue Jun 16 14:23:26 2026 +0800
branch-4.1: [improvement](cgroup) inactive_file should be treated as
available memory to avoid query be cancelled #64347 (#64411)
Cherry-picked from #64347
Co-authored-by: yiguolei <[email protected]>
---
be/src/common/cgroup_memory_ctl.cpp | 34 ++++++++++++++++++----
be/src/util/cgroup_util.cpp | 7 +++++
be/src/util/mem_info.cpp | 16 +++++-----
.../meta_service_rate_limit_helper.cpp | 2 +-
4 files changed, 44 insertions(+), 15 deletions(-)
diff --git a/be/src/common/cgroup_memory_ctl.cpp
b/be/src/common/cgroup_memory_ctl.cpp
index dddcbd50338..9a70a615dcc 100644
--- a/be/src/common/cgroup_memory_ctl.cpp
+++ b/be/src/common/cgroup_memory_ctl.cpp
@@ -93,6 +93,8 @@ struct CgroupsV2Reader : CGroupMemoryCtl::ICgroupsReader {
return Status::CgroupError("Error reading {}: {}",
file_path.string(),
get_str_err_msg());
}
+ // This means no limit, for example, all process in linux will belong
to a cgroup, and
+ // the default value of the memory limit in memory.max file is "max",
which means no limit.
if (line == "max") {
*value = std::numeric_limits<int64_t>::max();
return Status::OK();
@@ -107,15 +109,37 @@ struct CgroupsV2Reader : CGroupMemoryCtl::ICgroupsReader {
std::unordered_map<std::string, int64_t> metrics_map;
CGroupUtil::read_int_metric_from_cgroup_file((_mount_file_dir /
"memory.stat"),
metrics_map);
- if (*value < metrics_map["inactive_file"]) {
- return Status::CgroupError("CgroupsV2Reader read_memory_usage
negative memory usage");
+ int64_t inactive_file =
+ metrics_map.contains("inactive_file") ?
metrics_map["inactive_file"] : 0;
+ int64_t active_file = metrics_map.contains("active_file") ?
metrics_map["active_file"] : 0;
+ int64_t slab_reclaimable =
+ metrics_map.contains("slab_reclaimable") ?
metrics_map["slab_reclaimable"] : 0;
+ if (inactive_file < 0 || active_file < 0 || slab_reclaimable < 0) {
+ // In this scenario, not return error, ignore it and print log.
+ LOG(WARNING) << "CgroupsV2Reader read_memory_usage missing
expected metrics in "
+ "memory.stat, inactive_file: "
+ << inactive_file << ", active_file: " << active_file
+ << ", slab_reclaimable: " << slab_reclaimable;
+ return Status::OK();
+ }
+
+ const int64_t reclaimable_usage = inactive_file + active_file +
slab_reclaimable;
+ if (*value < reclaimable_usage) {
+ LOG(WARNING)
+ << "CgroupsV2Reader read_memory_usage negative memory
usage, not - reclaimable "
+ "usage any more, just return memory.current: "
+ << *value << ", inactive_file: " << inactive_file
+ << ", active_file: " << active_file
+ << ", slab_reclaimable: " << slab_reclaimable;
+ // In this case, do not return an error, just ignore the negative
usage and continue.
+ // If return error, the upper system will use os available memory
instead of cgroup available memory, which may cause OOM more easily.
+ return Status::OK();
}
- // the reason why we subtract inactive_file described here:
+ // The reclaimable file cache described here should not be counted as
used memory:
//
https://github.com/ClickHouse/ClickHouse/issues/64652#issuecomment-2149630667
- *value -= metrics_map["inactive_file"];
// Part of "slab" that might be reclaimed, such as dentries and inodes.
// https://arthurchiao.art/blog/cgroupv2-zh/
- *value -= metrics_map["slab_reclaimable"];
+ *value -= reclaimable_usage;
return Status::OK();
}
diff --git a/be/src/util/cgroup_util.cpp b/be/src/util/cgroup_util.cpp
index cfda0a4fb77..cc52a60ed75 100644
--- a/be/src/util/cgroup_util.cpp
+++ b/be/src/util/cgroup_util.cpp
@@ -178,6 +178,9 @@ std::string CGroupUtil::cgroupv2_of_process() {
}
// With cgroups v2, there will be a *single* line with prefix "0::/"
// (see https://docs.kernel.org/admin-guide/cgroup-v2.html)
+ // such as 0::/user.slice/user-1005.slice/session-213906.scope this is the
cgroup name
+ // it should be combined with the default cgroup mount point to get the
full path to the cgroup, e.g.
+ // /sys/fs/cgroup/user.slice/user-1005.slice/session-213906.scope
std::string cgroup;
std::getline(cgroup_name_file, cgroup);
static const std::string v2_prefix = "0::/";
@@ -198,6 +201,7 @@ std::optional<std::string>
CGroupUtil::get_cgroupsv2_path(const std::string& sub
}
std::string cgroup = CGroupUtil::cgroupv2_of_process();
+ // /sys/fs/cgroup/user.slice/user-1005.slice/session-213906.scope
auto current_cgroup = cgroup.empty() ? default_cgroups_mount :
(default_cgroups_mount / cgroup);
// Return the bottom-most nested current memory file. If there is no such
file at the current
@@ -259,6 +263,9 @@ void CGroupUtil::read_int_metric_from_cgroup_file(
metrics_map[key] = value;
} else if (fields[2] == "kB") {
metrics_map[key] = value * 1024L;
+ } else {
+ LOG(WARNING) << "Unknown unit in cgroup file " <<
file_path.string()
+ << ", line: " << line;
}
}
}
diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp
index 4b48d40d2d9..ebd6f4a5442 100644
--- a/be/src/util/mem_info.cpp
+++ b/be/src/util/mem_info.cpp
@@ -94,7 +94,7 @@ void MemInfo::refresh_proc_meminfo() {
if (meminfo.is_open()) {
meminfo.close();
}
-
+ _s_cgroup_mem_refresh_state = false;
// refresh cgroup memory
if (config::enable_use_cgroup_memory_info) {
if (_s_cgroup_mem_refresh_wait_times >= 0) {
@@ -119,12 +119,13 @@ void MemInfo::refresh_proc_meminfo() {
// cgroup mem limit is refreshed every 10 seconds,
// cgroup mem usage is refreshed together with memInfo every time,
which is very frequent.
+ // If _s_cgroup_mem_limit == max, it means get cgroup mem limit failed
OR the cgroup has no memory limit for example
+ // there is just "max" in memory.max file.
if (_s_cgroup_mem_limit != std::numeric_limits<int64_t>::max()) {
int64_t cgroup_mem_usage;
auto status =
CGroupMemoryCtl::find_cgroup_mem_usage(&cgroup_mem_usage);
if (!status.ok()) {
_s_cgroup_mem_usage = std::numeric_limits<int64_t>::min();
- _s_cgroup_mem_refresh_state = false;
LOG_EVERY_N(WARNING, 500)
<< "Refresh cgroup memory usage failed, cgroup mem
limit: "
<< _s_cgroup_mem_limit << ", " << status;
@@ -132,17 +133,14 @@ void MemInfo::refresh_proc_meminfo() {
_s_cgroup_mem_usage = cgroup_mem_usage;
_s_cgroup_mem_refresh_state = true;
}
- } else {
- _s_cgroup_mem_refresh_state = false;
}
- } else {
- _s_cgroup_mem_refresh_state = false;
}
// 1. calculate physical_mem
int64_t physical_mem = -1;
-
- physical_mem = _mem_info_bytes["MemTotal"];
+ if (_mem_info_bytes.find("MemTotal") != _mem_info_bytes.end()) {
+ physical_mem = _mem_info_bytes["MemTotal"];
+ }
if (_s_cgroup_mem_refresh_state) {
// In theory, always cgroup_mem_limit < physical_mem
if (physical_mem < 0) {
@@ -200,7 +198,7 @@ void MemInfo::refresh_proc_meminfo() {
// Process `MemAvailable = MemFree - LowWaterMark + (PageCache -
min(PageCache / 2, LowWaterMark))`,
// from `MemAvailable` in `/proc/meminfo`, calculated by OS.
// CgroupV2 `MemAvailable = cgroup_mem_limit - cgroup_mem_usage`,
- // `cgroup_mem_usage = memory.current - inactive_file -
slab_reclaimable`, in fact,
+ // `cgroup_mem_usage = memory.current - inactive_file - active_file -
slab_reclaimable`, in fact,
// there seems to be some memory that can be reused in
`cgroup_mem_usage`.
if (mem_available < 0) {
mem_available = _s_cgroup_mem_limit - _s_cgroup_mem_usage;
diff --git a/cloud/src/meta-service/meta_service_rate_limit_helper.cpp
b/cloud/src/meta-service/meta_service_rate_limit_helper.cpp
index 0f8c750be6c..9af2aead05c 100644
--- a/cloud/src/meta-service/meta_service_rate_limit_helper.cpp
+++ b/cloud/src/meta-service/meta_service_rate_limit_helper.cpp
@@ -360,7 +360,7 @@ std::optional<CgroupMemoryInfo> get_cgroup_memory_info() {
}
auto metrics = read_metrics_map(*dir / "memory.stat");
int64_t adjusted_usage = *usage;
- adjusted_usage -= metrics["inactive_file"];
+ adjusted_usage -= metrics["inactive_file"] +
metrics["active_file"];
adjusted_usage -= metrics["slab_reclaimable"];
adjusted_usage = std::max<int64_t>(0, adjusted_usage);
return CgroupMemoryInfo {limit_bytes, adjusted_usage};
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]