This is an automated email from the ASF dual-hosted git repository. maxyang pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/cloudberry.git
commit 723ee2822a0fc6841a0b7f4c9e0ebdfd8df99b74 Author: RMT <[email protected]> AuthorDate: Tue Dec 5 14:20:10 2023 +0800 Add one more hierarchy for resgroup cgroup root (#16732) Add one more level to the resource group hierarchy when cgroup v2 is used. The current leaf node in the gpdb cgroup hierarchy is /sys/fs/cgroup/gpdb/<oid>, which is fine for the gpdb workflow but inconvenient for extensions that want to use the gpdb cgroup hierarchy. Extensions like plcontainer want to create a sub-cgroup under /sys/fs/cgroup/gpdb/<oid> as a new leaf node, which is not possible in the current hierarchy because of the "no internal processes" constraint of cgroup v2: a cgroup that distributes resource controllers to its children cannot contain processes itself. This commit introduces a new hierarchy to accommodate such extensions, and the modification is tiny: move processes from /sys/fs/cgroup/gpdb/<oid>/cgroup.procs to /sys/fs/cgroup/gpdb/<oid>/queries/cgroup.procs, and keep the limits in /sys/fs/cgroup/gpdb/<oid>. With this modification, extensions that want to use the gpdb cgroup hierarchy can create a sub-cgroup under /sys/fs/cgroup/gpdb/<oid>. For example, plcontainer will create a cgroup /sys/fs/cgroup/gpdb/<oid>/docker-12345 and put processes into it. 
--- src/backend/utils/resgroup/cgroup-ops-linux-v1.c | 10 +++++----- src/backend/utils/resgroup/cgroup-ops-linux-v2.c | 15 ++++++++++----- src/backend/utils/resgroup/cgroup.c | 8 +++++--- src/include/utils/cgroup.h | 4 +++- .../expected/resgroup/resgroup_auxiliary_tools_v2.out | 2 +- .../sql/resgroup/resgroup_auxiliary_tools_v2.sql | 2 +- 6 files changed, 25 insertions(+), 16 deletions(-) diff --git a/src/backend/utils/resgroup/cgroup-ops-linux-v1.c b/src/backend/utils/resgroup/cgroup-ops-linux-v1.c index fffba070a3f..0ad53ee35ab 100644 --- a/src/backend/utils/resgroup/cgroup-ops-linux-v1.c +++ b/src/backend/utils/resgroup/cgroup-ops-linux-v1.c @@ -652,11 +652,11 @@ createcgroup_v1(Oid group) { int retry = 0; - if (!createDir(group, CGROUP_COMPONENT_CPU) || - !createDir(group, CGROUP_COMPONENT_CPUACCT) || - !createDir(group, CGROUP_COMPONENT_MEMORY) || + if (!createDir(group, CGROUP_COMPONENT_CPU, "") || + !createDir(group, CGROUP_COMPONENT_CPUACCT, "") || + !createDir(group, CGROUP_COMPONENT_MEMORY, "") || (gp_resource_group_enable_cgroup_cpuset && - !createDir(group, CGROUP_COMPONENT_CPUSET))) + !createDir(group, CGROUP_COMPONENT_CPUSET, ""))) { CGROUP_ERROR("can't create cgroup for resource group '%d': %m", group); } @@ -705,7 +705,7 @@ create_default_cpuset_group_v1(void) CGroupComponentType component = CGROUP_COMPONENT_CPUSET; int retry = 0; - if (!createDir(DEFAULT_CPUSET_GROUP_ID, component)) + if (!createDir(DEFAULT_CPUSET_GROUP_ID, component, "")) { CGROUP_ERROR("can't create cpuset cgroup for resgroup '%d': %m", DEFAULT_CPUSET_GROUP_ID); diff --git a/src/backend/utils/resgroup/cgroup-ops-linux-v2.c b/src/backend/utils/resgroup/cgroup-ops-linux-v2.c index 69d2b1b0461..6b5f667fe7f 100644 --- a/src/backend/utils/resgroup/cgroup-ops-linux-v2.c +++ b/src/backend/utils/resgroup/cgroup-ops-linux-v2.c @@ -386,7 +386,8 @@ createcgroup_v2(Oid group) { int retry = 0; - if (!createDir(group, CGROUP_COMPONENT_PLAIN)) + if (!createDir(group, CGROUP_COMPONENT_PLAIN, "") 
|| + !createDir(group, CGROUP_COMPONENT_PLAIN, CGROUPV2_LEAF_INDENTIFIER)) { CGROUP_ERROR("can't create cgroup for resource group '%d': %m", group); } @@ -418,7 +419,7 @@ create_default_cpuset_group_v2(void) CGroupComponentType component = CGROUP_COMPONENT_PLAIN; int retry = 0; - if (!createDir(DEFAULT_CPUSET_GROUP_ID, component)) + if (!createDir(DEFAULT_CPUSET_GROUP_ID, component, "")) { CGROUP_ERROR("can't create cpuset cgroup for resgroup '%d': %m", DEFAULT_CPUSET_GROUP_ID); @@ -466,6 +467,7 @@ create_default_cpuset_group_v2(void) static void attachcgroup_v2(Oid group, int pid, bool is_cpuset_enabled) { + char path_of_leaf[MAXPATHLEN]; /* * needn't write to file if the pid has already been written in. * Unless it has not been written or the group has changed or @@ -474,8 +476,9 @@ attachcgroup_v2(Oid group, int pid, bool is_cpuset_enabled) if (IsUnderPostmaster && group == currentGroupIdInCGroup) return; + pg_sprintf(path_of_leaf, "%s/cgroup.procs", CGROUPV2_LEAF_INDENTIFIER); writeInt64(group, BASEDIR_GPDB, CGROUP_COMPONENT_PLAIN, - "cgroup.procs", pid); + path_of_leaf, pid); /* * Do not assign the process to cgroup/memory for now. 
@@ -499,6 +502,7 @@ detachcgroup_v2(Oid group, CGroupComponentType component, int fd_dir) { char path[MAX_CGROUP_PATHLEN]; size_t path_size = sizeof(path); + char path_of_leaf[MAXPATHLEN]; char *buf; size_t buf_size; @@ -533,7 +537,8 @@ detachcgroup_v2(Oid group, CGroupComponentType component, int fd_dir) } \ } while (0) - buildPath(group, BASEDIR_GPDB, component, "cgroup.procs", path, path_size); + pg_sprintf(path_of_leaf, "%s/cgroup.procs", CGROUPV2_LEAF_INDENTIFIER); + buildPath(group, BASEDIR_GPDB, component, path_of_leaf, path, path_size); fdr = open(path, O_RDONLY); @@ -561,7 +566,7 @@ detachcgroup_v2(Oid group, CGroupComponentType component, int fd_dir) if (buf_len == 0) return; - buildPath(DEFAULTRESGROUP_OID, BASEDIR_GPDB, component, "cgroup.procs", + buildPath(DEFAULTRESGROUP_OID, BASEDIR_GPDB, component, path_of_leaf, path, path_size); fdw = open(path, O_WRONLY); diff --git a/src/backend/utils/resgroup/cgroup.c b/src/backend/utils/resgroup/cgroup.c index a996e31aa51..839090f39e4 100644 --- a/src/backend/utils/resgroup/cgroup.c +++ b/src/backend/utils/resgroup/cgroup.c @@ -285,12 +285,12 @@ lockDir(const char *path, bool block) * Create cgroup dir */ bool -createDir(Oid group, CGroupComponentType component) +createDir(Oid group, CGroupComponentType component, char *filename) { char path[MAX_CGROUP_PATHLEN]; size_t path_size = sizeof(path); - buildPath(group, BASEDIR_GPDB, component, "", path, path_size); + buildPath(group, BASEDIR_GPDB, component, filename, path, path_size); if (mkdir(path, 0755) && errno != EEXIST) return false; @@ -468,12 +468,14 @@ deleteDir(Oid group, CGroupComponentType component, const char *filename, bool u { char path[MAX_CGROUP_PATHLEN]; + char leaf_path[MAX_CGROUP_PATHLEN]; size_t path_size = sizeof(path); int retry = unassign ? 
0 : MAX_RETRY - 1; int fd_dir; buildPath(group, BASEDIR_GPDB, component, "", path, path_size); + buildPath(group, BASEDIR_GPDB, component, CGROUPV2_LEAF_INDENTIFIER, leaf_path, path_size); /* * To prevent race condition between multiple processes we require a dir @@ -497,7 +499,7 @@ deleteDir(Oid group, CGroupComponentType component, const char *filename, bool u if (unassign) detachcgroup(group, component, fd_dir); - if (rmdir(path)) + if (rmdir(leaf_path) || rmdir(path)) { int err = errno; diff --git a/src/include/utils/cgroup.h b/src/include/utils/cgroup.h index b9c0fa249a2..39ef82aeb52 100644 --- a/src/include/utils/cgroup.h +++ b/src/include/utils/cgroup.h @@ -47,6 +47,8 @@ /* This is the default value about Linux Control Group */ #define DEFAULT_CPU_PERIOD_US 100000LL +/* The name of leaf cgroup when use cgroup v2 */ +#define CGROUPV2_LEAF_INDENTIFIER "queries" /* * Resource Group underlying component types. @@ -168,7 +170,7 @@ extern void setComponentDir(CGroupComponentType component, const char *dir); extern int lockDir(const char *path, bool block); /* Create cgroup dir. */ -extern bool createDir(Oid group, CGroupComponentType comp); +extern bool createDir(Oid group, CGroupComponentType comp, char *filename); /* Delete cgroup dir. 
*/ extern bool deleteDir(Oid group, CGroupComponentType component, const char *filename, bool unassign, void (*detachcgroup) (Oid group, CGroupComponentType component, int fd_dir)); diff --git a/src/test/isolation2/expected/resgroup/resgroup_auxiliary_tools_v2.out b/src/test/isolation2/expected/resgroup/resgroup_auxiliary_tools_v2.out index e2e959374b2..93c1d1aa4df 100644 --- a/src/test/isolation2/expected/resgroup/resgroup_auxiliary_tools_v2.out +++ b/src/test/isolation2/expected/resgroup/resgroup_auxiliary_tools_v2.out @@ -118,7 +118,7 @@ sql = "select sess_id from pg_stat_activity where pid = '%d'" % pid result = plp sql = "select groupid from gp_toolkit.gp_resgroup_config where groupname='%s'" % groupname result = plpy.execute(sql) groupid = result[0]['groupid'] sql = "select hostname from gp_segment_configuration group by hostname" result = plpy.execute(sql) hosts = [_['hostname'] for _ in result] def get_result(host): stdout = subprocess.run(["ssh", "{}".format(host), "ps -ef | grep postgres | grep con{} | grep -v grep | awk '{{print $2}}'".format(session_id)], stdout=subprocess.PIPE, check=True).stdout session_pids = stdout.splitlines() -path = "/sys/fs/cgroup/gpdb/{}/cgroup.procs".format(groupid) stdout = subprocess.run(["ssh", "{}".format(host), "cat {}".format(path)], stdout=subprocess.PIPE, check=True).stdout cgroups_pids = stdout.splitlines() +path = "/sys/fs/cgroup/gpdb/{}/queries/cgroup.procs".format(groupid) stdout = subprocess.run(["ssh", "{}".format(host), "cat {}".format(path)], stdout=subprocess.PIPE, check=True).stdout cgroups_pids = stdout.splitlines() return set(session_pids).issubset(set(cgroups_pids)) for host in hosts: if not get_result(host): return False return True $$ LANGUAGE plpython3u; diff --git a/src/test/isolation2/sql/resgroup/resgroup_auxiliary_tools_v2.sql b/src/test/isolation2/sql/resgroup/resgroup_auxiliary_tools_v2.sql index 88f66c4f18d..e2d8db51888 100644 --- a/src/test/isolation2/sql/resgroup/resgroup_auxiliary_tools_v2.sql 
+++ b/src/test/isolation2/sql/resgroup/resgroup_auxiliary_tools_v2.sql @@ -259,7 +259,7 @@ $$ LANGUAGE plpython3u; stdout=subprocess.PIPE, check=True).stdout session_pids = stdout.splitlines() - path = "/sys/fs/cgroup/gpdb/{}/cgroup.procs".format(groupid) + path = "/sys/fs/cgroup/gpdb/{}/queries/cgroup.procs".format(groupid) stdout = subprocess.run(["ssh", "{}".format(host), "cat {}".format(path)], stdout=subprocess.PIPE, check=True).stdout cgroups_pids = stdout.splitlines() --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
