We already had this feature in cgroup-v1 freezer, so let's also have it for cgroup-v2. Difference in cgroup-v2 freezer is that there is no loop over tasks like in freezer_read() -> update_if_frozen(), cgroup_events_show() only checks CGRP_FROZEN flag, so we have to add our own loop over each descendant css and each task in it.
Note: freeze_jiffies is protected with css_set_lock together with CGRP_FREEZE bit. https://virtuozzo.atlassian.net/browse/VSTOR-118578 Signed-off-by: Pavel Tikhomirov <[email protected]> Feature: cgroup/freeze: enhance logging --- include/linux/cgroup-defs.h | 5 ++ include/linux/cgroup.h | 3 ++ kernel/cgroup/cgroup.c | 97 ++++++++++++++++++++++++++++++++++ kernel/cgroup/freezer.c | 8 ++- kernel/cgroup/legacy_freezer.c | 36 +------------ 5 files changed, 112 insertions(+), 37 deletions(-) diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index d68fca31e6e65..85a918ae4c5a9 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -424,6 +424,11 @@ struct cgroup_freezer_state { * Number of kernel threads in this cgroup */ int nr_kthreads; + + /* + * Jiffies of last freeze + */ + unsigned long freeze_jiffies; }; struct cgroup { diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 86ee6604a5207..2e01e484cbac7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -876,4 +876,7 @@ struct cgroup *task_get_cgroup1(struct task_struct *tsk, int hierarchy_id); struct cgroup_of_peak *of_peak(struct kernfs_open_file *of); +void warn_freeze_timeout_task(struct cgroup *cgrp, int timeout, + struct task_struct *task); + #endif /* _LINUX_CGROUP_H */ diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 2f7db20f8a460..6df9ab9944036 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -61,6 +61,9 @@ #include <linux/psi.h> #include <net/sock.h> #include <linux/task_work.h> +#include <linux/ratelimit.h> +#include <linux/stacktrace.h> +#include <linux/sysctl.h> #include <linux/ve.h> #define CREATE_TRACE_POINTS @@ -4099,6 +4102,98 @@ static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of, return nbytes; } +#define MAX_STACK_TRACE_DEPTH 64 + +void warn_freeze_timeout_task(struct cgroup *cgrp, int timeout, + struct task_struct *task) +{ + char *buf __free(kfree) = NULL; + unsigned long *entries __free(kfree) = NULL; + unsigned long nr_entries, i; + pid_t tgid; + + buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return; + + if (cgroup_path(cgrp, buf, PATH_MAX) < 0) + return; + + tgid = task_pid_nr_ns(task, &init_pid_ns); + printk(KERN_WARNING "Freeze of %s took %d sec, " + "due to unfreezable process %d:%s, stack:\n", + buf, timeout/HZ, tgid, task->comm); + + entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), + GFP_KERNEL); + if (!entries) + return; + nr_entries = stack_trace_save_tsk(task, entries, + MAX_STACK_TRACE_DEPTH, 0); + for (i = 0; i < nr_entries; i++) + printk(KERN_WARNING "[<%pK>] %pB\n", + (void *)entries[i], (void *)entries[i]); +} + +static void warn_freeze_timeout(struct cgroup *cgrp, int timeout) +{ + struct cgroup_subsys_state *css; + char *buf __free(kfree) = NULL; + + guard(rcu)(); + css_for_each_descendant_post(css, &cgrp->self) { + struct task_struct *task; + struct css_task_iter it; + + css_task_iter_start(css, 0, &it); + while ((task = css_task_iter_next(&it))) { + if (task->flags & PF_KTHREAD) + continue; + if (task->frozen) + continue; + + warn_freeze_timeout_task(cgrp, timeout, task); + css_task_iter_end(&it); + return; + } + css_task_iter_end(&it); + } + + buf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!buf) + return; + + if (cgroup_path(cgrp, buf, PATH_MAX) < 0) + return; + + printk(KERN_WARNING "Freeze of %s took %d sec, " + "but no unfreezable process detected\n", buf, timeout/HZ); +} + +static void check_freeze_timeout(struct cgroup *cgrp) +{ + static DEFINE_RATELIMIT_STATE(freeze_timeout_rs, + DEFAULT_FREEZE_TIMEOUT, 1); + unsigned long freeze_jiffies; + int timeout; + + scoped_guard(spinlock, &css_set_lock) { + if (!test_bit(CGRP_FREEZE, &cgrp->flags) || + test_bit(CGRP_FROZEN, &cgrp->flags)) + return; + freeze_jiffies = READ_ONCE(cgrp->freezer.freeze_jiffies); + } + + timeout = READ_ONCE(sysctl_freeze_timeout); + if (!freeze_jiffies || freeze_jiffies + timeout > get_jiffies_64()) + return; + + if (!__ratelimit(&freeze_timeout_rs)) + return; + + warn_freeze_timeout(cgrp, timeout); +} + static int cgroup_events_show(struct seq_file *seq, void *v) { struct cgroup *cgrp = seq_css(seq)->cgroup; @@ -4106,6 +4201,8 @@ static int cgroup_events_show(struct seq_file *seq, void *v) seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp)); seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags)); + check_freeze_timeout(cgrp); + return 0; } diff --git a/kernel/cgroup/freezer.c b/kernel/cgroup/freezer.c index a2d9e55257bcc..e6ef44d0ab4a8 100644 --- a/kernel/cgroup/freezer.c +++ b/kernel/cgroup/freezer.c @@ -3,6 +3,7 @@ #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/sched/signal.h> +#include <linux/jiffies.h> #include "cgroup-internal.h" @@ -183,10 +184,13 @@ static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze) lockdep_assert_held(&cgroup_mutex); spin_lock_irq(&css_set_lock); - if (freeze) + if (freeze) { set_bit(CGRP_FREEZE, &cgrp->flags); - else + cgrp->freezer.freeze_jiffies = get_jiffies_64(); + } else { clear_bit(CGRP_FREEZE, &cgrp->flags); + cgrp->freezer.freeze_jiffies = 0; + } spin_unlock_irq(&css_set_lock); if (freeze) diff --git a/kernel/cgroup/legacy_freezer.c b/kernel/cgroup/legacy_freezer.c index b048f6b530688..a59e5a14f45ed 100644 --- a/kernel/cgroup/legacy_freezer.c +++ b/kernel/cgroup/legacy_freezer.c @@ -25,7 +25,6 @@ #include <linux/cpu.h> #include <linux/jiffies.h> #include <linux/ratelimit.h> -#include <linux/stacktrace.h> #include <linux/sysctl.h> /* @@ -244,11 +243,6 @@ static void check_freezer_timeout(struct cgroup_subsys_state *css, DEFAULT_FREEZE_TIMEOUT, 1); int __freeze_timeout = READ_ONCE(sysctl_freeze_timeout); struct freezer *freezer = css_freezer(css); - unsigned long nr_entries; - unsigned long *entries; - char *freezer_cg_name; - pid_t tgid; - int i; if (!freezer->freeze_jiffies || freezer->freeze_jiffies + __freeze_timeout > get_jiffies_64()) @@ -257,35 +251,7 @@ static void check_freezer_timeout(struct cgroup_subsys_state *css, if (!__ratelimit(&freeze_timeout_rs)) return; - freezer_cg_name = kmalloc(PATH_MAX, GFP_KERNEL); - if (!freezer_cg_name) - return; - - if (cgroup_path(css->cgroup, freezer_cg_name, PATH_MAX) < 0) - goto free_cg_name; - - tgid = task_pid_nr_ns(task, &init_pid_ns); - - printk(KERN_WARNING "Freeze of %s took %d sec, " - "due to unfreezable process %d:%s, stack:\n", - freezer_cg_name, __freeze_timeout/HZ, tgid, task->comm); - - entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), - GFP_KERNEL); - if (!entries) - goto free_cg_name; - - nr_entries = stack_trace_save_tsk(task, entries, - MAX_STACK_TRACE_DEPTH, 0); - - for (i = 0; i < nr_entries; i++) { - printk(KERN_WARNING "[<%pK>] %pB\n", - (void *)entries[i], (void *)entries[i]); - } - - kfree(entries); -free_cg_name: - kfree(freezer_cg_name); + warn_freeze_timeout_task(css->cgroup, __freeze_timeout, task); } /** -- 2.51.1 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
