Add accounting to track cases where the runtime does not run out, and export the information in "/proc/<pid>/sched".
Specifically, the patch adds three members "nr_underrun_sched", "nr_underrun_block", and "nr_underrun_yield" in sched_dl_entity: -@nr_underrun_sched hints some scheduling issue. -@nr_underrun_block hints some block reason. E.g. long sleep. -@nr_underrun_yield hints the yield reason. This is helpful to spot/debug deadline issues, for example, I launched three 50% dl tasks on my dual-core machine, plus several buggy constrained dl tasks that Daniel is trying to address in "sched/deadline: Use the revised wakeup rule for suspending constrained dl tasks", then I observed one 50% deadline task's proc sched output: $ cat /proc/3389/sched |grep underrun dl.nr_underrun_sched : 981 dl.nr_underrun_block : 0 dl.nr_underrun_yield : 0 Very large "dl.nr_underrun_sched" hints it's very likely that there is some underlying scheduling issue. Note that we don't use CONFIG_SCHED_DEBUG as the accounting added has little overhead (also happens infrequently). Suggested-by: Steven Rostedt <rost...@goodmis.org> Signed-off-by: Xunlei Pang <xlp...@redhat.com> --- include/linux/sched.h | 10 ++++++++++ kernel/sched/core.c | 3 +++ kernel/sched/deadline.c | 12 +++++++++--- kernel/sched/debug.c | 3 +++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index ba080e5..e17928f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -455,6 +455,16 @@ struct sched_dl_entity { * own bandwidth to be enforced, thus we need one timer per task. */ struct hrtimer dl_timer; + + /* + * Accounting for periods that run less than @dl_runtime: + * @nr_underrun_sched hints some scheduling issue. + * @nr_underrun_block hints some block reason. E.g. long sleep. + * @nr_underrun_yield hints the yield reason. 
+ */ + u64 nr_underrun_sched; + u64 nr_underrun_block; + u64 nr_underrun_yield; }; union rcu_special { diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bccd819..6214ada 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4004,6 +4004,9 @@ static struct task_struct *find_process_by_pid(pid_t pid) dl_se->dl_period = attr->sched_period ?: dl_se->dl_deadline; dl_se->flags = attr->sched_flags; dl_se->dl_bw = to_ratio(dl_se->dl_period, dl_se->dl_runtime); + dl_se->nr_underrun_sched = 0; + dl_se->nr_underrun_block = 0; + dl_se->nr_underrun_yield = 0; /* * Changing the parameters of a task is 'tricky' and we're not doing diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 5691149..a7ddc03 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -394,8 +394,10 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se, dl_se->runtime = pi_se->dl_runtime; } - if (dl_se->dl_yielded && dl_se->runtime > 0) + if (dl_se->dl_yielded && dl_se->runtime > 0) { dl_se->runtime = 0; + ++dl_se->nr_underrun_yield; + } /* * We keep moving the deadline away until we get some @@ -723,8 +725,10 @@ static inline void dl_check_constrained_dl(struct sched_dl_entity *dl_se) if (unlikely(dl_se->dl_boosted || !start_dl_timer(p))) return; dl_se->dl_throttled = 1; - if (dl_se->runtime > 0) + if (dl_se->runtime > 0) { dl_se->runtime = 0; + ++dl_se->nr_underrun_block; + } } } @@ -733,8 +737,10 @@ int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se) { bool dmiss = dl_time_before(dl_se->deadline, rq_clock(rq)); - if (dmiss && dl_se->runtime > 0) + if (dmiss && dl_se->runtime > 0) { dl_se->runtime = 0; + ++dl_se->nr_underrun_sched; + } return (dl_se->runtime <= 0); } diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 38f0193..904b43f 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -957,6 +957,9 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m) if (p->policy == SCHED_DEADLINE) { 
P(dl.runtime); P(dl.deadline); + P(dl.nr_underrun_sched); + P(dl.nr_underrun_block); + P(dl.nr_underrun_yield); } #undef PN_SCHEDSTAT #undef PN -- 1.8.3.1