From: Xuewen Yan <xuewen....@unisoc.com>

When a task is dequeued, its util and the cfs_rq's util are updated
through the following call chain:
  update_load_avg()
    -> cfs_rq_util_change()
      -> cpufreq_update_util()
        -> sugov_update_[shared|single]()
          -> sugov_get_util()
            -> cpu_util_cfs()

cpu_util_cfs() returns:

  util = max { rq->cfs.avg.util_avg, rq->cfs.avg.util_est.enqueued }

For tasks that alternate between long and short runs,
rq->cfs.avg.util_est.enqueued may be larger than rq->cfs.avg.util_avg.
Since _task_util_est(p) has not been subtracted yet at this point,
schedutil uses a stale util_est.enqueued value.

This matters in task ramp-down and ramp-up scenarios: when a task is
dequeued, we want the frequency to be reduced as soon as possible, but
if schedutil still sees the stale util_est.enqueued value, the CPU may
not be able to lower its frequency.

Separate util_est_dequeue() into util_est_dequeue() and
util_est_update(), and subtract _task_util_est(p) before the util is
updated.

Signed-off-by: Xuewen Yan <xuewen....@unisoc.com>
Signed-off-by: Dietmar Eggemann <dietmar.eggem...@arm.com>
---
Changes since v1:
- change util_est_dequeue()/util_est_update() to inline
- use unsigned int enqueued rather than struct util_est in
  util_est_dequeue()
- remove the "cpu" variable
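For illustration only, a minimal userspace sketch of the effect (made-up
names and numbers, not kernel code; cpu_util_cfs() in the kernel is more
involved). It shows how taking the max of util_avg and a util_est.enqueued
that still contains the dequeued task's estimate keeps the reported util,
and hence the requested frequency, at the old level:

#include <stdio.h>

static unsigned int max_u(unsigned int a, unsigned int b)
{
	return a > b ? a : b;
}

int main(void)
{
	/* Illustrative values only. */
	unsigned int util_avg = 150;          /* ~ rq->cfs.avg.util_avg */
	unsigned int util_est_enqueued = 600; /* ~ rq->cfs.avg.util_est.enqueued */
	unsigned int task_util_est = 500;     /* ~ _task_util_est(p) of dequeued task */

	/* Before this patch: the dequeued task's estimate is still included. */
	printf("stale util: %u\n", max_u(util_avg, util_est_enqueued));

	/*
	 * After this patch: util_est_dequeue() subtracts the task's
	 * contribution (clamped at zero) before the cpufreq update.
	 */
	util_est_enqueued -= task_util_est < util_est_enqueued ?
			     task_util_est : util_est_enqueued;
	printf("fresh util: %u\n", max_u(util_avg, util_est_enqueued));

	return 0;
}

With the stale value the reported util stays at 600 even though only 150
of running utilization remains; once _task_util_est(p) is subtracted
first, it drops to 150 and schedutil can lower the frequency.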
---
 kernel/sched/fair.c | 37 +++++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ae7ceba..864d6b9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3945,22 +3945,31 @@ static inline bool within_margin(int value, int margin)
 	return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
 }
 
-static void
-util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p, bool task_sleep)
+static inline void util_est_dequeue(struct cfs_rq *cfs_rq,
+				    struct task_struct *p)
 {
-	long last_ewma_diff;
-	struct util_est ue;
-	int cpu;
+	unsigned int enqueued;
 
 	if (!sched_feat(UTIL_EST))
 		return;
 
 	/* Update root cfs_rq's estimated utilization */
-	ue.enqueued = cfs_rq->avg.util_est.enqueued;
-	ue.enqueued -= min_t(unsigned int, ue.enqueued, _task_util_est(p));
-	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, ue.enqueued);
+	enqueued = cfs_rq->avg.util_est.enqueued;
+	enqueued -= min_t(unsigned int, enqueued, _task_util_est(p));
+	WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
 
 	trace_sched_util_est_cfs_tp(cfs_rq);
+}
+
+static inline void util_est_update(struct cfs_rq *cfs_rq,
+				   struct task_struct *p,
+				   bool task_sleep)
+{
+	long last_ewma_diff;
+	struct util_est ue;
+
+	if (!sched_feat(UTIL_EST))
+		return;
 
 	/*
 	 * Skip update of task's estimated utilization when the task has not
@@ -4001,8 +4010,7 @@ static inline bool within_margin(int value, int margin)
 	 * To avoid overestimation of actual task utilization, skip updates if
 	 * we cannot grant there is idle time in this CPU.
 	 */
-	cpu = cpu_of(rq_of(cfs_rq));
-	if (task_util(p) > capacity_orig_of(cpu))
+	if (task_util(p) > capacity_orig_of(cpu_of(rq_of(cfs_rq))))
 		return;
 
 	/*
@@ -4085,7 +4093,10 @@ static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
 util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
 
 static inline void
-util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p,
+util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
+
+static inline void
+util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p,
 		bool task_sleep) {}
 
 static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
@@ -5589,6 +5600,8 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	int idle_h_nr_running = task_has_idle_policy(p);
 	bool was_sched_idle = sched_idle_rq(rq);
 
+	util_est_dequeue(&rq->cfs, p);
+
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, flags);
@@ -5639,7 +5652,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		rq->next_balance = jiffies;
 
 dequeue_throttle:
-	util_est_dequeue(&rq->cfs, p, task_sleep);
+	util_est_update(&rq->cfs, p, task_sleep);
 	hrtick_update(rq);
 }
-- 
1.9.1