Hi Yuyang,

On Mon, Jul 27, 2015 at 02:43:25AM +0800, Yuyang Du wrote:
> Hi Boqun,
> 
> On Tue, Jul 21, 2015 at 06:29:56PM +0800, Boqun Feng wrote:
> > The point is that you have already tracked the sum of runnable_load_avg
> > and blocked_load_avg in cfs_rq->avg.load_avg. If you're going to track
> > part of the sum, you'd better track the one that's updated less
> > frequently, right?
> > 
> > Anyway, this idea just came to my mind. I also wonder which one is
> > updated less frequently. ;-) So I am asking to see whether there is
> > something we can improve.
> 
> Actually, this is not the point.
> 
> 1) blocked load is more "difficult" to track, hint, migrate.
> 
> 2) r(t1) - b(t2) is not anything, hint, t1 != t2

Please consider the patch below, which is not tested yet and is meant only
for discussion. It is based on patches 1-5 of your patchset and is intended
to replace patch 6. I hope this makes my point clear.

Thanks anyway for being patient with me ;-)

Regards,
Boqun

========================================================================

Subject: [PATCH] sched: lazy blocked load tracking

With this patch, cfs_rq_runnable_load_avg can be implemented as follows:

static inline unsigned long cfs_rq_runnable_load_avg(struct cfs_rq *cfs_rq)
{
        u64 now = cfs_rq_clock_task(cfs_rq);
        decay_cfs_rq_blocked_load(now, cfs_rq);

        return max_t(long, cfs_rq->avg.load_avg - cfs_rq->blocked_load_avg, 0);
}

---
 kernel/sched/fair.c  | 41 +++++++++++++++++++++++++++++++++++++++++
 kernel/sched/sched.h |  4 ++++
 2 files changed, 45 insertions(+)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e977074..76beb81 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2625,6 +2625,20 @@ static __always_inline int __update_load_avg(u64 now, 
int cpu,
        return decayed;
 }
 
+/* Lazily decay @cfs_rq's blocked load up to @now; returns periods elapsed. */
+static inline u64 decay_cfs_rq_blocked_load(u64 now, struct cfs_rq *cfs_rq)
+{
+       u64 decays;
+
+       now = now >> 20;        /* whole 2^20-unit periods of @now's clock */
+       decays = now - cfs_rq->last_blocked_load_decays;
+
+       cfs_rq->blocked_load_sum = decay_load(cfs_rq->blocked_load_sum, decays);
+       cfs_rq->blocked_load_avg = div_u64(cfs_rq->blocked_load_sum, LOAD_AVG_MAX);
+       cfs_rq->last_blocked_load_decays = now; /* same field read above */
+       return decays;
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * Updating tg's load_avg is necessary before update_cfs_share (which is done)
@@ -2656,6 +2670,12 @@ static inline int update_cfs_rq_load_avg(u64 now, struct 
cfs_rq *cfs_rq)
                long r = atomic_long_xchg(&cfs_rq->removed_load_avg, 0);
                sa->load_avg = max_t(long, sa->load_avg - r, 0);
                sa->load_sum = max_t(s64, sa->load_sum - r * LOAD_AVG_MAX, 0);
+               /* Also drop the removed load from the blocked load. */
+               decay_cfs_rq_blocked_load(sa->last_update_time, cfs_rq);
+               cfs_rq->blocked_load_avg = max_t(long,
+                               cfs_rq->blocked_load_avg - r, 0);
+               cfs_rq->blocked_load_sum = max_t(s64,
+                               cfs_rq->blocked_load_sum - r * LOAD_AVG_MAX, 0);
        }
 
        if (atomic_long_read(&cfs_rq->removed_util_avg)) {
@@ -2719,11 +2739,32 @@ enqueue_entity_load_avg(struct cfs_rq *cfs_rq, struct 
sched_entity *se)
                cfs_rq->avg.util_avg += sa->util_avg;
                cfs_rq->avg.util_sum += sa->util_sum;
        }
+       else {
+               /* Decay blocked load, then take this entity out of it. */
+               decay_cfs_rq_blocked_load(now, cfs_rq);
+               cfs_rq->blocked_load_avg = max_t(long,
+                               cfs_rq->blocked_load_avg - sa->load_avg, 0);
+               cfs_rq->blocked_load_sum = max_t(s64,
+                               cfs_rq->blocked_load_sum - sa->load_sum, 0);
+       }
 
        if (decayed || migrated)
                update_tg_load_avg(cfs_rq, 0);
 }
 
+/* Update @se and @cfs_rq, decay blocked load, then add @se's load to it. */
+static inline void
+dequeue_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+       u64 now = cfs_rq_clock_task(cfs_rq);
+
+       update_load_avg(se, 1);
+       update_cfs_rq_load_avg(now, cfs_rq);
+       decay_cfs_rq_blocked_load(now, cfs_rq);
+       cfs_rq->blocked_load_avg += se->avg.load_avg;
+       cfs_rq->blocked_load_sum += se->avg.load_sum;
+}
+
 /*
  * Task first catches up with cfs_rq, and then subtract
  * itself from the cfs_rq (task must be off the queue now).
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 4d139e0..f570306 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -368,6 +368,10 @@ struct cfs_rq {
         * CFS load tracking
         */
        struct sched_avg avg;
+
+       u64 last_blocked_load_decays;
+       u64 blocked_load_sum;
+       unsigned long blocked_load_avg;
 #ifdef CONFIG_FAIR_GROUP_SCHED
        unsigned long tg_load_avg_contrib;
 #endif
-- 
2.4.6

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to