On Thu, 2018-02-08 at 14:19 -0800, Rohit Jain wrote: > This patch makes idle_balance more dynamic as the sched_migration_cost > is now accounted on a sched_domain level. This in turn is done in > sd_init when we know what the topology relationships are. > > For introduction sakes cost of migration within the same core is set as > 0, across cores is 50 usec and across sockets is 500 usec. sysctl for > these variables are introduced in patch 2. > > Signed-off-by: Rohit Jain <rohit.k.j...@oracle.com> > --- > include/linux/sched/topology.h | 1 + > kernel/sched/fair.c | 6 +++--- > kernel/sched/topology.c | 5 +++++ > 3 files changed, 9 insertions(+), 3 deletions(-) > > diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h > index cf257c2..bcb4db2 100644 > --- a/include/linux/sched/topology.h > +++ b/include/linux/sched/topology.h > @@ -104,6 +104,7 @@ struct sched_domain { > u64 max_newidle_lb_cost; > unsigned long next_decay_max_lb_cost; > > + u64 sched_migration_cost; > u64 avg_scan_cost; /* select_idle_sibling */ > > #ifdef CONFIG_SCHEDSTATS > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index 2fe3aa8..61d3508 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -8782,8 +8782,7 @@ static int idle_balance(struct rq *this_rq, struct > rq_flags *rf) > */ > rq_unpin_lock(this_rq, rf); > > - if (this_rq->avg_idle < sysctl_sched_migration_cost || > - !this_rq->rd->overload) { > + if (!this_rq->rd->overload) { > rcu_read_lock(); > sd = rcu_dereference_check_sched_domain(this_rq->sd); > if (sd)
This change is unexplained and looks unrelated to what the changelog describes. > @@ -8804,7 +8803,8 @@ static int idle_balance(struct rq *this_rq, struct > rq_flags *rf) > if (!(sd->flags & SD_LOAD_BALANCE)) > continue; > > - if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) { > + if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost + > + sd->sched_migration_cost) { > update_next_balance(sd, &next_balance); > break; > } Ditto: this change is also unexplained. > diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c > index 034cbed..bcd8c64 100644 > --- a/kernel/sched/topology.c > +++ b/kernel/sched/topology.c > @@ -1148,12 +1148,14 @@ sd_init(struct sched_domain_topology_level *tl, > sd->flags |= SD_PREFER_SIBLING; > sd->imbalance_pct = 110; > sd->smt_gain = 1178; /* ~15% */ > + sd->sched_migration_cost = 0; > > } else if (sd->flags & SD_SHARE_PKG_RESOURCES) { > sd->flags |= SD_PREFER_SIBLING; > sd->imbalance_pct = 117; > sd->cache_nice_tries = 1; > sd->busy_idx = 2; > + sd->sched_migration_cost = 500000UL; > > #ifdef CONFIG_NUMA > } else if (sd->flags & SD_NUMA) { > @@ -1162,6 +1164,7 @@ sd_init(struct sched_domain_topology_level *tl, > sd->idle_idx = 2; > > sd->flags |= SD_SERIALIZE; > + sd->sched_migration_cost = 5000000UL; That's not 500us: the value is in nanoseconds, so 5000000UL is 5 ms (5000 usec), ten times what the changelog states. -Mike