On Wed, Mar 13, 2019 at 11:08:26AM -0400, Phil Auld wrote:
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 310d0637fe4b..90cc67bbf592 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -4859,19 +4859,51 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
>  	return HRTIMER_NORESTART;
>  }
>  
> +extern const u64 max_cfs_quota_period;
> +int cfs_period_autotune_loop_limit = 8;
> +int cfs_period_autotune_cushion_pct = 15;	/* percentage added to period recalculation */
static const ?

> +
>  static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
>  {
>  	struct cfs_bandwidth *cfs_b =
>  		container_of(timer, struct cfs_bandwidth, period_timer);
> +	s64 nsstart, nsnow, new_period;

u64

>  	int overrun;
>  	int idle = 0;
> +	int count = 0;
>  
>  	raw_spin_lock(&cfs_b->lock);
> +	nsstart = ktime_to_ns(hrtimer_cb_get_time(timer));

we really should kill ktime :/ Anyway, you now do two indirect timer
calls back to back :/ And this is unconditional overhead.

>  	for (;;) {
>  		overrun = hrtimer_forward_now(timer, cfs_b->period);
>  		if (!overrun)
>  			break;
>  
> +		if (++count > cfs_period_autotune_loop_limit) {
> +			ktime_t old_period = ktime_to_ns(cfs_b->period);
> +
> +			nsnow = ktime_to_ns(hrtimer_cb_get_time(timer));
> +			new_period = (nsnow - nsstart)/cfs_period_autotune_loop_limit;
> +
> +			/* Make sure new period will be larger than old. */
> +			if (new_period < old_period) {
> +				new_period = old_period;
> +			}
> +			new_period += (new_period * cfs_period_autotune_cushion_pct) / 100;

Computers _suck_ at /100. And since you're free to pick the constant,
pick a power of two, computers love those.

> +
> +			if (new_period > max_cfs_quota_period)
> +				new_period = max_cfs_quota_period;
> +
> +			cfs_b->period = ns_to_ktime(new_period);
> +			cfs_b->quota += (cfs_b->quota * ((new_period - old_period) * 100)/old_period)/100;

srsly!? Again, you can pick the constant to be anything, and you pick
such a horrid number?!

> +			pr_warn_ratelimited(
> +				"cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
> +				smp_processor_id(), cfs_b->period/NSEC_PER_USEC, cfs_b->quota/NSEC_PER_USEC);

period was ktime_t, remember...

> +

And these here lines are all waaay too long.

Also, is that complexity really needed?

> +			/* reset count so we don't come right back in here */
> +			count = 0;
> +		}
> +
>  		idle = do_sched_cfs_period_timer(cfs_b, overrun);
>  	}
>  	if (idle)

Would not something simpler like the below also work?

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ea74d43924b2..b71557be6b42 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -4885,6 +4885,8 @@ static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
+extern const u64 max_cfs_quota_period;
+
 static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 {
 	struct cfs_bandwidth *cfs_b =
@@ -4892,6 +4894,7 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 	unsigned long flags;
 	int overrun;
 	int idle = 0;
+	int count = 0;
 
 	raw_spin_lock_irqsave(&cfs_b->lock, flags);
 	for (;;) {
@@ -4899,6 +4902,28 @@ static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
 		if (!overrun)
 			break;
 
+		if (++count > 3) {
+			u64 new, old = ktime_to_ns(cfs_b->period);
+
+			new = (old * 147) / 128; /* ~115% */
+			new = min(new, max_cfs_quota_period);
+
+			cfs_b->period = ns_to_ktime(new);
+
+			/* since max is 1s, this is limited to 1e9^2, which fits in u64 */
+			cfs_b->quota *= new;
+			cfs_b->quota /= old;
+
+			pr_warn_ratelimited(
+	"cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us %lld, cfs_quota_us = %lld)\n",
+				smp_processor_id(),
+				new/NSEC_PER_USEC,
+				cfs_b->quota/NSEC_PER_USEC);
+
+			/* reset count so we don't come right back in here */
+			count = 0;
+		}
+
 		idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
 	}
 	if (idle)
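
For reference, the two arithmetic points above are easy to sanity-check
in a standalone userspace sketch (plain C, not kernel code; the 147/128
and 1s constants are lifted from the diff above, everything else is
illustrative):

	#include <stdio.h>
	#include <stdint.h>

	#define NSEC_PER_SEC 1000000000ULL

	int main(void)
	{
		/*
		 * (old * 147) / 128 is ~114.84%, i.e. roughly the +15%
		 * cushion, but with a power-of-two divisor the compiler
		 * turns into a shift.
		 */
		uint64_t old = 100 * 1000ULL;	/* example period: 100us in ns */
		uint64_t new = (old * 147) / 128;

		printf("old=%llu new=%llu (%.2f%%)\n",
		       (unsigned long long)old, (unsigned long long)new,
		       100.0 * new / old);

		/*
		 * Per the comment in the diff: with the period capped at
		 * 1s (1e9 ns), quota * new stays around 1e18, well inside
		 * u64 (max ~1.8e19), so multiply-then-divide cannot
		 * overflow.
		 */
		uint64_t quota = NSEC_PER_SEC;	/* worst case */
		uint64_t scaled = (quota * new) / old;

		printf("quota=%llu scaled=%llu\n",
		       (unsigned long long)quota, (unsigned long long)scaled);

		return 0;
	}

which prints old=100000 new=114843 (114.84%), matching the ~115% in the
comment.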