Re: [PATCH v3 2/4] sched/fair: Make CFS bandwidth controller burstable
On Fri, Mar 12, 2021 at 09:54:33PM +0800, changhuaixin wrote: > > On Mar 10, 2021, at 9:04 PM, Peter Zijlstra wrote: > > There's already an #ifdef block that contains that bandwidth_slice > > thing, see the previous hunk, so why create a new #ifdef here? > > > > Also, personally I think percentages are over-represented as members of > > Q. > > > Sorry, I don't quite understand the "members of Q". Is this saying that the > percentages > are over-designed here? You know the number groups (in order): N, Z, Q, R, C, H, O. Percent being 1/100 is a fraction and thus part of Q (and anything higher of course). Some people seem to think percent is magical and special. It's just a fraction like the infinitely many others in Q. It's also a very crappy one when we consider computers. Basically I hate percentages, they're nothing special and often employed where they should not be.
Re: [PATCH v3 2/4] sched/fair: Make CFS bandwidth controller burstable
> On Mar 10, 2021, at 9:04 PM, Peter Zijlstra wrote: > > On Thu, Jan 21, 2021 at 07:04:51PM +0800, Huaixin Chang wrote: >> Accumulate unused quota from previous periods, thus accumulated >> bandwidth runtime can be used in the following periods. During >> accumulation, take care of runtime overflow. Previous non-burstable >> CFS bandwidth controller only assign quota to runtime, that saves a lot. >> >> A sysctl parameter sysctl_sched_cfs_bw_burst_onset_percent is introduced to >> denote how many percent of burst is given on setting cfs bandwidth. By >> default it is 0, which means on burst is allowed unless accumulated. >> >> Also, parameter sysctl_sched_cfs_bw_burst_enabled is introduced as a >> switch for burst. It is enabled by default. >> >> Signed-off-by: Huaixin Chang >> Signed-off-by: Shanpei Chen > > Identical invalid SoB chain. > >> Reported-by: kernel test robot > > What exactly did the robot report; the whole patch? A warning is reported by the robot. And I have fixed it in this series. I'll remove this line, since it seems unnecessary. 
> >> --- >> include/linux/sched/sysctl.h | 2 ++ >> kernel/sched/core.c | 31 + >> kernel/sched/fair.c | 47 >> >> kernel/sched/sched.h | 4 ++-- >> kernel/sysctl.c | 18 + >> 5 files changed, 88 insertions(+), 14 deletions(-) >> >> diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h >> index 3c31ba88aca5..3400828eaf2d 100644 >> --- a/include/linux/sched/sysctl.h >> +++ b/include/linux/sched/sysctl.h >> @@ -72,6 +72,8 @@ extern unsigned int >> sysctl_sched_uclamp_util_min_rt_default; >> >> #ifdef CONFIG_CFS_BANDWIDTH >> extern unsigned int sysctl_sched_cfs_bandwidth_slice; >> +extern unsigned int sysctl_sched_cfs_bw_burst_onset_percent; >> +extern unsigned int sysctl_sched_cfs_bw_burst_enabled; >> #endif >> >> #ifdef CONFIG_SCHED_AUTOGROUP >> diff --git a/kernel/sched/core.c b/kernel/sched/core.c >> index 48d3bad12be2..fecf0f05ef0c 100644 >> --- a/kernel/sched/core.c >> +++ b/kernel/sched/core.c >> @@ -66,6 +66,16 @@ const_debug unsigned int sysctl_sched_features = >> */ >> const_debug unsigned int sysctl_sched_nr_migrate = 32; >> >> +#ifdef CONFIG_CFS_BANDWIDTH >> +/* >> + * Percent of burst assigned to cfs_b->runtime on tg_set_cfs_bandwidth, >> + * 0 by default. >> + */ >> +unsigned int sysctl_sched_cfs_bw_burst_onset_percent; >> + >> +unsigned int sysctl_sched_cfs_bw_burst_enabled = 1; >> +#endif > > There's already an #ifdef block that contains that bandwidth_slice > thing, see the previous hunk, so why create a new #ifdef here? > > Also, personally I think percentages are over-represented as members of > Q. > Sorry, I don't quite understand the "members of Q". Is this saying that the percentages are over-designed here? >> @@ -7891,7 +7901,7 @@ static DEFINE_MUTEX(cfs_constraints_mutex); >> const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ >> static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ >> /* More than 203 days if BW_SHIFT equals 20. 
*/ >> -static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC; >> +const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC; >> >> static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); >> >> @@ -7900,7 +7910,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, >> u64 period, u64 quota, >> { >> int i, ret = 0, runtime_enabled, runtime_was_enabled; >> struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; >> -u64 buffer; >> +u64 buffer, burst_onset; >> >> if (tg == &root_task_group) >> return -EINVAL; >> @@ -7961,11 +7971,24 @@ static int tg_set_cfs_bandwidth(struct task_group >> *tg, u64 period, u64 quota, >> cfs_b->burst = burst; >> cfs_b->buffer = buffer; >> >> -__refill_cfs_bandwidth_runtime(cfs_b); >> +cfs_b->max_overrun = DIV_ROUND_UP_ULL(max_cfs_runtime, quota); >> +cfs_b->runtime = cfs_b->quota; >> + >> +/* burst_onset needed */ >> +if (cfs_b->quota != RUNTIME_INF && >> +sysctl_sched_cfs_bw_burst_enabled && >> +sysctl_sched_cfs_bw_burst_onset_percent > 0) { > > 'creative' indentation again... > > Also, this gives rise to the question as to why onset_percent is > separate from enabled. Odin noticed the percent thing, too. Maybe I will remove this and let cfsb start with 0 burst. In this way, this if statement can be removed too. > >> + >> +burst_onset = do_div(burst, 100) * >> +sysctl_sched_cfs_bw_burst_onset_percent; > > and again.. > >> + >> +cfs_b->runtime += burst_onset; >> +cfs_b->runtime = min(max_cfs_runtime, cfs_b->runtime); >> +} >> >> /* Restart the period timer (if active) to handle new period expiry: */ >> if (runtime_enabled) >> -start_cfs_bandwidth(cfs_b); >> +start_cfs_b
Re: [PATCH v3 2/4] sched/fair: Make CFS bandwidth controller burstable
On Thu, Jan 21, 2021 at 07:04:51PM +0800, Huaixin Chang wrote: > Accumulate unused quota from previous periods, thus accumulated > bandwidth runtime can be used in the following periods. During > accumulation, take care of runtime overflow. Previous non-burstable > CFS bandwidth controller only assign quota to runtime, that saves a lot. > > A sysctl parameter sysctl_sched_cfs_bw_burst_onset_percent is introduced to > denote how many percent of burst is given on setting cfs bandwidth. By > default it is 0, which means on burst is allowed unless accumulated. > > Also, parameter sysctl_sched_cfs_bw_burst_enabled is introduced as a > switch for burst. It is enabled by default. > > Signed-off-by: Huaixin Chang > Signed-off-by: Shanpei Chen Identical invalid SoB chain. > Reported-by: kernel test robot What exactly did the robot report; the whole patch? > --- > include/linux/sched/sysctl.h | 2 ++ > kernel/sched/core.c | 31 + > kernel/sched/fair.c | 47 > > kernel/sched/sched.h | 4 ++-- > kernel/sysctl.c | 18 + > 5 files changed, 88 insertions(+), 14 deletions(-) > > diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h > index 3c31ba88aca5..3400828eaf2d 100644 > --- a/include/linux/sched/sysctl.h > +++ b/include/linux/sched/sysctl.h > @@ -72,6 +72,8 @@ extern unsigned int sysctl_sched_uclamp_util_min_rt_default; > > #ifdef CONFIG_CFS_BANDWIDTH > extern unsigned int sysctl_sched_cfs_bandwidth_slice; > +extern unsigned int sysctl_sched_cfs_bw_burst_onset_percent; > +extern unsigned int sysctl_sched_cfs_bw_burst_enabled; > #endif > > #ifdef CONFIG_SCHED_AUTOGROUP > diff --git a/kernel/sched/core.c b/kernel/sched/core.c > index 48d3bad12be2..fecf0f05ef0c 100644 > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -66,6 +66,16 @@ const_debug unsigned int sysctl_sched_features = > */ > const_debug unsigned int sysctl_sched_nr_migrate = 32; > > +#ifdef CONFIG_CFS_BANDWIDTH > +/* > + * Percent of burst assigned to cfs_b->runtime on tg_set_cfs_bandwidth, 
> + * 0 by default. > + */ > +unsigned int sysctl_sched_cfs_bw_burst_onset_percent; > + > +unsigned int sysctl_sched_cfs_bw_burst_enabled = 1; > +#endif There's already an #ifdef block that contains that bandwidth_slice thing, see the previous hunk, so why create a new #ifdef here? Also, personally I think percentages are over-represented as members of Q. > @@ -7891,7 +7901,7 @@ static DEFINE_MUTEX(cfs_constraints_mutex); > const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ > static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ > /* More than 203 days if BW_SHIFT equals 20. */ > -static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC; > +const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC; > > static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); > > @@ -7900,7 +7910,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, > u64 period, u64 quota, > { > int i, ret = 0, runtime_enabled, runtime_was_enabled; > struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; > - u64 buffer; > + u64 buffer, burst_onset; > > if (tg == &root_task_group) > return -EINVAL; > @@ -7961,11 +7971,24 @@ static int tg_set_cfs_bandwidth(struct task_group > *tg, u64 period, u64 quota, > cfs_b->burst = burst; > cfs_b->buffer = buffer; > > - __refill_cfs_bandwidth_runtime(cfs_b); > + cfs_b->max_overrun = DIV_ROUND_UP_ULL(max_cfs_runtime, quota); > + cfs_b->runtime = cfs_b->quota; > + > + /* burst_onset needed */ > + if (cfs_b->quota != RUNTIME_INF && > + sysctl_sched_cfs_bw_burst_enabled && > + sysctl_sched_cfs_bw_burst_onset_percent > 0) { 'creative' indentation again... Also, this gives rise to the question as to why onset_percent is separate from enabled. > + > + burst_onset = do_div(burst, 100) * > + sysctl_sched_cfs_bw_burst_onset_percent; and again.. 
> + > + cfs_b->runtime += burst_onset; > + cfs_b->runtime = min(max_cfs_runtime, cfs_b->runtime); > + } > > /* Restart the period timer (if active) to handle new period expiry: */ > if (runtime_enabled) > - start_cfs_bandwidth(cfs_b); > + start_cfs_bandwidth(cfs_b, 1); > > raw_spin_unlock_irq(&cfs_b->lock); > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c > index 6bb4f89259fd..abe6eb05fe09 100644 > --- a/kernel/sched/fair.c > +++ b/kernel/sched/fair.c > @@ -4598,10 +4598,23 @@ static inline u64 sched_cfs_bandwidth_slice(void) > * > * requires cfs_b->lock > */ > -void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) > +static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b, > + u64 overrun) > { > - if (cfs_b->quota != R
[PATCH v3 2/4] sched/fair: Make CFS bandwidth controller burstable
Accumulate unused quota from previous periods, thus accumulated bandwidth runtime can be used in the following periods. During accumulation, take care of runtime overflow. Previous non-burstable CFS bandwidth controller only assigns quota to runtime, that saves a lot. A sysctl parameter sysctl_sched_cfs_bw_burst_onset_percent is introduced to denote how many percent of burst is given on setting cfs bandwidth. By default it is 0, which means no burst is allowed unless accumulated. Also, parameter sysctl_sched_cfs_bw_burst_enabled is introduced as a switch for burst. It is enabled by default. Signed-off-by: Huaixin Chang Signed-off-by: Shanpei Chen Reported-by: kernel test robot --- include/linux/sched/sysctl.h | 2 ++ kernel/sched/core.c | 31 + kernel/sched/fair.c | 47 kernel/sched/sched.h | 4 ++-- kernel/sysctl.c | 18 + 5 files changed, 88 insertions(+), 14 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 3c31ba88aca5..3400828eaf2d 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -72,6 +72,8 @@ extern unsigned int sysctl_sched_uclamp_util_min_rt_default; #ifdef CONFIG_CFS_BANDWIDTH extern unsigned int sysctl_sched_cfs_bandwidth_slice; +extern unsigned int sysctl_sched_cfs_bw_burst_onset_percent; +extern unsigned int sysctl_sched_cfs_bw_burst_enabled; #endif #ifdef CONFIG_SCHED_AUTOGROUP diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 48d3bad12be2..fecf0f05ef0c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -66,6 +66,16 @@ const_debug unsigned int sysctl_sched_features = */ const_debug unsigned int sysctl_sched_nr_migrate = 32; +#ifdef CONFIG_CFS_BANDWIDTH +/* + * Percent of burst assigned to cfs_b->runtime on tg_set_cfs_bandwidth, + * 0 by default. + */ +unsigned int sysctl_sched_cfs_bw_burst_onset_percent; + +unsigned int sysctl_sched_cfs_bw_burst_enabled = 1; +#endif + /* * period over which we measure -rt task CPU usage in us. 
* default: 1s @@ -7891,7 +7901,7 @@ static DEFINE_MUTEX(cfs_constraints_mutex); const u64 max_cfs_quota_period = 1 * NSEC_PER_SEC; /* 1s */ static const u64 min_cfs_quota_period = 1 * NSEC_PER_MSEC; /* 1ms */ /* More than 203 days if BW_SHIFT equals 20. */ -static const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC; +const u64 max_cfs_runtime = MAX_BW * NSEC_PER_USEC; static int __cfs_schedulable(struct task_group *tg, u64 period, u64 runtime); @@ -7900,7 +7910,7 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota, { int i, ret = 0, runtime_enabled, runtime_was_enabled; struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth; - u64 buffer; + u64 buffer, burst_onset; if (tg == &root_task_group) return -EINVAL; @@ -7961,11 +7971,24 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota, cfs_b->burst = burst; cfs_b->buffer = buffer; - __refill_cfs_bandwidth_runtime(cfs_b); + cfs_b->max_overrun = DIV_ROUND_UP_ULL(max_cfs_runtime, quota); + cfs_b->runtime = cfs_b->quota; + + /* burst_onset needed */ + if (cfs_b->quota != RUNTIME_INF && + sysctl_sched_cfs_bw_burst_enabled && + sysctl_sched_cfs_bw_burst_onset_percent > 0) { + + burst_onset = do_div(burst, 100) * + sysctl_sched_cfs_bw_burst_onset_percent; + + cfs_b->runtime += burst_onset; + cfs_b->runtime = min(max_cfs_runtime, cfs_b->runtime); + } /* Restart the period timer (if active) to handle new period expiry: */ if (runtime_enabled) - start_cfs_bandwidth(cfs_b); + start_cfs_bandwidth(cfs_b, 1); raw_spin_unlock_irq(&cfs_b->lock); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 6bb4f89259fd..abe6eb05fe09 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -4598,10 +4598,23 @@ static inline u64 sched_cfs_bandwidth_slice(void) * * requires cfs_b->lock */ -void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b) +static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b, + u64 overrun) { - if (cfs_b->quota != RUNTIME_INF) - 
cfs_b->runtime = cfs_b->quota; + u64 refill; + + if (cfs_b->quota != RUNTIME_INF) { + + if (!sysctl_sched_cfs_bw_burst_enabled) { + cfs_b->runtime = cfs_b->quota; + return; + } + + overrun = min(overrun, cfs_b->max_overrun); + refill = cfs_b->quota * overrun; + cfs_b->runtime += refill; + cfs_b->runtime = min(cfs_b->runtime, cfs_b->buffer); + } } static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct