Re: hardclock(9), roundrobin: make roundrobin() an independent clock interrupt
On Thu, Aug 10, 2023 at 07:32:05PM +0200, Martin Pieuchot wrote:
> On 10/08/23(Thu) 12:18, Scott Cheloha wrote:
> > On Thu, Aug 10, 2023 at 01:05:27PM +0200, Martin Pieuchot wrote:
> > [...]
> > > Can we get rid of `hardclock_period' and use a variable set to 100ms?
> > > This should be tested on alpha which has a hz of 1024 but I'd argue this
> > > is an improvement.
> >
> > Sure, that's cleaner. The updated patch below adds a new
> > "roundrobin_period" variable initialized during clockintr_init().
>
> I'd rather see this variable initialized in sched_bsd.c to 100ms without
> depending on `hz'. Is it possible? My point is to untangle this
> completely from `hz'.

Yes, but we need to do that in a separate patch. This patch isolates
roundrobin() from hardclock() without changing any other behavior. We
can then use the isolated roundrobin() as a known-working starting
point for e.g.:

const uint32_t roundrobin_period = 100000000;	/* 100ms */

Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.79
diff -u -p -r1.79 sched_bsd.c
--- kern/sched_bsd.c	5 Aug 2023 20:07:55 -0000	1.79
+++ kern/sched_bsd.c	11 Aug 2023 02:47:03 -0000
@@ -54,9 +54,8 @@
 #include <sys/ktrace.h>
 #endif
 
-
+uint32_t roundrobin_period;	/* [I] roundrobin period (ns) */
 int	lbolt;			/* once a second sleep address */
-int	rrticks_init;		/* # of hardclock ticks per roundrobin() */
 
 #ifdef MULTIPROCESSOR
 struct __mp_lock sched_lock;
@@ -69,21 +68,23 @@ uint32_t	decay_aftersleep(uint32_t, uin
  * Force switch among equal priority processes every 100ms.
  */
 void
-roundrobin(struct cpu_info *ci)
+roundrobin(struct clockintr *cl, void *cf)
 {
+	struct cpu_info *ci = curcpu();
 	struct schedstate_percpu *spc = &ci->ci_schedstate;
+	uint64_t count;
 
-	spc->spc_rrticks = rrticks_init;
+	count = clockintr_advance(cl, roundrobin_period);
 
 	if (ci->ci_curproc != NULL) {
-		if (spc->spc_schedflags & SPCF_SEENRR) {
+		if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
 			/*
 			 * The process has already been through a roundrobin
 			 * without switching and may be hogging the CPU.
 			 * Indicate that the process should yield.
 			 */
 			atomic_setbits_int(&spc->spc_schedflags,
-			    SPCF_SHOULDYIELD);
+			    SPCF_SEENRR | SPCF_SHOULDYIELD);
 		} else {
 			atomic_setbits_int(&spc->spc_schedflags,
 			    SPCF_SEENRR);
@@ -695,8 +696,6 @@ scheduler_start(void)
 	 * its job.
 	 */
 	timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
-
-	rrticks_init = hz / 10;
 	schedcpu(&schedcpu_to);
 
 #ifndef SMALL_KERNEL
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.84
diff -u -p -r1.84 kern_sched.c
--- kern/kern_sched.c	5 Aug 2023 20:07:55 -0000	1.84
+++ kern/kern_sched.c	11 Aug 2023 02:47:03 -0000
@@ -102,6 +102,12 @@ sched_init_cpu(struct cpu_info *ci)
 		if (spc->spc_profclock == NULL)
 			panic("%s: clockintr_establish profclock", __func__);
 	}
+	if (spc->spc_roundrobin == NULL) {
+		spc->spc_roundrobin = clockintr_establish(&ci->ci_queue,
+		    roundrobin);
+		if (spc->spc_roundrobin == NULL)
+			panic("%s: clockintr_establish roundrobin", __func__);
+	}
 
 	kthread_create_deferred(sched_kthreads_create, ci);
 
Index: kern/kern_clockintr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
retrieving revision 1.30
diff -u -p -r1.30 kern_clockintr.c
--- kern/kern_clockintr.c	5 Aug 2023 20:07:55 -0000	1.30
+++ kern/kern_clockintr.c	11 Aug 2023 02:47:03 -0000
@@ -69,6 +69,7 @@ clockintr_init(u_int flags)
 
 	KASSERT(hz > 0 && hz <= 1000000000);
 	hardclock_period = 1000000000 / hz;
+	roundrobin_period = hardclock_period * 10;
 
 	KASSERT(stathz >= 1 && stathz <= 1000000000);
 
@@ -204,6 +205,11 @@ clockintr_cpu_init(const struct intrcloc
 		clockintr_stagger(spc->spc_profclock, profclock_period,
 		    multiplier, MAXCPUS);
 	}
+	if (spc->spc_roundrobin->cl_expiration == 0) {
+		clockintr_stagger(spc->spc_roundrobin, hardclock_period,
+		    multiplier, MAXCPUS);
+	}
+	clockintr_advance(spc->spc_roundrobin, roundrobin_period);
 
 	if (reset_cq_intrclock)
 		SET(cq->cq_flags, CQ_INTRCLOCK);
Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
Re: hardclock(9), roundrobin: make roundrobin() an independent clock interrupt
On 10/08/23(Thu) 12:18, Scott Cheloha wrote:
> On Thu, Aug 10, 2023 at 01:05:27PM +0200, Martin Pieuchot wrote:
> [...]
> > Can we get rid of `hardclock_period' and use a variable set to 100ms?
> > This should be tested on alpha which has a hz of 1024 but I'd argue this
> > is an improvement.
>
> Sure, that's cleaner. The updated patch below adds a new
> "roundrobin_period" variable initialized during clockintr_init().

I'd rather see this variable initialized in sched_bsd.c to 100ms without
depending on `hz'. Is it possible? My point is to untangle this
completely from `hz'.

> Index: kern/sched_bsd.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sched_bsd.c,v
> retrieving revision 1.79
> diff -u -p -r1.79 sched_bsd.c
> --- kern/sched_bsd.c	5 Aug 2023 20:07:55 -0000	1.79
> +++ kern/sched_bsd.c	10 Aug 2023 17:15:53 -0000
> @@ -54,9 +54,8 @@
>  #include <sys/ktrace.h>
>  #endif
>  
> -
> +uint32_t roundrobin_period;	/* [I] roundrobin period (ns) */
>  int	lbolt;			/* once a second sleep address */
> -int	rrticks_init;		/* # of hardclock ticks per roundrobin() */
>  
>  #ifdef MULTIPROCESSOR
>  struct __mp_lock sched_lock;
> @@ -69,21 +68,23 @@ uint32_t	decay_aftersleep(uint32_t, uin
>   * Force switch among equal priority processes every 100ms.
>   */
>  void
> -roundrobin(struct cpu_info *ci)
> +roundrobin(struct clockintr *cl, void *cf)
>  {
> +	struct cpu_info *ci = curcpu();
>  	struct schedstate_percpu *spc = &ci->ci_schedstate;
> +	uint64_t count;
>  
> -	spc->spc_rrticks = rrticks_init;
> +	count = clockintr_advance(cl, roundrobin_period);
>  
>  	if (ci->ci_curproc != NULL) {
> -		if (spc->spc_schedflags & SPCF_SEENRR) {
> +		if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
>  			/*
>  			 * The process has already been through a roundrobin
>  			 * without switching and may be hogging the CPU.
>  			 * Indicate that the process should yield.
>  			 */
>  			atomic_setbits_int(&spc->spc_schedflags,
> -			    SPCF_SHOULDYIELD);
> +			    SPCF_SEENRR | SPCF_SHOULDYIELD);
>  		} else {
>  			atomic_setbits_int(&spc->spc_schedflags,
>  			    SPCF_SEENRR);
> @@ -695,8 +696,6 @@ scheduler_start(void)
>  	 * its job.
>  	 */
>  	timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
> -
> -	rrticks_init = hz / 10;
>  	schedcpu(&schedcpu_to);
>  
>  #ifndef SMALL_KERNEL
> Index: kern/kern_sched.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sched.c,v
> retrieving revision 1.84
> diff -u -p -r1.84 kern_sched.c
> --- kern/kern_sched.c	5 Aug 2023 20:07:55 -0000	1.84
> +++ kern/kern_sched.c	10 Aug 2023 17:15:53 -0000
> @@ -102,6 +102,12 @@ sched_init_cpu(struct cpu_info *ci)
>  		if (spc->spc_profclock == NULL)
>  			panic("%s: clockintr_establish profclock", __func__);
>  	}
> +	if (spc->spc_roundrobin == NULL) {
> +		spc->spc_roundrobin = clockintr_establish(&ci->ci_queue,
> +		    roundrobin);
> +		if (spc->spc_roundrobin == NULL)
> +			panic("%s: clockintr_establish roundrobin", __func__);
> +	}
>  
>  	kthread_create_deferred(sched_kthreads_create, ci);
>  
> Index: kern/kern_clockintr.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 kern_clockintr.c
> --- kern/kern_clockintr.c	5 Aug 2023 20:07:55 -0000	1.30
> +++ kern/kern_clockintr.c	10 Aug 2023 17:15:53 -0000
> @@ -69,6 +69,7 @@ clockintr_init(u_int flags)
>  
>  	KASSERT(hz > 0 && hz <= 1000000000);
>  	hardclock_period = 1000000000 / hz;
> +	roundrobin_period = hardclock_period * 10;
>  
>  	KASSERT(stathz >= 1 && stathz <= 1000000000);
>  
> @@ -204,6 +205,11 @@ clockintr_cpu_init(const struct intrcloc
>  		clockintr_stagger(spc->spc_profclock, profclock_period,
>  		    multiplier, MAXCPUS);
>  	}
> +	if (spc->spc_roundrobin->cl_expiration == 0) {
> +		clockintr_stagger(spc->spc_roundrobin, hardclock_period,
> +		    multiplier, MAXCPUS);
> +	}
> +	clockintr_advance(spc->spc_roundrobin, roundrobin_period);
>  
>  	if (reset_cq_intrclock)
>  		SET(cq->cq_flags, CQ_INTRCLOCK);
> Index: kern/kern_clock.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_clock.c,v
> retrieving revision 1.111
> diff -u -p -r1.111 kern_clock.c
> --- kern/kern_clock.c	5 Aug 2023 20:07:55 -0000	1.111
> +++ kern/kern_clock.c	10 Aug 2023 17:15:54 -0000
> @@ -113,9 +113,6 @@ hardclock(struct clockframe *frame)
>  {
>  	struct cpu_info
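The hz-independent initialization requested here would amount to a
one-line definition in sched_bsd.c. A sketch of that follow-up change,
not part of the diff under review, with the value in nanoseconds to
match the "[I] roundrobin period (ns)" annotation in the patch:

	/* Fixed 100ms roundrobin period, no longer derived from hz. */
	const uint32_t roundrobin_period = 100000000;	/* 100ms in ns */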
Re: hardclock(9), roundrobin: make roundrobin() an independent clock interrupt
On Thu, Aug 10, 2023 at 01:05:27PM +0200, Martin Pieuchot wrote:
> On 05/08/23(Sat) 17:17, Scott Cheloha wrote:
> > This is the next piece of the clock interrupt reorganization patch
> > series.
>
> The round robin logic is here to make sure a process doesn't hog a CPU.
> The period to tell a process it should yield doesn't have to be tied
> to the hardclock period. We want to be sure a process doesn't run more
> than 100ms at a time.
>
> Is the priority of this new clock interrupt the same as the hardclock?

Yes. Clock interrupts on a given CPU are dispatched in order of
expiration. If two clock interrupts on the same CPU have the same
expiration value they are dispatched in FIFO order.

> I don't understand what clockintr_advance() is doing. Maybe you could
> write a manual for it?

clockintr_advance() is a convenience wrapper for clockintr_schedule().
It reschedules periodic interrupts without drift.

The manpage update is a work in progress.

> I'm afraid we could wait 200ms now? Or what does a `count' of 2 mean?

No. roundrobin() is still scheduled to run every 100ms. The code
change ensures we properly account for situations where roundrobin()
is so late that two or more roundrobin periods have elapsed:

> @@ -69,21 +68,23 @@ uint32_t	decay_aftersleep(uint32_t, uin
>   * Force switch among equal priority processes every 100ms.
>   */
>  void
> -roundrobin(struct cpu_info *ci)
> +roundrobin(struct clockintr *cl, void *cf)
>  {
> +	struct cpu_info *ci = curcpu();
>  	struct schedstate_percpu *spc = &ci->ci_schedstate;
> +	uint64_t count;
>  
> -	spc->spc_rrticks = rrticks_init;
> +	count = clockintr_advance(cl, roundrobin_period);
>  
>  	if (ci->ci_curproc != NULL) {
> -		if (spc->spc_schedflags & SPCF_SEENRR) {
> +		if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
>  			/*
>  			 * The process has already been through a roundrobin
>  			 * without switching and may be hogging the CPU.
>  			 * Indicate that the process should yield.
>  			 */
>  			atomic_setbits_int(&spc->spc_schedflags,
> -			    SPCF_SHOULDYIELD);
> +			    SPCF_SEENRR | SPCF_SHOULDYIELD);
>  		} else {
>  			atomic_setbits_int(&spc->spc_schedflags,
>  			    SPCF_SEENRR);

In such a situation, we want to set both SPCF_SEENRR and
SPCF_SHOULDYIELD on the thread. This simulates what would have
happened under normal circumstances, i.e. the thread would have been
interrupted by roundrobin() two separate times.

> Same question for clockintr_stagger().

clockintr_stagger() adjusts the starting offset for the given clock
interrupt. We use it to keep identical clock interrupts from expiring
simultaneously across every CPU in the system.

> Can we get rid of `hardclock_period' and use a variable set to 100ms?
> This should be tested on alpha which has a hz of 1024 but I'd argue this
> is an improvement.

Sure, that's cleaner. The updated patch below adds a new
"roundrobin_period" variable initialized during clockintr_init().

Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.79
diff -u -p -r1.79 sched_bsd.c
--- kern/sched_bsd.c	5 Aug 2023 20:07:55 -0000	1.79
+++ kern/sched_bsd.c	10 Aug 2023 17:15:53 -0000
@@ -54,9 +54,8 @@
 #include <sys/ktrace.h>
 #endif
 
-
+uint32_t roundrobin_period;	/* [I] roundrobin period (ns) */
 int	lbolt;			/* once a second sleep address */
-int	rrticks_init;		/* # of hardclock ticks per roundrobin() */
 
 #ifdef MULTIPROCESSOR
 struct __mp_lock sched_lock;
@@ -69,21 +68,23 @@ uint32_t	decay_aftersleep(uint32_t, uin
  * Force switch among equal priority processes every 100ms.
  */
 void
-roundrobin(struct cpu_info *ci)
+roundrobin(struct clockintr *cl, void *cf)
 {
+	struct cpu_info *ci = curcpu();
 	struct schedstate_percpu *spc = &ci->ci_schedstate;
+	uint64_t count;
 
-	spc->spc_rrticks = rrticks_init;
+	count = clockintr_advance(cl, roundrobin_period);
 
 	if (ci->ci_curproc != NULL) {
-		if (spc->spc_schedflags & SPCF_SEENRR) {
+		if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
 			/*
 			 * The process has already been through a roundrobin
 			 * without switching and may be hogging the CPU.
 			 * Indicate that the process should yield.
 			 */
 			atomic_setbits_int(&spc->spc_schedflags,
-			    SPCF_SHOULDYIELD);
+			    SPCF_SEENRR | SPCF_SHOULDYIELD);
 		} else {
 			atomic_setbits_int(&spc->spc_schedflags,
 			    SPCF_SEENRR);
@@ -695,8 +696,6 @@
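To make the clockintr_advance() contract described above concrete,
here is a minimal user-space model of drift-free periodic
rescheduling. This is a sketch of the semantics, not the kernel
implementation: the deadline steps forward from the old expiration
rather than from "now", and the return value counts how many whole
periods elapsed, which is how roundrobin() can observe a count >= 2
after clock interrupts were blocked:

	#include <stdint.h>

	/*
	 * Model of the drift-free rescheduling contract (a sketch).
	 * "*expiration" is an absolute deadline in nanoseconds;
	 * "now" is the current absolute time.
	 */
	uint64_t
	advance_model(uint64_t *expiration, uint64_t period, uint64_t now)
	{
		uint64_t count = 0;

		while (*expiration <= now) {
			/* Step from the old deadline, not from "now". */
			*expiration += period;
			count++;
		}
		return count;
	}

A callback that runs on time sees a count of 1; one so late that a
second deadline has also passed sees a count >= 2 and, in
roundrobin()'s case, sets SPCF_SEENRR | SPCF_SHOULDYIELD in one shot
instead of over two runs.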
Re: hardclock(9), roundrobin: make roundrobin() an independent clock interrupt
On 05/08/23(Sat) 17:17, Scott Cheloha wrote:
> This is the next piece of the clock interrupt reorganization patch
> series.

The round robin logic is here to make sure a process doesn't hog a CPU.
The period to tell a process it should yield doesn't have to be tied
to the hardclock period. We want to be sure a process doesn't run more
than 100ms at a time.

Is the priority of this new clock interrupt the same as the hardclock?

I don't understand what clockintr_advance() is doing. Maybe you could
write a manual for it?

I'm afraid we could wait 200ms now? Or what does a `count' of 2 mean?

Same question for clockintr_stagger().

Can we get rid of `hardclock_period' and use a variable set to 100ms?
This should be tested on alpha which has a hz of 1024 but I'd argue this
is an improvement.

> This patch removes the roundrobin() call from hardclock() and makes
> roundrobin() an independent clock interrupt.
>
> - Revise roundrobin() to make it a valid clock interrupt callback.
>   It remains periodic. It still runs at one tenth of the hardclock
>   frequency.
>
> - Account for multiple expirations in roundrobin(). If two or more
>   intervals have elapsed we set SPCF_SHOULDYIELD immediately.
>
>   This preserves existing behavior: hardclock() is called multiple
>   times during clockintr_hardclock() if clock interrupts are blocked
>   for long enough.
>
> - Each schedstate_percpu has its own roundrobin() handle,
>   spc_roundrobin. spc_roundrobin is established during
>   sched_init_cpu(), staggered during the first clockintr_cpu_init()
>   call, and advanced during clockintr_cpu_init(). Expirations during
>   suspend/resume are discarded.
>
> - spc_rrticks and rrticks_init are now useless. Delete them.
>
> ok?
>
> Also, yes, I see the growing pile of scheduler-controlled clock
> interrupt handles. My current plan is to move the setup code at the
> end of clockintr_cpu_init() to a different routine, maybe something
> like "sched_start_cpu()". On the primary CPU you'd call it
> immediately after cpu_initclocks(). On secondary CPUs you'd call it
> at the end of cpu_hatch(), just before cpu_switchto().
>
> In any case, we will need to find a home for that code someplace. It
> can't stay in clockintr_cpu_init() forever.
>
> Index: kern/sched_bsd.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/sched_bsd.c,v
> retrieving revision 1.79
> diff -u -p -r1.79 sched_bsd.c
> --- kern/sched_bsd.c	5 Aug 2023 20:07:55 -0000	1.79
> +++ kern/sched_bsd.c	5 Aug 2023 22:15:25 -0000
> @@ -56,7 +56,6 @@
>  
>  
>  int	lbolt;			/* once a second sleep address */
> -int	rrticks_init;		/* # of hardclock ticks per roundrobin() */
>  
>  #ifdef MULTIPROCESSOR
>  struct __mp_lock sched_lock;
> @@ -69,21 +68,23 @@ uint32_t	decay_aftersleep(uint32_t, uin
>   * Force switch among equal priority processes every 100ms.
>   */
>  void
> -roundrobin(struct cpu_info *ci)
> +roundrobin(struct clockintr *cl, void *cf)
>  {
> +	struct cpu_info *ci = curcpu();
>  	struct schedstate_percpu *spc = &ci->ci_schedstate;
> +	uint64_t count;
>  
> -	spc->spc_rrticks = rrticks_init;
> +	count = clockintr_advance(cl, hardclock_period * 10);
>  
>  	if (ci->ci_curproc != NULL) {
> -		if (spc->spc_schedflags & SPCF_SEENRR) {
> +		if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
>  			/*
>  			 * The process has already been through a roundrobin
>  			 * without switching and may be hogging the CPU.
>  			 * Indicate that the process should yield.
>  			 */
>  			atomic_setbits_int(&spc->spc_schedflags,
> -			    SPCF_SHOULDYIELD);
> +			    SPCF_SEENRR | SPCF_SHOULDYIELD);
>  		} else {
>  			atomic_setbits_int(&spc->spc_schedflags,
>  			    SPCF_SEENRR);
> @@ -695,8 +696,6 @@ scheduler_start(void)
>  	 * its job.
>  	 */
>  	timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
> -
> -	rrticks_init = hz / 10;
>  	schedcpu(&schedcpu_to);
>  
>  #ifndef SMALL_KERNEL
> Index: kern/kern_sched.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/kern_sched.c,v
> retrieving revision 1.84
> diff -u -p -r1.84 kern_sched.c
> --- kern/kern_sched.c	5 Aug 2023 20:07:55 -0000	1.84
> +++ kern/kern_sched.c	5 Aug 2023 22:15:25 -0000
> @@ -102,6 +102,12 @@ sched_init_cpu(struct cpu_info *ci)
>  		if (spc->spc_profclock == NULL)
>  			panic("%s: clockintr_establish profclock", __func__);
>  	}
> +	if (spc->spc_roundrobin == NULL) {
> +		spc->spc_roundrobin = clockintr_establish(&ci->ci_queue,
> +		    roundrobin);
> +		if (spc->spc_roundrobin == NULL)
> +			panic("%s: clockintr_establish roundrobin", __func__);
> +	}
>
hardclock(9), roundrobin: make roundrobin() an independent clock interrupt
This is the next piece of the clock interrupt reorganization patch
series.

This patch removes the roundrobin() call from hardclock() and makes
roundrobin() an independent clock interrupt.

- Revise roundrobin() to make it a valid clock interrupt callback.
  It remains periodic. It still runs at one tenth of the hardclock
  frequency.

- Account for multiple expirations in roundrobin(). If two or more
  intervals have elapsed we set SPCF_SHOULDYIELD immediately.

  This preserves existing behavior: hardclock() is called multiple
  times during clockintr_hardclock() if clock interrupts are blocked
  for long enough.

- Each schedstate_percpu has its own roundrobin() handle,
  spc_roundrobin. spc_roundrobin is established during
  sched_init_cpu(), staggered during the first clockintr_cpu_init()
  call, and advanced during clockintr_cpu_init(). Expirations during
  suspend/resume are discarded.

- spc_rrticks and rrticks_init are now useless. Delete them.

ok?

Also, yes, I see the growing pile of scheduler-controlled clock
interrupt handles. My current plan is to move the setup code at the
end of clockintr_cpu_init() to a different routine, maybe something
like "sched_start_cpu()". On the primary CPU you'd call it immediately
after cpu_initclocks(). On secondary CPUs you'd call it at the end of
cpu_hatch(), just before cpu_switchto().

In any case, we will need to find a home for that code someplace. It
can't stay in clockintr_cpu_init() forever.

Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.79
diff -u -p -r1.79 sched_bsd.c
--- kern/sched_bsd.c	5 Aug 2023 20:07:55 -0000	1.79
+++ kern/sched_bsd.c	5 Aug 2023 22:15:25 -0000
@@ -56,7 +56,6 @@
 
 
 int	lbolt;			/* once a second sleep address */
-int	rrticks_init;		/* # of hardclock ticks per roundrobin() */
 
 #ifdef MULTIPROCESSOR
 struct __mp_lock sched_lock;
@@ -69,21 +68,23 @@ uint32_t	decay_aftersleep(uint32_t, uin
  * Force switch among equal priority processes every 100ms.
  */
 void
-roundrobin(struct cpu_info *ci)
+roundrobin(struct clockintr *cl, void *cf)
 {
+	struct cpu_info *ci = curcpu();
 	struct schedstate_percpu *spc = &ci->ci_schedstate;
+	uint64_t count;
 
-	spc->spc_rrticks = rrticks_init;
+	count = clockintr_advance(cl, hardclock_period * 10);
 
 	if (ci->ci_curproc != NULL) {
-		if (spc->spc_schedflags & SPCF_SEENRR) {
+		if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
 			/*
 			 * The process has already been through a roundrobin
 			 * without switching and may be hogging the CPU.
 			 * Indicate that the process should yield.
 			 */
 			atomic_setbits_int(&spc->spc_schedflags,
-			    SPCF_SHOULDYIELD);
+			    SPCF_SEENRR | SPCF_SHOULDYIELD);
 		} else {
 			atomic_setbits_int(&spc->spc_schedflags,
 			    SPCF_SEENRR);
@@ -695,8 +696,6 @@ scheduler_start(void)
 	 * its job.
 	 */
 	timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
-
-	rrticks_init = hz / 10;
 	schedcpu(&schedcpu_to);
 
 #ifndef SMALL_KERNEL
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.84
diff -u -p -r1.84 kern_sched.c
--- kern/kern_sched.c	5 Aug 2023 20:07:55 -0000	1.84
+++ kern/kern_sched.c	5 Aug 2023 22:15:25 -0000
@@ -102,6 +102,12 @@ sched_init_cpu(struct cpu_info *ci)
 		if (spc->spc_profclock == NULL)
 			panic("%s: clockintr_establish profclock", __func__);
 	}
+	if (spc->spc_roundrobin == NULL) {
+		spc->spc_roundrobin = clockintr_establish(&ci->ci_queue,
+		    roundrobin);
+		if (spc->spc_roundrobin == NULL)
+			panic("%s: clockintr_establish roundrobin", __func__);
+	}
 
 	kthread_create_deferred(sched_kthreads_create, ci);
 
Index: kern/kern_clockintr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
retrieving revision 1.30
diff -u -p -r1.30 kern_clockintr.c
--- kern/kern_clockintr.c	5 Aug 2023 20:07:55 -0000	1.30
+++ kern/kern_clockintr.c	5 Aug 2023 22:15:25 -0000
@@ -204,6 +204,11 @@ clockintr_cpu_init(const struct intrcloc
 		clockintr_stagger(spc->spc_profclock, profclock_period,
 		    multiplier, MAXCPUS);
 	}
+	if (spc->spc_roundrobin->cl_expiration == 0) {
+		clockintr_stagger(spc->spc_roundrobin, hardclock_period,
+		    multiplier, MAXCPUS);
+	}
+	clockintr_advance(spc->spc_roundrobin, hardclock_period * 10);
 
 	if (reset_cq_intrclock)
 		SET(cq->cq_flags, CQ_INTRCLOCK);
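Condensed from the diffs above, the per-CPU lifecycle of the new
spc_roundrobin handle looks like this (a summary sketch with the
variables as they appear in the diffs, not a standalone function;
error handling and locking omitted):

	/* sched_init_cpu(): allocate the handle once per CPU. */
	spc->spc_roundrobin = clockintr_establish(&ci->ci_queue, roundrobin);

	/*
	 * clockintr_cpu_init(), first call only: offset this CPU's
	 * roundrobin so identical interrupts don't expire on every
	 * CPU at the same moment.
	 */
	if (spc->spc_roundrobin->cl_expiration == 0) {
		clockintr_stagger(spc->spc_roundrobin, hardclock_period,
		    multiplier, MAXCPUS);
	}

	/*
	 * clockintr_cpu_init(), every call: (re)arm one period out.
	 * Rearming here discards expirations accumulated across
	 * suspend/resume.
	 */
	clockintr_advance(spc->spc_roundrobin, hardclock_period * 10);

	/* roundrobin() itself: rearm and count the elapsed periods. */
	count = clockintr_advance(cl, hardclock_period * 10);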