On Thu, Aug 10, 2023 at 01:05:27PM +0200, Martin Pieuchot wrote:
> On 05/08/23(Sat) 17:17, Scott Cheloha wrote:
> > This is the next piece of the clock interrupt reorganization patch
> > series.
>
> The round robin logic is here to make sure a process doesn't hog a
> CPU.  The period to tell a process it should yield doesn't have to
> be tied to the hardclock period.  We want to be sure a process
> doesn't run more than 100ms at a time.
>
> Is the priority of this new clock interrupt the same as the hardclock?

Yes.  Clock interrupts on a given CPU are dispatched in order of
expiration.  If two clock interrupts on the same CPU have the same
expiration value they are dispatched in FIFO order.

> I don't understand what clockintr_advance() is doing.  Maybe you
> could write a manual page for it?

clockintr_advance() is a convenience wrapper for clockintr_schedule().
It reschedules a periodic interrupt without drift, i.e. the next
expiration is computed from the previous expiration, not from the
moment the interrupt actually ran.

The manpage update is a work in progress.
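To illustrate what "without drift" means, here is a sketch of the
idea (advance_sketch() is not a real kernel function, and the real
routine manipulates the clockintr queue under a mutex): the
expiration only ever advances in whole periods from where it was, so
a late dispatch never pushes back subsequent periods.

	/*
	 * Sketch only: advance a periodic expiration past "now" in
	 * whole periods and report how many periods have elapsed.
	 */
	uint64_t
	advance_sketch(uint64_t *expiration, uint64_t period, uint64_t now)
	{
		uint64_t count = 0;

		while (*expiration <= now) {
			*expiration += period;
			count++;
		}
		return count;
	}

The returned count is how the caller learns it is running late: when
dispatched on time the count is 1; a count of 2 or more means at
least one whole period expired before the interrupt got to run.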
> I'm afraid we could wait 200ms now?  Or what does a `count' of 2 mean?

No.  roundrobin() is still scheduled to run every 100ms.  The code
change ensures we properly account for situations where roundrobin()
runs so late that two or more roundrobin periods have elapsed:

> @@ -69,21 +68,23 @@ uint32_t decay_aftersleep(uint32_t, uin
>   * Force switch among equal priority processes every 100ms.
>   */
>  void
> -roundrobin(struct cpu_info *ci)
> +roundrobin(struct clockintr *cl, void *cf)
>  {
> +	struct cpu_info *ci = curcpu();
>  	struct schedstate_percpu *spc = &ci->ci_schedstate;
> +	uint64_t count;
> 
> -	spc->spc_rrticks = rrticks_init;
> +	count = clockintr_advance(cl, roundrobin_period);
> 
>  	if (ci->ci_curproc != NULL) {
> -		if (spc->spc_schedflags & SPCF_SEENRR) {
> +		if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
>  			/*
>  			 * The process has already been through a roundrobin
>  			 * without switching and may be hogging the CPU.
>  			 * Indicate that the process should yield.
>  			 */
>  			atomic_setbits_int(&spc->spc_schedflags,
> -			    SPCF_SHOULDYIELD);
> +			    SPCF_SEENRR | SPCF_SHOULDYIELD);
>  		} else {
>  			atomic_setbits_int(&spc->spc_schedflags,
>  			    SPCF_SEENRR);

In such a situation, we want to set both SPCF_SEENRR and
SPCF_SHOULDYIELD on the thread.  This simulates what would have
happened under normal circumstances, i.e. the thread would have been
interrupted by roundrobin() two separate times.
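A worked example with made-up numbers: say the roundrobin handle was
due to expire at t = 100ms but the interrupt is only dispatched at
t = 310ms.  clockintr_advance() steps the expiration to 200, 300,
and finally 400ms and returns a count of 3.  Because count >= 2,
roundrobin() sets SPCF_SEENRR and SPCF_SHOULDYIELD in one shot
instead of burning another full 100ms period just to notice that the
thread never yielded.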
> Same question for clockintr_stagger().

clockintr_stagger() adjusts the starting offset for the given clock
interrupt.  We use it to keep identical clock interrupts from
expiring simultaneously across every CPU in the system.
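Roughly, and glossing over the real function's locking and sanity
checks, the effect is to spread "count" peer interrupts evenly across
one period, giving peer "n" a head start of n/count of the period
(stagger_sketch() is not a real kernel function, and the exact
formula is my assumption):

	/*
	 * Sketch only: peer n of count starts n/count of a period
	 * into the cycle, so peers on different CPUs expire evenly
	 * spread out instead of all at once.
	 */
	void
	stagger_sketch(uint64_t *expiration, uint64_t period, u_int n,
	    u_int count)
	{
		*expiration = period / count * n;
	}

So with the hardclock_period used in the patch (10ms at the default
hz=100) and MAXCPUS=64, and taking "multiplier" to be a per-CPU
index, CPU0's roundrobin handle starts at offset 0, CPU1's about
156us later, CPU2's about 312us later, and so on; the subsequent
clockintr_advance() call then pushes each handle out to its first
100ms expiration while preserving that stagger.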
> Can we get rid of `hardclock_period' and use a variable set to 100ms?
> This should be tested on alpha which has a hz of 1024 but I'd argue
> this is an improvement.

Sure, that's cleaner.  The updated patch below adds a new
"roundrobin_period" variable, initialized during clockintr_init().

Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.79
diff -u -p -r1.79 sched_bsd.c
--- kern/sched_bsd.c	5 Aug 2023 20:07:55 -0000	1.79
+++ kern/sched_bsd.c	10 Aug 2023 17:15:53 -0000
@@ -54,9 +54,8 @@
 #include <sys/ktrace.h>
 #endif
 
-
+uint32_t roundrobin_period;	/* [I] roundrobin period (ns) */
 int	lbolt;			/* once a second sleep address */
-int	rrticks_init;		/* # of hardclock ticks per roundrobin() */
 
 #ifdef MULTIPROCESSOR
 struct __mp_lock sched_lock;
@@ -69,21 +68,23 @@ uint32_t decay_aftersleep(uint32_t, uin
  * Force switch among equal priority processes every 100ms.
  */
 void
-roundrobin(struct cpu_info *ci)
+roundrobin(struct clockintr *cl, void *cf)
 {
+	struct cpu_info *ci = curcpu();
 	struct schedstate_percpu *spc = &ci->ci_schedstate;
+	uint64_t count;
 
-	spc->spc_rrticks = rrticks_init;
+	count = clockintr_advance(cl, roundrobin_period);
 
 	if (ci->ci_curproc != NULL) {
-		if (spc->spc_schedflags & SPCF_SEENRR) {
+		if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
 			/*
 			 * The process has already been through a roundrobin
 			 * without switching and may be hogging the CPU.
 			 * Indicate that the process should yield.
 			 */
 			atomic_setbits_int(&spc->spc_schedflags,
-			    SPCF_SHOULDYIELD);
+			    SPCF_SEENRR | SPCF_SHOULDYIELD);
 		} else {
 			atomic_setbits_int(&spc->spc_schedflags,
 			    SPCF_SEENRR);
@@ -695,8 +696,6 @@ scheduler_start(void)
 	 * its job.
 	 */
 	timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
-
-	rrticks_init = hz / 10;
 	schedcpu(&schedcpu_to);
 
 #ifndef SMALL_KERNEL
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.84
diff -u -p -r1.84 kern_sched.c
--- kern/kern_sched.c	5 Aug 2023 20:07:55 -0000	1.84
+++ kern/kern_sched.c	10 Aug 2023 17:15:53 -0000
@@ -102,6 +102,12 @@ sched_init_cpu(struct cpu_info *ci)
 		if (spc->spc_profclock == NULL)
 			panic("%s: clockintr_establish profclock", __func__);
 	}
+	if (spc->spc_roundrobin == NULL) {
+		spc->spc_roundrobin = clockintr_establish(&ci->ci_queue,
+		    roundrobin);
+		if (spc->spc_roundrobin == NULL)
+			panic("%s: clockintr_establish roundrobin", __func__);
+	}
 
 	kthread_create_deferred(sched_kthreads_create, ci);
 
Index: kern/kern_clockintr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
retrieving revision 1.30
diff -u -p -r1.30 kern_clockintr.c
--- kern/kern_clockintr.c	5 Aug 2023 20:07:55 -0000	1.30
+++ kern/kern_clockintr.c	10 Aug 2023 17:15:53 -0000
@@ -69,6 +69,7 @@ clockintr_init(u_int flags)
 
 	KASSERT(hz > 0 && hz <= 1000000000);
 	hardclock_period = 1000000000 / hz;
+	roundrobin_period = hardclock_period * 10;
 
 	KASSERT(stathz >= 1 && stathz <= 1000000000);
 
@@ -204,6 +205,11 @@ clockintr_cpu_init(const struct intrcloc
 		clockintr_stagger(spc->spc_profclock, profclock_period,
 		    multiplier, MAXCPUS);
 	}
+	if (spc->spc_roundrobin->cl_expiration == 0) {
+		clockintr_stagger(spc->spc_roundrobin, hardclock_period,
+		    multiplier, MAXCPUS);
+	}
+	clockintr_advance(spc->spc_roundrobin, roundrobin_period);
 
 	if (reset_cq_intrclock)
 		SET(cq->cq_flags, CQ_INTRCLOCK);
Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.111
diff -u -p -r1.111 kern_clock.c
--- kern/kern_clock.c	5 Aug 2023 20:07:55 -0000	1.111
+++ kern/kern_clock.c	10 Aug 2023 17:15:54 -0000
@@ -113,9 +113,6 @@ hardclock(struct clockframe *frame)
 {
 	struct cpu_info *ci = curcpu();
 
-	if (--ci->ci_schedstate.spc_rrticks <= 0)
-		roundrobin(ci);
-
 #if NDT > 0
 	DT_ENTER(profile, NULL);
 	if (CPU_IS_PRIMARY(ci))
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.60
diff -u -p -r1.60 sched.h
--- sys/sched.h	5 Aug 2023 20:07:56 -0000	1.60
+++ sys/sched.h	10 Aug 2023 17:15:54 -0000
@@ -105,10 +105,10 @@ struct schedstate_percpu {
 	u_int spc_schedticks;		/* ticks for schedclock() */
 	u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */
 	u_char spc_curpriority;		/* usrpri of curproc */
-	int spc_rrticks;		/* ticks until roundrobin() */
 
 	struct clockintr *spc_itimer;	/* [o] itimer_update handle */
 	struct clockintr *spc_profclock; /* [o] profclock handle */
+	struct clockintr *spc_roundrobin; /* [o] roundrobin handle */
 
 	u_int spc_nrun;			/* procs on the run queues */
 
@@ -145,16 +145,16 @@ struct cpustats {
 #define	NICE_WEIGHT 2			/* priorities per nice level */
 #define	ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - SCHED_PPQ)
 
+extern uint32_t roundrobin_period;
 extern int schedhz;			/* ideally: 16 */
-extern int rrticks_init;		/* ticks per roundrobin() */
 
 struct proc;
 void schedclock(struct proc *);
-struct cpu_info;
-void roundrobin(struct cpu_info *);
+void roundrobin(struct clockintr *, void *);
 void scheduler_start(void);
 void userret(struct proc *p);
 
+struct cpu_info;
 void sched_init_cpu(struct cpu_info *);
 void sched_idle(void *);
 void sched_exit(struct proc *);