On Thu, Aug 10, 2023 at 01:05:27PM +0200, Martin Pieuchot wrote:
> On 05/08/23(Sat) 17:17, Scott Cheloha wrote:
> > This is the next piece of the clock interrupt reorganization patch
> > series.
>
> The round robin logic is here to make sure a process doesn't hog a CPU.
> The period to tell a process it should yield doesn't have to be tied
> to the hardclock period. We want to be sure a process doesn't run more
> than 100ms at a time.
> Is the priority of this new clock interrupt the same as the hardclock?
Yes. Clock interrupts on a given CPU are dispatched in order of
expiration. If two clock interrupts on the same CPU have the same
expiration value, they are dispatched in FIFO order.
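If it helps, here is a toy sketch of what "expiration order, FIFO for
ties" means.  The struct, queue, and function names are made up for
illustration; the real code in kern_clockintr.c differs in the details:

#include <sys/queue.h>
#include <stdint.h>

struct toy_clockintr {
	uint64_t expiration;		/* absolute expiration time (ns) */
	TAILQ_ENTRY(toy_clockintr) link;
};
TAILQ_HEAD(toy_queue, toy_clockintr);

static void
toy_enqueue(struct toy_queue *q, struct toy_clockintr *cl)
{
	struct toy_clockintr *it;

	/* Keep the pending queue sorted by expiration. */
	TAILQ_FOREACH(it, q, link) {
		if (cl->expiration < it->expiration) {
			TAILQ_INSERT_BEFORE(it, cl, link);
			return;
		}
	}
	/* Equal or later expirations land at the back: FIFO for ties. */
	TAILQ_INSERT_TAIL(q, cl, link);
}
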
> I don't understand what clockintr_advance() is doing. Maybe you could
> write a manual for it?
clockintr_advance() is a convenience wrapper for clockintr_schedule().
It reschedules periodic interrupts without drift.
The manpage update is a work in progress.
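Until then, the gist is roughly this (a made-up sketch, not the real
clockintr_advance()): push the expiration forward in whole periods
until it lands in the future and return how many periods were skipped.
Because the new expiration is derived from the old expiration rather
than from "now", a late interrupt doesn't make the schedule drift:

#include <stdint.h>

static uint64_t
sketch_advance(uint64_t *expiration, uint64_t period, uint64_t now)
{
	uint64_t count = 0;

	/* Advance by whole periods; a very late caller skips several. */
	while (*expiration <= now) {
		*expiration += period;
		count++;
	}
	return count;	/* this is the "count" roundrobin() looks at */
}
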
> I'm afraid we could wait 200ms now? Or what does a `count' of 2 mean?
No. roundrobin() is still scheduled to run every 100ms. The code
change ensures we properly account for situations where roundrobin()
is so late that two or more roundrobin periods have elapsed:
> @@ -69,21 +68,23 @@ uint32_t decay_aftersleep(uint32_t, uin
> * Force switch among equal priority processes every 100ms.
> */
> void
> -roundrobin(struct cpu_info *ci)
> +roundrobin(struct clockintr *cl, void *cf)
> {
> + struct cpu_info *ci = curcpu();
> struct schedstate_percpu *spc = &ci->ci_schedstate;
> + uint64_t count;
>
> - spc->spc_rrticks = rrticks_init;
> + count = clockintr_advance(cl, roundrobin_period);
>
> if (ci->ci_curproc != NULL) {
> - if (spc->spc_schedflags & SPCF_SEENRR) {
> + if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
> /*
> * The process has already been through a roundrobin
> * without switching and may be hogging the CPU.
> * Indicate that the process should yield.
> */
> atomic_setbits_int(&spc->spc_schedflags,
> - SPCF_SHOULDYIELD);
> + SPCF_SEENRR | SPCF_SHOULDYIELD);
> } else {
> atomic_setbits_int(&spc->spc_schedflags,
> SPCF_SEENRR);
In such a situation, we want to set both SPCF_SEENRR and
SPCF_SHOULDYIELD on the thread. This simulates what would have
happened under normal circumstances, i.e. the thread would have
been interrupted by roundrobin() two separate times.
> Same question for clockintr_stagger().
clockintr_stagger() adjusts the starting offset for the given clock
interrupt. We use it to keep identical clock interrupts from expiring
simultaneously across every CPU in the system.
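For illustration only, the effect is something along these lines
(made-up signature, not the real clockintr_stagger()):

#include <stdint.h>

static void
sketch_stagger(uint64_t *expiration, uint64_t period, uint32_t numer,
    uint32_t denom)
{
	/* Handle "numer" of "denom" starts numer/denom of a period late. */
	*expiration = period / denom * numer;
}

With the hardclock_period/MAXCPUS arguments used in the patch below,
each CPU's roundrobin handle gets a different starting offset within
one hardclock period, so the handles don't all expire at the same
instant.
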
> Can we get rid of `hardclock_period' and use a variable set to 100ms?
> This should be tested on alpha, which has a hz of 1024, but I'd argue
> this is an improvement.
Sure, that's cleaner. The updated patch below adds a new
"roundrobin_period" variable initialized during clockintr_init().
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.79
diff -u -p -r1.79 sched_bsd.c
--- kern/sched_bsd.c 5 Aug 2023 20:07:55 -0000 1.79
+++ kern/sched_bsd.c 10 Aug 2023 17:15:53 -0000
@@ -54,9 +54,8 @@
#include <sys/ktrace.h>
#endif
-
+uint32_t roundrobin_period; /* [I] roundrobin period (ns) */
int lbolt; /* once a second sleep address */
-int rrticks_init; /* # of hardclock ticks per roundrobin() */
#ifdef MULTIPROCESSOR
struct __mp_lock sched_lock;
@@ -69,21 +68,23 @@ uint32_t decay_aftersleep(uint32_t, uin
* Force switch among equal priority processes every 100ms.
*/
void
-roundrobin(struct cpu_info *ci)
+roundrobin(struct clockintr *cl, void *cf)
{
+ struct cpu_info *ci = curcpu();
struct schedstate_percpu *spc = &ci->ci_schedstate;
+ uint64_t count;

- spc->spc_rrticks = rrticks_init;
+ count = clockintr_advance(cl, roundrobin_period);

if (ci->ci_curproc != NULL) {
- if (spc->spc_schedflags & SPCF_SEENRR) {
+ if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
/*
* The process has already been through a roundrobin
* without switching and may be hogging the CPU.
* Indicate that the process should yield.
*/
atomic_setbits_int(&spc->spc_schedflags,
- SPCF_SHOULDYIELD);
+ SPCF_SEENRR | SPCF_SHOULDYIELD);
} else {
atomic_setbits_int(&spc->spc_schedflags,
SPCF_SEENRR);
@@ -695,8 +696,6 @@ scheduler_start(void)
* its job.
*/
timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
-
- rrticks_init = hz / 10;
schedcpu(&schedcpu_to);
#ifndef SMALL_KERNEL
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.84
diff -u -p -r1.84 kern_sched.c
--- kern/kern_sched.c 5 Aug 2023 20:07:55 -0000 1.84
+++ kern/kern_sched.c 10 Aug 2023 17:15:53 -0000
@@ -102,6 +102,12 @@ sched_init_cpu(struct cpu_info *ci)
if (spc->spc_profclock == NULL)
panic("%s: clockintr_establish profclock", __func__);
}
+ if (spc->spc_roundrobin == NULL) {
+ spc->spc_roundrobin = clockintr_establish(&ci->ci_queue,
+ roundrobin);
+ if (spc->spc_roundrobin == NULL)
+ panic("%s: clockintr_establish roundrobin", __func__);
+ }
kthread_create_deferred(sched_kthreads_create, ci);
Index: kern/kern_clockintr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
retrieving revision 1.30
diff -u -p -r1.30 kern_clockintr.c
--- kern/kern_clockintr.c 5 Aug 2023 20:07:55 -0000 1.30
+++ kern/kern_clockintr.c 10 Aug 2023 17:15:53 -0000
@@ -69,6 +69,7 @@ clockintr_init(u_int flags)
KASSERT(hz > 0 && hz <= 1000000000);
hardclock_period = 1000000000 / hz;
+ roundrobin_period = hardclock_period * 10;
KASSERT(stathz >= 1 && stathz <= 1000000000);
@@ -204,6 +205,11 @@ clockintr_cpu_init(const struct intrcloc
clockintr_stagger(spc->spc_profclock, profclock_period,
multiplier, MAXCPUS);
}
+ if (spc->spc_roundrobin->cl_expiration == 0) {
+ clockintr_stagger(spc->spc_roundrobin, hardclock_period,
+ multiplier, MAXCPUS);
+ }
+ clockintr_advance(spc->spc_roundrobin, roundrobin_period);
if (reset_cq_intrclock)
SET(cq->cq_flags, CQ_INTRCLOCK);
Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.111
diff -u -p -r1.111 kern_clock.c
--- kern/kern_clock.c 5 Aug 2023 20:07:55 -0000 1.111
+++ kern/kern_clock.c 10 Aug 2023 17:15:54 -0000
@@ -113,9 +113,6 @@ hardclock(struct clockframe *frame)
{
struct cpu_info *ci = curcpu();
- if (--ci->ci_schedstate.spc_rrticks <= 0)
- roundrobin(ci);
-
#if NDT > 0
DT_ENTER(profile, NULL);
if (CPU_IS_PRIMARY(ci))
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.60
diff -u -p -r1.60 sched.h
--- sys/sched.h 5 Aug 2023 20:07:56 -0000 1.60
+++ sys/sched.h 10 Aug 2023 17:15:54 -0000
@@ -105,10 +105,10 @@ struct schedstate_percpu {
u_int spc_schedticks; /* ticks for schedclock() */
u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */
u_char spc_curpriority; /* usrpri of curproc */
- int spc_rrticks; /* ticks until roundrobin() */
struct clockintr *spc_itimer; /* [o] itimer_update handle */
struct clockintr *spc_profclock; /* [o] profclock handle */
+ struct clockintr *spc_roundrobin; /* [o] roundrobin handle */
u_int spc_nrun; /* procs on the run queues */
@@ -145,16 +145,16 @@ struct cpustats {
#define NICE_WEIGHT 2 /* priorities per nice level */
#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - SCHED_PPQ)
+extern uint32_t roundrobin_period;
extern int schedhz; /* ideally: 16 */
-extern int rrticks_init; /* ticks per roundrobin() */
struct proc;
void schedclock(struct proc *);
-struct cpu_info;
-void roundrobin(struct cpu_info *);
+void roundrobin(struct clockintr *, void *);
void scheduler_start(void);
void userret(struct proc *p);
+struct cpu_info;
void sched_init_cpu(struct cpu_info *);
void sched_idle(void *);
void sched_exit(struct proc *);