On Thu, Aug 10, 2023 at 01:05:27PM +0200, Martin Pieuchot wrote:
> On 05/08/23(Sat) 17:17, Scott Cheloha wrote:
> > This is the next piece of the clock interrupt reorganization patch
> > series.
>
> The round robin logic is here to make sure a process doesn't hog a CPU.
> The period to tell a process it should yield doesn't have to be tied
> to the hardclock period. We want to be sure a process doesn't run more
> than 100ms at a time.
> Is the priority of this new clock interrupt the same as the hardclock?
Yes. Clock interrupts on a given CPU are dispatched in order of
expiration. If two clock interrupts on the same CPU have the same
expiration value, they are dispatched in FIFO order.
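If it helps, here is a toy sketch of what "expiration order, FIFO for
ties" means.  The struct, queue, and function names are made up for
illustration; the real code in kern_clockintr.c differs in the details:

#include <sys/queue.h>
#include <stdint.h>

struct toy_clockintr {
	uint64_t expiration;		/* absolute expiration time (ns) */
	TAILQ_ENTRY(toy_clockintr) link;
};
TAILQ_HEAD(toy_queue, toy_clockintr);

static void
toy_enqueue(struct toy_queue *q, struct toy_clockintr *cl)
{
	struct toy_clockintr *it;

	/* Keep the pending queue sorted by expiration. */
	TAILQ_FOREACH(it, q, link) {
		if (cl->expiration < it->expiration) {
			TAILQ_INSERT_BEFORE(it, cl, link);
			return;
		}
	}
	/* Equal or later expirations land at the back: FIFO for ties. */
	TAILQ_INSERT_TAIL(q, cl, link);
}
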
> I don't understand what clockintr_advance() is doing. Maybe you could
> write a manual for it?
clockintr_advance() is a convenience wrapper for clockintr_schedule().
It reschedules periodic interrupts without drift.
The manpage update is a work in progress.
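Until then, the gist is roughly this (a made-up sketch, not the real
clockintr_advance()): push the expiration forward in whole periods
until it lands in the future and return how many periods were skipped.
Because the new expiration is derived from the old expiration rather
than from "now", a late interrupt doesn't make the schedule drift:

#include <stdint.h>

static uint64_t
sketch_advance(uint64_t *expiration, uint64_t period, uint64_t now)
{
	uint64_t count = 0;

	/* Advance by whole periods; a very late caller skips several. */
	while (*expiration <= now) {
		*expiration += period;
		count++;
	}
	return count;	/* this is the "count" roundrobin() looks at */
}
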
> I'm afraid we could wait 200ms now? Or what does a `count' of 2 mean?
No. roundrobin() is still scheduled to run every 100ms. The code
change ensures we properly account for situations where roundrobin()
is so late that two or more roundrobin periods have elapsed:
> @@ -69,21 +68,23 @@ uint32_t decay_aftersleep(uint32_t, uin
> * Force switch among equal priority processes every 100ms.
> */
> void
> -roundrobin(struct cpu_info *ci)
> +roundrobin(struct clockintr *cl, void *cf)
> {
> + struct cpu_info *ci = curcpu();
> struct schedstate_percpu *spc = &ci->ci_schedstate;
> + uint64_t count;
>
> - spc->spc_rrticks = rrticks_init;
> + count = clockintr_advance(cl, roundrobin_period);
>
> if (ci->ci_curproc != NULL) {
> - if (spc->spc_schedflags & SPCF_SEENRR) {
> + if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
> /*
> * The process has already been through a roundrobin
> * without switching and may be hogging the CPU.
> * Indicate that the process should yield.
> */
> atomic_setbits_int(&spc->spc_schedflags,
> - SPCF_SHOULDYIELD);
> + SPCF_SEENRR | SPCF_SHOULDYIELD);
> } else {
> atomic_setbits_int(&spc->spc_schedflags,
> SPCF_SEENRR);
In such a situation, we want to set both SPCF_SEENRR and
SPCF_SHOULDYIELD on the thread. This simulates what would have
happened under normal circumstances, i.e. the thread would have
been interrupted by roundrobin() two separate times.
> Same question for clockintr_stagger().
clockintr_stagger() adjusts the starting offset for the given clock
interrupt. We use it to keep identical clock interrupts from expiring
simultaneously across every CPU in the system.
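For illustration only, the effect is something along these lines
(made-up signature, not the real clockintr_stagger()):

#include <stdint.h>

static void
sketch_stagger(uint64_t *expiration, uint64_t period, uint32_t numer,
    uint32_t denom)
{
	/* Handle "numer" of "denom" starts numer/denom of a period late. */
	*expiration = period / denom * numer;
}

With the hardclock_period/MAXCPUS arguments used in the patch below,
each CPU's roundrobin handle gets a different starting offset within
one hardclock period, so the handles don't all expire at the same
instant.
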
> Can we get rid of `hardclock_period' and use a variable set to 100ms?
> This should be tested on alpha, which has a hz of 1024, but I'd argue
> this is an improvement.
Sure, that's cleaner. The updated patch below adds a new
"roundrobin_period" variable initialized during clockintr_init().
Index: kern/sched_bsd.c
===================================================================
RCS file: /cvs/src/sys/kern/sched_bsd.c,v
retrieving revision 1.79
diff -u -p -r1.79 sched_bsd.c
--- kern/sched_bsd.c 5 Aug 2023 20:07:55 -0000 1.79
+++ kern/sched_bsd.c 10 Aug 2023 17:15:53 -0000
@@ -54,9 +54,8 @@
#include <sys/ktrace.h>
#endif
-
+uint32_t roundrobin_period; /* [I] roundrobin period (ns) */
int lbolt; /* once a second sleep address */
-int rrticks_init; /* # of hardclock ticks per roundrobin() */
#ifdef MULTIPROCESSOR
struct __mp_lock sched_lock;
@@ -69,21 +68,23 @@ uint32_t decay_aftersleep(uint32_t, uin
* Force switch among equal priority processes every 100ms.
*/
void
-roundrobin(struct cpu_info *ci)
+roundrobin(struct clockintr *cl, void *cf)
{
+ struct cpu_info *ci = curcpu();
struct schedstate_percpu *spc = &ci->ci_schedstate;
+ uint64_t count;

- spc->spc_rrticks = rrticks_init;
+ count = clockintr_advance(cl, roundrobin_period);

if (ci->ci_curproc != NULL) {
- if (spc->spc_schedflags & SPCF_SEENRR) {
+ if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
/*
* The process has already been through a roundrobin
* without switching and may be hogging the CPU.
* Indicate that the process should yield.
*/
atomic_setbits_int(&spc->spc_schedflags,
- SPCF_SHOULDYIELD);
+ SPCF_SEENRR | SPCF_SHOULDYIELD);
} else {
atomic_setbits_int(&spc->spc_schedflags,
SPCF_SEENRR);
@@ -695,8 +696,6 @@ scheduler_start(void)
* its job.
*/
timeout_set(&schedcpu_to, schedcpu, &schedcpu_to);
-
- rrticks_init = hz / 10;
schedcpu(&schedcpu_to);
#ifndef SMALL_KERNEL
Index: kern/kern_sched.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_sched.c,v
retrieving revision 1.84
diff -u -p -r1.84 kern_sched.c
--- kern/kern_sched.c 5 Aug 2023 20:07:55 -0000 1.84
+++ kern/kern_sched.c 10 Aug 2023 17:15:53 -0000
@@ -102,6 +102,12 @@ sched_init_cpu(struct cpu_info *ci)
if (spc->spc_profclock == NULL)
panic("%s: clockintr_establish profclock", __func__);
}
+ if (spc->spc_roundrobin == NULL) {
+ spc->spc_roundrobin = clockintr_establish(&ci->ci_queue,
+ roundrobin);
+ if (spc->spc_roundrobin == NULL)
+ panic("%s: clockintr_establish roundrobin", __func__);
+ }
kthread_create_deferred(sched_kthreads_create, ci);
Index: kern/kern_clockintr.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
retrieving revision 1.30
diff -u -p -r1.30 kern_clockintr.c
--- kern/kern_clockintr.c 5 Aug 2023 20:07:55 -0000 1.30
+++ kern/kern_clockintr.c 10 Aug 2023 17:15:53 -0000
@@ -69,6 +69,7 @@ clockintr_init(u_int flags)
KASSERT(hz > 0 && hz <= 1000000000);
hardclock_period = 1000000000 / hz;
+ roundrobin_period = hardclock_period * 10;
KASSERT(stathz >= 1 && stathz <= 1000000000);
@@ -204,6 +205,11 @@ clockintr_cpu_init(const struct intrcloc
clockintr_stagger(spc->spc_profclock, profclock_period,
multiplier, MAXCPUS);
}
+ if (spc->spc_roundrobin->cl_expiration == 0) {
+ clockintr_stagger(spc->spc_roundrobin, hardclock_period,
+ multiplier, MAXCPUS);
+ }
+ clockintr_advance(spc->spc_roundrobin, roundrobin_period);
if (reset_cq_intrclock)
SET(cq->cq_flags, CQ_INTRCLOCK);
Index: kern/kern_clock.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_clock.c,v
retrieving revision 1.111
diff -u -p -r1.111 kern_clock.c
--- kern/kern_clock.c 5 Aug 2023 20:07:55 -0000 1.111
+++ kern/kern_clock.c 10 Aug 2023 17:15:54 -0000
@@ -113,9 +113,6 @@ hardclock(struct clockframe *frame)
{
struct cpu_info *ci = curcpu();
- if (--ci->ci_schedstate.spc_rrticks <= 0)
- roundrobin(ci);
-
#if NDT > 0
DT_ENTER(profile, NULL);
if (CPU_IS_PRIMARY(ci))
Index: sys/sched.h
===================================================================
RCS file: /cvs/src/sys/sys/sched.h,v
retrieving revision 1.60
diff -u -p -r1.60 sched.h
--- sys/sched.h 5 Aug 2023 20:07:56 -0000 1.60
+++ sys/sched.h 10 Aug 2023 17:15:54 -0000
@@ -105,10 +105,10 @@ struct schedstate_percpu {
u_int spc_schedticks; /* ticks for schedclock() */
u_int64_t spc_cp_time[CPUSTATES]; /* CPU state statistics */
u_char spc_curpriority; /* usrpri of curproc */
- int spc_rrticks; /* ticks until roundrobin() */
struct clockintr *spc_itimer; /* [o] itimer_update handle */
struct clockintr *spc_profclock; /* [o] profclock handle */
+ struct clockintr *spc_roundrobin; /* [o] roundrobin handle */
u_int spc_nrun; /* procs on the run queues */
@@ -145,16 +145,16 @@ struct cpustats {
#define NICE_WEIGHT 2 /* priorities per nice level */
#define ESTCPULIM(e) min((e), NICE_WEIGHT * PRIO_MAX - SCHED_PPQ)
+extern uint32_t roundrobin_period;
extern int schedhz; /* ideally: 16 */
-extern int rrticks_init; /* ticks per roundrobin() */
struct proc;
void schedclock(struct proc *);
-struct cpu_info;
-void roundrobin(struct cpu_info *);
+void roundrobin(struct clockintr *, void *);
void scheduler_start(void);
void userret(struct proc *p);
+struct cpu_info;
void sched_init_cpu(struct cpu_info *);
void sched_idle(void *);
void sched_exit(struct proc *);