On Fri, Oct 06, 2023 at 02:14:52PM +0200, Alexander Bluhm wrote:
> > @@ -718,11 +743,13 @@ softclock(void *arg)
> > softclock_process_tick_timeout(to, new);
> > }
> > tostat.tos_softclocks++;
> > - needsproc = !CIRCQ_EMPTY(&timeout_proc);
> > - mtx_leave(&timeout_mutex);
> > -
> > - if (needsproc)
> > + if (!CIRCQ_EMPTY(&timeout_proc))
> > wakeup(&timeout_proc);
> > +#ifdef MULTIPROCESSOR
> > + if(!CIRCQ_EMPTY(&timeout_proc_mpsafe))
> > + wakeup(&timeout_proc_mpsafe);
> > +#endif
> > + mtx_leave(&timeout_mutex);
> > }
> >
> > void
>
> Was there a good reason that wakeup() did run without mutex?
> Do we really want to change this?
>
I dont understand you. Original code does wakeup() outside mutex. I
moved wakeup() under mutex. You want to move it back?
Original softclock_thread() uses sleep_setup()/sleep_finish():
softclock_thread(void *arg)
{
/* ... */
for (;;) {
sleep_setup(&timeout_proc, PSWP, "bored");
sleep_finish(0, CIRCQ_EMPTY(&timeout_proc));
mtx_enter(&timeout_mutex);
while (!CIRCQ_EMPTY(&timeout_proc)) {
This is not applicable for mpsafe softclock thread, because it could
already run while new timers added to `timeout_proc_mpsafe' list. The
`timeout_mutex' used to protect timers list, so I used msleep_nsec()
for sleep.
I reworked original softclock_thread() and corresponding wakeup for
consistency reason.
> > @@ -730,6 +757,11 @@ softclock_create_thread(void *arg)
> > {
> > if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
> > panic("fork softclock");
> > +#ifdef MULTIPROCESSOR
> > + if (kthread_create(softclock_thread_mpsafe, NULL, NULL,
> > + "softclock_mpsafe"))
> > + panic("fork softclock_mpsafe");
> > +#endif
> > }
> >
> > void
>
> Could you make the visible name of the thread shorter? e.g.
> "softclock_mpsafe" -> "softclockmp", it should fit in ps and top.
>
Sure. I don't like long *_proc_mpsafe and *_thread_mpsafe names. Better
to use `timeoutmp_proc' and `softclockmp_thread'.
Index: share/man/man9/timeout.9
===================================================================
RCS file: /cvs/src/share/man/man9/timeout.9,v
retrieving revision 1.56
diff -u -p -r1.56 timeout.9
--- share/man/man9/timeout.9 1 Jan 2023 01:19:18 -0000 1.56
+++ share/man/man9/timeout.9 6 Oct 2023 12:43:53 -0000
@@ -193,11 +193,16 @@ Counts the time elapsed since the system
The timeout's behavior may be configured with the bitwise OR of
zero or more of the following
.Fa flags :
-.Bl -tag -width TIMEOUT_PROC
+.Bl -tag -width TIMEOUT_MPSAFE
.It Dv TIMEOUT_PROC
Execute the timeout in a process context instead of the default
.Dv IPL_SOFTCLOCK
interrupt context.
+.It Dv TIMEOUT_MPSAFE
+Execute the timeout without the kernel lock.
+Requires the
+.Dv TIMEOUT_PROC
+flag.
.El
.El
.Pp
@@ -367,8 +372,9 @@ The function
.Fa fn
must not block and must be safe to execute on any CPU in the system.
.Pp
-Currently,
-all timeouts are executed under the kernel lock.
+Timeouts without the
+.Dv TIMEOUT_MPSAFE
+flag are executed under the kernel lock.
.Sh RETURN VALUES
.Fn timeout_add ,
.Fn timeout_add_sec ,
Index: sys/kern/kern_timeout.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_timeout.c,v
retrieving revision 1.95
diff -u -p -r1.95 kern_timeout.c
--- sys/kern/kern_timeout.c 29 Jul 2023 06:52:08 -0000 1.95
+++ sys/kern/kern_timeout.c 6 Oct 2023 12:43:54 -0000
@@ -75,6 +75,9 @@ struct circq timeout_wheel_kc[BUCKETS];
struct circq timeout_new; /* [T] New, unscheduled timeouts */
struct circq timeout_todo; /* [T] Due or needs rescheduling */
struct circq timeout_proc; /* [T] Due + needs process context */
+#ifdef MULTIPROCESSOR
+struct circq timeoutmp_proc; /* [T] Process ctx + no kernel lock */
+#endif
time_t timeout_level_width[WHEELCOUNT]; /* [I] Wheel level width
(seconds) */
struct timespec tick_ts; /* [I] Length of a tick (1/hz secs) */
@@ -171,6 +174,9 @@ void softclock_create_thread(void *);
void softclock_process_kclock_timeout(struct timeout *, int);
void softclock_process_tick_timeout(struct timeout *, int);
void softclock_thread(void *);
+#ifdef MULTIPROCESSOR
+void softclockmp_thread(void *);
+#endif
void timeout_barrier_timeout(void *);
uint32_t timeout_bucket(const struct timeout *);
uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
@@ -228,6 +234,9 @@ timeout_startup(void)
CIRCQ_INIT(&timeout_new);
CIRCQ_INIT(&timeout_todo);
CIRCQ_INIT(&timeout_proc);
+#ifdef MULTIPROCESSOR
+ CIRCQ_INIT(&timeoutmp_proc);
+#endif
for (b = 0; b < nitems(timeout_wheel); b++)
CIRCQ_INIT(&timeout_wheel[b]);
for (b = 0; b < nitems(timeout_wheel_kc); b++)
@@ -261,10 +270,16 @@ void
timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int
kclock,
int flags)
{
+ KASSERT(!ISSET(flags, ~(TIMEOUT_PROC | TIMEOUT_MPSAFE)));
+
to->to_func = fn;
to->to_arg = arg;
to->to_kclock = kclock;
to->to_flags = flags | TIMEOUT_INITIALIZED;
+
+ /* For now, only process context timeouts may be marked MP-safe. */
+ if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
+ KASSERT(ISSET(to->to_flags, TIMEOUT_PROC));
}
void
@@ -659,7 +674,12 @@ softclock_process_kclock_timeout(struct
if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
tostat.tos_late++;
if (ISSET(to->to_flags, TIMEOUT_PROC)) {
- CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
+#ifdef MULTIPROCESSOR
+ if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
+ CIRCQ_INSERT_TAIL(&timeoutmp_proc, &to->to_list);
+ else
+#endif
+ CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
return;
}
timeout_run(to);
@@ -681,7 +701,12 @@ softclock_process_tick_timeout(struct ti
if (!new && delta < 0)
tostat.tos_late++;
if (ISSET(to->to_flags, TIMEOUT_PROC)) {
- CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
+#ifdef MULTIPROCESSOR
+ if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
+ CIRCQ_INSERT_TAIL(&timeoutmp_proc, &to->to_list);
+ else
+#endif
+ CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
return;
}
timeout_run(to);
@@ -698,7 +723,7 @@ void
softclock(void *arg)
{
struct timeout *first_new, *to;
- int needsproc, new;
+ int new;
first_new = NULL;
new = 0;
@@ -718,11 +743,13 @@ softclock(void *arg)
softclock_process_tick_timeout(to, new);
}
tostat.tos_softclocks++;
- needsproc = !CIRCQ_EMPTY(&timeout_proc);
- mtx_leave(&timeout_mutex);
-
- if (needsproc)
+ if (!CIRCQ_EMPTY(&timeout_proc))
wakeup(&timeout_proc);
+#ifdef MULTIPROCESSOR
+ if(!CIRCQ_EMPTY(&timeoutmp_proc))
+ wakeup(&timeoutmp_proc);
+#endif
+ mtx_leave(&timeout_mutex);
}
void
@@ -730,6 +757,10 @@ softclock_create_thread(void *arg)
{
if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
panic("fork softclock");
+#ifdef MULTIPROCESSOR
+ if (kthread_create(softclockmp_thread, NULL, NULL, "softclockm"))
+ panic("fork softclock_mpsafe");
+#endif
}
void
@@ -752,22 +783,52 @@ softclock_thread(void *arg)
s = splsoftclock();
for (;;) {
- sleep_setup(&timeout_proc, PSWP, "bored");
- sleep_finish(0, CIRCQ_EMPTY(&timeout_proc));
-
mtx_enter(&timeout_mutex);
+ if (CIRCQ_EMPTY(&timeout_proc)){
+ tostat.tos_thread_wakeups++;
+ msleep_nsec(&timeout_proc, &timeout_mutex, PSWP,
+ "bored", INFSLP);
+ }
while (!CIRCQ_EMPTY(&timeout_proc)) {
to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
CIRCQ_REMOVE(&to->to_list);
timeout_run(to);
tostat.tos_run_thread++;
}
- tostat.tos_thread_wakeups++;
mtx_leave(&timeout_mutex);
}
splx(s);
}
+#ifdef MULTIPROCESSOR
+void
+softclockmp_thread(void *arg)
+{
+ struct circq *elm;
+ struct timeout *to;
+
+ KERNEL_ASSERT_LOCKED();
+ KERNEL_UNLOCK();
+
+ for (;;) {
+ mtx_enter(&timeout_mutex);
+ if(CIRCQ_EMPTY(&timeoutmp_proc)) {
+ tostat.tos_thread_wakeups++;
+ msleep_nsec(&timeoutmp_proc, &timeout_mutex,
+ PSWP, "boredmp", INFSLP);
+ }
+ while (!CIRCQ_EMPTY(&timeoutmp_proc)) {
+ elm = CIRCQ_FIRST(&timeoutmp_proc);
+ to = timeout_from_circq(elm);
+ CIRCQ_REMOVE(&to->to_list);
+ timeout_run(to);
+ tostat.tos_run_thread++;
+ }
+ mtx_leave(&timeout_mutex);
+ }
+}
+#endif /* MULTIPROCESSOR*/
+
#ifndef SMALL_KERNEL
void
timeout_adjust_ticks(int adj)
@@ -875,6 +936,10 @@ db_show_timeout(struct timeout *to, stru
where = "softint";
else if (bucket == &timeout_proc)
where = "thread";
+#ifdef MULTIPROCESSOR
+ else if (bucket == &timeoutmp_proc)
+ where = "thread";
+#endif
else {
if (to->to_kclock != KCLOCK_NONE)
wheel = timeout_wheel_kc;
@@ -918,6 +983,9 @@ db_show_callout(db_expr_t addr, int hadd
db_show_callout_bucket(&timeout_new);
db_show_callout_bucket(&timeout_todo);
db_show_callout_bucket(&timeout_proc);
+#ifdef MULTIPROCESSOR
+ db_show_callout_bucket(&timeoutmp_proc);
+#endif
for (b = 0; b < nitems(timeout_wheel); b++)
db_show_callout_bucket(&timeout_wheel[b]);
for (b = 0; b < nitems(timeout_wheel_kc); b++)
Index: sys/sys/timeout.h
===================================================================
RCS file: /cvs/src/sys/sys/timeout.h,v
retrieving revision 1.47
diff -u -p -r1.47 timeout.h
--- sys/sys/timeout.h 31 Dec 2022 16:06:24 -0000 1.47
+++ sys/sys/timeout.h 6 Oct 2023 12:43:54 -0000
@@ -54,6 +54,7 @@ struct timeout {
#define TIMEOUT_ONQUEUE 0x02 /* on any timeout queue */
#define TIMEOUT_INITIALIZED 0x04 /* initialized */
#define TIMEOUT_TRIGGERED 0x08 /* running or ran */
+#define TIMEOUT_MPSAFE 0x10 /* run without kernel lock */
struct timeoutstat {
uint64_t tos_added; /* timeout_add*(9) calls */