On Thu, Oct 05, 2023 at 10:07:24PM +0300, Vitaliy Makkoveev wrote:
> On Thu, Oct 05, 2023 at 11:09:52AM -0500, Scott Cheloha wrote:
> > On Thu, Oct 05, 2023 at 12:57:24AM +0200, Alexander Bluhm wrote:
> > > 
> > > This is a first step to unlock TCP syn cache.  The timer function
> > > is independent of the socket code.  That makes it easy to start
> > > there.
> > > 
> > > [...]
> > > 
> > > Still missing:
> > > - [...]
> > > - Run timer without kernel lock.  I am not aware of such a feature.
> > >   There is already some network code that could benefit from that.
> > >   Can we get timer without kernel lock like TASKQ_MPSAFE implements
> > >   it for tasks?
> > 
> > This patch adds a TIMEOUT_MPSAFE flag for use with TIMEOUT_PROC.
> > Softint timeouts are a different story.
> > 
> > To run syn_cache_timer() without the kernel lock you would initialize
> > it like this:
> > 
> >     timeout_set_flags(&sc->sc_timer, syn_cache_timer, sc, KCLOCK_NONE,
> >         TIMEOUT_PROC | TIMEOUT_MPSAFE);
> > 
> > Use with caution, this needs another set of eyes.
> > 
> 
> I don't like that softclock_thread() mixes kernel locked and mpsafe
> timers processing. Could you use separate threads for them?
> 

I reworked your diff a little. First, I use a separate
softclock_thread_mpsafe() for mpsafe timers. I don't think we need to
bind this thread to the primary CPU.

Also, on a uniprocessor machine we don't need dedicated mpsafe timer
processing, so this machine-specific code is separated with the preprocessor.

A man page update is included in this diff.

I'm using this with pipex(4) and PF_ROUTE sockets.

Index: sys/kern/kern_timeout.c
===================================================================
RCS file: /cvs/src/sys/kern/kern_timeout.c,v
retrieving revision 1.95
diff -u -p -r1.95 kern_timeout.c
--- sys/kern/kern_timeout.c     29 Jul 2023 06:52:08 -0000      1.95
+++ sys/kern/kern_timeout.c     6 Oct 2023 10:57:07 -0000
@@ -75,6 +75,9 @@ struct circq timeout_wheel_kc[BUCKETS];       
 struct circq timeout_new;              /* [T] New, unscheduled timeouts */
 struct circq timeout_todo;             /* [T] Due or needs rescheduling */
 struct circq timeout_proc;             /* [T] Due + needs process context */
+#ifdef MULTIPROCESSOR
+struct circq timeout_proc_mpsafe;      /* [T] Process ctx + no kernel lock */
+#endif
 
 time_t timeout_level_width[WHEELCOUNT];        /* [I] Wheel level width 
(seconds) */
 struct timespec tick_ts;               /* [I] Length of a tick (1/hz secs) */
@@ -171,6 +174,9 @@ void softclock_create_thread(void *);
 void softclock_process_kclock_timeout(struct timeout *, int);
 void softclock_process_tick_timeout(struct timeout *, int);
 void softclock_thread(void *);
+#ifdef MULTIPROCESSOR
+void softclock_thread_mpsafe(void *);
+#endif
 void timeout_barrier_timeout(void *);
 uint32_t timeout_bucket(const struct timeout *);
 uint32_t timeout_maskwheel(uint32_t, const struct timespec *);
@@ -228,6 +234,9 @@ timeout_startup(void)
        CIRCQ_INIT(&timeout_new);
        CIRCQ_INIT(&timeout_todo);
        CIRCQ_INIT(&timeout_proc);
+#ifdef MULTIPROCESSOR
+       CIRCQ_INIT(&timeout_proc_mpsafe);
+#endif
        for (b = 0; b < nitems(timeout_wheel); b++)
                CIRCQ_INIT(&timeout_wheel[b]);
        for (b = 0; b < nitems(timeout_wheel_kc); b++)
@@ -261,10 +270,16 @@ void
 timeout_set_flags(struct timeout *to, void (*fn)(void *), void *arg, int 
kclock,
     int flags)
 {
+       KASSERT(!ISSET(flags, ~(TIMEOUT_PROC | TIMEOUT_MPSAFE)));
+
        to->to_func = fn;
        to->to_arg = arg;
        to->to_kclock = kclock;
        to->to_flags = flags | TIMEOUT_INITIALIZED;
+
+       /* For now, only process context timeouts may be marked MP-safe. */
+       if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
+               KASSERT(ISSET(to->to_flags, TIMEOUT_PROC));
 }
 
 void
@@ -659,7 +674,12 @@ softclock_process_kclock_timeout(struct 
        if (!new && timespeccmp(&to->to_abstime, &kc->kc_late, <=))
                tostat.tos_late++;
        if (ISSET(to->to_flags, TIMEOUT_PROC)) {
-               CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
+#ifdef MULTIPROCESSOR
+               if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
+                       CIRCQ_INSERT_TAIL(&timeout_proc_mpsafe, &to->to_list);
+               else
+#endif
+                       CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
                return;
        }
        timeout_run(to);
@@ -681,7 +701,12 @@ softclock_process_tick_timeout(struct ti
        if (!new && delta < 0)
                tostat.tos_late++;
        if (ISSET(to->to_flags, TIMEOUT_PROC)) {
-               CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
+#ifdef MULTIPROCESSOR
+               if (ISSET(to->to_flags, TIMEOUT_MPSAFE))
+                       CIRCQ_INSERT_TAIL(&timeout_proc_mpsafe, &to->to_list);
+               else
+#endif
+                       CIRCQ_INSERT_TAIL(&timeout_proc, &to->to_list);
                return;
        }
        timeout_run(to);
@@ -698,7 +723,7 @@ void
 softclock(void *arg)
 {
        struct timeout *first_new, *to;
-       int needsproc, new;
+       int new;
 
        first_new = NULL;
        new = 0;
@@ -718,11 +743,13 @@ softclock(void *arg)
                        softclock_process_tick_timeout(to, new);
        }
        tostat.tos_softclocks++;
-       needsproc = !CIRCQ_EMPTY(&timeout_proc);
-       mtx_leave(&timeout_mutex);
-
-       if (needsproc)
+       if (!CIRCQ_EMPTY(&timeout_proc))
                wakeup(&timeout_proc);
+#ifdef MULTIPROCESSOR
+       if(!CIRCQ_EMPTY(&timeout_proc_mpsafe))
+               wakeup(&timeout_proc_mpsafe);
+#endif
+       mtx_leave(&timeout_mutex);
 }
 
 void
@@ -730,6 +757,11 @@ softclock_create_thread(void *arg)
 {
        if (kthread_create(softclock_thread, NULL, NULL, "softclock"))
                panic("fork softclock");
+#ifdef MULTIPROCESSOR
+       if (kthread_create(softclock_thread_mpsafe, NULL, NULL,
+           "softclock_mpsafe"))
+               panic("fork softclock_mpsafe");
+#endif
 }
 
 void
@@ -752,22 +784,52 @@ softclock_thread(void *arg)
 
        s = splsoftclock();
        for (;;) {
-               sleep_setup(&timeout_proc, PSWP, "bored");
-               sleep_finish(0, CIRCQ_EMPTY(&timeout_proc));
-
                mtx_enter(&timeout_mutex);
+               if (CIRCQ_EMPTY(&timeout_proc)){
+                       tostat.tos_thread_wakeups++;
+                       msleep_nsec(&timeout_proc, &timeout_mutex, PSWP,
+                           "bored", INFSLP);
+               }
                while (!CIRCQ_EMPTY(&timeout_proc)) {
                        to = timeout_from_circq(CIRCQ_FIRST(&timeout_proc));
                        CIRCQ_REMOVE(&to->to_list);
                        timeout_run(to);
                        tostat.tos_run_thread++;
                }
-               tostat.tos_thread_wakeups++;
                mtx_leave(&timeout_mutex);
        }
        splx(s);
 }
 
+#ifdef MULTIPROCESSOR
+void
+softclock_thread_mpsafe(void *arg)
+{
+       struct circq *elm;
+       struct timeout *to;
+
+       KERNEL_ASSERT_LOCKED();
+       KERNEL_UNLOCK();
+
+       for (;;) {
+               mtx_enter(&timeout_mutex);
+               if(CIRCQ_EMPTY(&timeout_proc_mpsafe)) {
+                       tostat.tos_thread_wakeups++;
+                       msleep_nsec(&timeout_proc_mpsafe, &timeout_mutex,
+                           PSWP, "boredmp", INFSLP);
+               }
+               while (!CIRCQ_EMPTY(&timeout_proc_mpsafe)) {
+                       elm = CIRCQ_FIRST(&timeout_proc_mpsafe);
+                       to = timeout_from_circq(elm);
+                       CIRCQ_REMOVE(&to->to_list);
+                       timeout_run(to);
+                       tostat.tos_run_thread++;
+               }
+               mtx_leave(&timeout_mutex);
+       }
+}
+#endif /* MULTIPROCESSOR*/
+
 #ifndef SMALL_KERNEL
 void
 timeout_adjust_ticks(int adj)
@@ -875,6 +937,10 @@ db_show_timeout(struct timeout *to, stru
                where = "softint";
        else if (bucket == &timeout_proc)
                where = "thread";
+#ifdef MULTIPROCESSOR
+       else if (bucket == &timeout_proc_mpsafe)
+               where = "thread";
+#endif
        else {
                if (to->to_kclock != KCLOCK_NONE)
                        wheel = timeout_wheel_kc;
@@ -918,6 +984,9 @@ db_show_callout(db_expr_t addr, int hadd
        db_show_callout_bucket(&timeout_new);
        db_show_callout_bucket(&timeout_todo);
        db_show_callout_bucket(&timeout_proc);
+#ifdef MULTIPROCESSOR
+       db_show_callout_bucket(&timeout_proc_mpsafe);
+#endif
        for (b = 0; b < nitems(timeout_wheel); b++)
                db_show_callout_bucket(&timeout_wheel[b]);
        for (b = 0; b < nitems(timeout_wheel_kc); b++)
Index: sys/sys/timeout.h
===================================================================
RCS file: /cvs/src/sys/sys/timeout.h,v
retrieving revision 1.47
diff -u -p -r1.47 timeout.h
--- sys/sys/timeout.h   31 Dec 2022 16:06:24 -0000      1.47
+++ sys/sys/timeout.h   6 Oct 2023 10:57:07 -0000
@@ -54,6 +54,7 @@ struct timeout {
 #define TIMEOUT_ONQUEUE                0x02    /* on any timeout queue */
 #define TIMEOUT_INITIALIZED    0x04    /* initialized */
 #define TIMEOUT_TRIGGERED      0x08    /* running or ran */
+#define TIMEOUT_MPSAFE         0x10    /* run without kernel lock */
 
 struct timeoutstat {
        uint64_t tos_added;             /* timeout_add*(9) calls */

Reply via email to