TCP timers end up calling ip_output(), which will soon require that the
caller holds a rwlock.

In order to do that, I diff below changes 'struct tcptimer' to become a
tuple of (timeout, task).  When a timeout fires, the corresponding task
gets scheduled.

I'm currently using the ``softnettq'' to schedule everything related to
the network.  This is the safest approach, because concurrent tasks are
still serialized.

I extended the G/C timeout to 1sec, because there's no reference count
for 'struct tcpcb'.  And the only race possible is described in the
comment below:

        /*
         * If one of the timer tasks is already running, it must
         * be spinning on the KERNEL_LOCK().  So schedule a G/C
         * in one second, when we known the task will have released
         * its reference.
         */

Since the first thing task are doing is checking for the ``TF_DEAD''
flag this should be enough.

Comments, oks?

Index: netinet/tcp_timer.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_timer.c,v
retrieving revision 1.49
diff -u -p -r1.49 tcp_timer.c
--- netinet/tcp_timer.c 7 Mar 2016 18:44:00 -0000       1.49
+++ netinet/tcp_timer.c 12 Sep 2016 11:42:16 -0000
@@ -71,6 +71,11 @@ void tcp_timer_persist(void *);
 void   tcp_timer_keep(void *);
 void   tcp_timer_2msl(void *);
 
+void   tcp_timer_rexmt_task(void *);
+void   tcp_timer_persist_task(void *);
+void   tcp_timer_keep_task(void *);
+void   tcp_timer_2msl_task(void *);
+
 const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
        tcp_timer_rexmt,
        tcp_timer_persist,
@@ -78,6 +83,13 @@ const tcp_timer_func_t tcp_timer_funcs[T
        tcp_timer_2msl,
 };
 
+const tcp_timer_func_t tcp_timer_tasks[TCPT_NTIMERS] = {
+       tcp_timer_rexmt_task,
+       tcp_timer_persist_task,
+       tcp_timer_keep_task,
+       tcp_timer_2msl_task,
+};
+
 /*
  * Timer state initialization, called from tcp_init().
  */
@@ -105,6 +117,14 @@ void
 tcp_delack(void *arg)
 {
        struct tcpcb *tp = arg;
+
+       task_add(softnettq, &tp->t_delack_to_task);
+}
+
+void
+tcp_delack_task(void *arg)
+{
+       struct tcpcb *tp = arg;
        int s;
 
        /*
@@ -112,15 +132,15 @@ tcp_delack(void *arg)
         * for whatever reason, it will restart the delayed
         * ACK callout.
         */
-
+       KERNEL_LOCK();
        s = splsoftnet();
-       if (tp->t_flags & TF_DEAD) {
-               splx(s);
-               return;
-       }
+       if (tp->t_flags & TF_DEAD)
+               goto out;
        tp->t_flags |= TF_ACKNOW;
        (void) tcp_output(tp);
+ out:
        splx(s);
+       KERNEL_UNLOCK();
 }
 
 /*
@@ -144,8 +164,7 @@ tcp_slowtimo(void)
  * Cancel all timers for TCP tp.
  */
 void
-tcp_canceltimers(tp)
-       struct tcpcb *tp;
+tcp_canceltimers(struct tcpcb *tp)
 {
        int i;
 
@@ -153,6 +172,15 @@ tcp_canceltimers(tp)
                TCP_TIMER_DISARM(tp, i);
 }
 
+void
+tcp_destroytimers(struct tcpcb *tp)
+{
+       int i;
+
+       for (i = 0; i < TCPT_NTIMERS; i++)
+               TCP_TIMER_DESTROY(tp, i);
+}
+
 int    tcp_backoff[TCP_MAXRXTSHIFT + 1] =
     { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
 
@@ -191,14 +219,21 @@ void
 tcp_timer_rexmt(void *arg)
 {
        struct tcpcb *tp = arg;
+
+       task_add(softnettq, &tp->t_timer[TCPT_REXMT].tcpt_task);
+}
+
+void
+tcp_timer_rexmt_task(void *arg)
+{
+       struct tcpcb *tp = arg;
        uint32_t rto;
        int s;
 
+       KERNEL_LOCK();
        s = splsoftnet();
-       if (tp->t_flags & TF_DEAD) {
-               splx(s);
-               return;
-       }
+       if (tp->t_flags & TF_DEAD)
+               goto out;
 
        if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
            SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
@@ -225,8 +260,7 @@ tcp_timer_rexmt(void *arg)
                sin.sin_addr = tp->t_inpcb->inp_faddr;
                in_pcbnotifyall(&tcbtable, sintosa(&sin),
                    tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc);
-               splx(s);
-               return;
+               goto out;
        }
 
 #ifdef TCP_SACK
@@ -378,20 +412,29 @@ tcp_timer_rexmt(void *arg)
 
  out:
        splx(s);
+       KERNEL_UNLOCK();
 }
 
 void
 tcp_timer_persist(void *arg)
 {
        struct tcpcb *tp = arg;
+
+       task_add(softnettq, &tp->t_timer[TCPT_PERSIST].tcpt_task);
+}
+
+void
+tcp_timer_persist_task(void *arg)
+{
+       struct tcpcb *tp = arg;
        uint32_t rto;
        int s;
 
+       KERNEL_LOCK();
        s = splsoftnet();
        if ((tp->t_flags & TF_DEAD) ||
             TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
-               splx(s);
-               return;
+               goto out;
        }
        tcpstat.tcps_persisttimeo++;
        /*
@@ -417,19 +460,27 @@ tcp_timer_persist(void *arg)
        tp->t_force = 0;
  out:
        splx(s);
+       KERNEL_UNLOCK();
 }
 
 void
 tcp_timer_keep(void *arg)
 {
        struct tcpcb *tp = arg;
+
+       task_add(softnettq, &tp->t_timer[TCPT_KEEP].tcpt_task);
+}
+
+void
+tcp_timer_keep_task(void *arg)
+{
+       struct tcpcb *tp = arg;
        int s;
 
+       KERNEL_LOCK();
        s = splsoftnet();
-       if (tp->t_flags & TF_DEAD) {
-               splx(s);
-               return;
-       }
+       if (tp->t_flags & TF_DEAD)
+               goto out;
 
        tcpstat.tcps_keeptimeo++;
        if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
@@ -458,8 +509,9 @@ tcp_timer_keep(void *arg)
                TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
        } else
                TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
-
+ out:
        splx(s);
+       KERNEL_UNLOCK();
        return;
 
  dropit:
@@ -467,19 +519,27 @@ tcp_timer_keep(void *arg)
        tp = tcp_drop(tp, ETIMEDOUT);
 
        splx(s);
+       KERNEL_UNLOCK();
 }
 
 void
 tcp_timer_2msl(void *arg)
 {
        struct tcpcb *tp = arg;
+
+       task_add(softnettq, &tp->t_timer[TCPT_2MSL].tcpt_task);
+}
+
+void
+tcp_timer_2msl_task(void *arg)
+{
+       struct tcpcb *tp = arg;
        int s;
 
+       KERNEL_LOCK();
        s = splsoftnet();
-       if (tp->t_flags & TF_DEAD) {
-               splx(s);
-               return;
-       }
+       if (tp->t_flags & TF_DEAD)
+               goto out;
 
 #ifdef TCP_SACK
        tcp_timer_freesack(tp);
@@ -491,5 +551,7 @@ tcp_timer_2msl(void *arg)
        else
                tp = tcp_close(tp);
 
+ out:
        splx(s);
+       KERNEL_UNLOCK();
 }
Index: netinet/tcp_subr.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_subr.c,v
retrieving revision 1.154
diff -u -p -r1.154 tcp_subr.c
--- netinet/tcp_subr.c  6 Sep 2016 00:04:15 -0000       1.154
+++ netinet/tcp_subr.c  12 Sep 2016 11:41:30 -0000
@@ -526,6 +526,7 @@ tcp_close(struct tcpcb *tp)
        tcp_freeq(tp);
 
        tcp_canceltimers(tp);
+       tcp_destroytimers(tp);
        TCP_CLEAR_DELACK(tp);
        syn_cache_cleanup(tp);
 
@@ -542,7 +543,14 @@ tcp_close(struct tcpcb *tp)
                (void) m_free(tp->t_template);
 
        tp->t_flags |= TF_DEAD;
-       timeout_add(&tp->t_reap_to, 0);
+       /*
+        * If one of the timer tasks is already running, it must
+        * be spinning on the KERNEL_LOCK().  So schedule a G/C
+        * in one second, when we known the task will have released
+        * its reference.
+        */
+       KERNEL_ASSERT_LOCKED();
+       timeout_add_sec(&tp->t_reap_to, 1);
 
        inp->inp_ppcb = 0;
        soisdisconnected(so);
Index: netinet/tcp_timer.h
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_timer.h,v
retrieving revision 1.13
diff -u -p -r1.13 tcp_timer.h
--- netinet/tcp_timer.h 6 Jul 2011 23:44:20 -0000       1.13
+++ netinet/tcp_timer.h 12 Sep 2016 11:43:35 -0000
@@ -115,18 +115,25 @@ const char *tcptimers[] =
 /*
  * Init, arm, disarm, and test TCP timers.
  */
-#define        TCP_TIMER_INIT(tp, timer)                                       
\
-       timeout_set(&(tp)->t_timer[(timer)], tcp_timer_funcs[(timer)], tp)
+#define        TCP_TIMER_INIT(tp, timer) do {                                  
\
+       timeout_set(&(tp)->t_timer[(timer)].tcpt_timeout,               \
+           tcp_timer_funcs[(timer)], tp);                              \
+       task_set(&(tp)->t_timer[(timer)].tcpt_task,                     \
+           tcp_timer_tasks[(timer)], tp);                              \
+} while(0)
 
 #define        TCP_TIMER_ARM(tp, timer, nticks)                                
\
-       timeout_add(&(tp)->t_timer[(timer)], (nticks) * (hz / PR_SLOWHZ))
+       timeout_add(&(tp)->t_timer[(timer)].tcpt_timeout,               \
+           (nticks) * (hz / PR_SLOWHZ))
 
 #define        TCP_TIMER_DISARM(tp, timer)                                     
\
-       timeout_del(&(tp)->t_timer[(timer)])
+       timeout_del(&(tp)->t_timer[(timer)].tcpt_timeout)
 
 #define        TCP_TIMER_ISARMED(tp, timer)                                    
\
-       timeout_pending(&(tp)->t_timer[(timer)])
+       timeout_pending(&(tp)->t_timer[(timer)].tcpt_timeout)
 
+#define        TCP_TIMER_DESTROY(tp, timer)                                    
\
+       task_del(softnettq, &(tp)->t_timer[(timer)].tcpt_task)
 /*
  * Force a time value to be in a certain range.
  */
@@ -143,6 +150,7 @@ do {                                                        
                \
 typedef void (*tcp_timer_func_t)(void *);
 
 extern const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS];
+extern const tcp_timer_func_t tcp_timer_tasks[TCPT_NTIMERS];
 
 extern int tcptv_keep_init;
 extern int tcp_always_keepalive;       /* assume SO_KEEPALIVE is always set */
Index: netinet/tcp_var.h
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_var.h,v
retrieving revision 1.115
diff -u -p -r1.115 tcp_var.h
--- netinet/tcp_var.h   20 Jul 2016 19:57:53 -0000      1.115
+++ netinet/tcp_var.h   12 Sep 2016 11:42:38 -0000
@@ -64,12 +64,17 @@ struct tcpqent {
        struct mbuf     *tcpqe_m;       /* mbuf contains packet */
 };
 
+struct tcptimer {
+       struct timeout  tcpt_timeout;
+       struct task     tcpt_task;
+};
+
 /*
  * Tcp control block, one per tcp; fields:
  */
 struct tcpcb {
        struct tcpqehead t_segq;                /* sequencing queue */
-       struct timeout t_timer[TCPT_NTIMERS];   /* tcp timers */
+       struct tcptimer t_timer[TCPT_NTIMERS];  /* tcp timers */
        short   t_state;                /* state of this connection */
        short   t_rxtshift;             /* log(2) of rexmt exp. backoff */
        short   t_rxtcur;               /* current retransmit value */
@@ -104,6 +109,7 @@ struct tcpcb {
        struct  mbuf *t_template;       /* skeletal packet for transmit */
        struct  inpcb *t_inpcb;         /* back pointer to internet pcb */
        struct  timeout t_delack_to;    /* delayed ACK callback */
+       struct  task t_delack_to_task;  /* task for the ACK callback */
 /*
  * The following fields are used as in the protocol specification.
  * See RFC793, Dec. 1981, page 21.
@@ -215,9 +221,12 @@ struct tcpcb {
 #ifdef _KERNEL
 extern int tcp_delack_ticks;
 void   tcp_delack(void *);
+void   tcp_delack_task(void *);
 
-#define TCP_INIT_DELACK(tp)                                            \
-       timeout_set(&(tp)->t_delack_to, tcp_delack, tp)
+#define TCP_INIT_DELACK(tp) do {                                       \
+       timeout_set(&(tp)->t_delack_to, tcp_delack, tp);                \
+       task_set(&(tp)->t_delack_to_task, tcp_delack_task, tp);         \
+} while (/* CONSTCOND */ 0)
 
 #define TCP_RESTART_DELACK(tp)                                         \
        timeout_add(&(tp)->t_delack_to, tcp_delack_ticks)
@@ -594,6 +603,7 @@ extern      int tcp_syn_cache_active; /* acti
 
 int     tcp_attach(struct socket *);
 void    tcp_canceltimers(struct tcpcb *);
+void    tcp_destroytimers(struct tcpcb *);
 struct tcpcb *
         tcp_close(struct tcpcb *);
 void    tcp_reaper(void *);
Index: netinet/tcp_debug.c
===================================================================
RCS file: /cvs/src/sys/netinet/tcp_debug.c,v
retrieving revision 1.23
diff -u -p -r1.23 tcp_debug.c
--- netinet/tcp_debug.c 14 Mar 2015 03:38:52 -0000      1.23
+++ netinet/tcp_debug.c 12 Sep 2016 09:58:40 -0000
@@ -81,6 +81,7 @@
 #include <sys/mbuf.h>
 #include <sys/socket.h>
 #include <sys/protosw.h>
+#include <sys/task.h>
 
 #include <net/route.h>
 
Index: sys/systm.h
===================================================================
RCS file: /cvs/src/sys/sys/systm.h,v
retrieving revision 1.116
diff -u -p -r1.116 systm.h
--- sys/systm.h 4 Sep 2016 09:22:29 -0000       1.116
+++ sys/systm.h 12 Sep 2016 10:01:13 -0000
@@ -287,6 +289,8 @@ struct uio;
 int    uiomove(void *, size_t, struct uio *);
 
 #if defined(_KERNEL)
+extern struct taskq *softnettq;                /* Network task queue. */
+
 __returns_twice int    setjmp(label_t *);
 __dead void    longjmp(label_t *);
 #endif

Reply via email to