On 12/09/16(Mon) 13:54, Martin Pieuchot wrote: > TCP timers end up calling ip_output(), which will soon require that the > caller holds a rwlock. > > In order to do that, I diff below changes 'struct tcptimer' to become a > tuple of (timeout, task). When a timeout fires, the corresponding task > gets scheduled. > > I'm currently using the ``softnettq'' to schedule everything related to > the network. This is the safest approach, because concurrent tasks are > still serialized. > > I extended the G/C timeout to 1sec, because there's no reference count > for 'struct tcpcb'. And the only race possible is described in the > comment below: > > /* > * If one of the timer tasks is already running, it must > * be spinning on the KERNEL_LOCK(). So schedule a G/C > * in one second, when we known the task will have released > * its reference. > */ > > Since the first thing task are doing is checking for the ``TF_DEAD'' > flag this should be enough. > > Comments, oks?
So I heard a lot of comments about using a specific API for timed task. I agreed with all of them but think it's a second step. Apart from that, do you have any other concern? Ok? > Index: netinet/tcp_timer.c > =================================================================== > RCS file: /cvs/src/sys/netinet/tcp_timer.c,v > retrieving revision 1.49 > diff -u -p -r1.49 tcp_timer.c > --- netinet/tcp_timer.c 7 Mar 2016 18:44:00 -0000 1.49 > +++ netinet/tcp_timer.c 12 Sep 2016 11:42:16 -0000 > @@ -71,6 +71,11 @@ void tcp_timer_persist(void *); > void tcp_timer_keep(void *); > void tcp_timer_2msl(void *); > > +void tcp_timer_rexmt_task(void *); > +void tcp_timer_persist_task(void *); > +void tcp_timer_keep_task(void *); > +void tcp_timer_2msl_task(void *); > + > const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = { > tcp_timer_rexmt, > tcp_timer_persist, > @@ -78,6 +83,13 @@ const tcp_timer_func_t tcp_timer_funcs[T > tcp_timer_2msl, > }; > > +const tcp_timer_func_t tcp_timer_tasks[TCPT_NTIMERS] = { > + tcp_timer_rexmt_task, > + tcp_timer_persist_task, > + tcp_timer_keep_task, > + tcp_timer_2msl_task, > +}; > + > /* > * Timer state initialization, called from tcp_init(). > */ > @@ -105,6 +117,14 @@ void > tcp_delack(void *arg) > { > struct tcpcb *tp = arg; > + > + task_add(softnettq, &tp->t_delack_to_task); > +} > + > +void > +tcp_delack_task(void *arg) > +{ > + struct tcpcb *tp = arg; > int s; > > /* > @@ -112,15 +132,15 @@ tcp_delack(void *arg) > * for whatever reason, it will restart the delayed > * ACK callout. > */ > - > + KERNEL_LOCK(); > s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + if (tp->t_flags & TF_DEAD) > + goto out; > tp->t_flags |= TF_ACKNOW; > (void) tcp_output(tp); > + out: > splx(s); > + KERNEL_UNLOCK(); > } > > /* > @@ -144,8 +164,7 @@ tcp_slowtimo(void) > * Cancel all timers for TCP tp. > */ > void > -tcp_canceltimers(tp) > - struct tcpcb *tp; > +tcp_canceltimers(struct tcpcb *tp) > { > int i; > > @@ -153,6 +172,15 @@ tcp_canceltimers(tp) > TCP_TIMER_DISARM(tp, i); > } > > +void > +tcp_destroytimers(struct tcpcb *tp) > +{ > + int i; > + > + for (i = 0; i < TCPT_NTIMERS; i++) > + TCP_TIMER_DESTROY(tp, i); > +} > + > int tcp_backoff[TCP_MAXRXTSHIFT + 1] = > { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 }; > > @@ -191,14 +219,21 @@ void > tcp_timer_rexmt(void *arg) > { > struct tcpcb *tp = arg; > + > + task_add(softnettq, &tp->t_timer[TCPT_REXMT].tcpt_task); > +} > + > +void > +tcp_timer_rexmt_task(void *arg) > +{ > + struct tcpcb *tp = arg; > uint32_t rto; > int s; > > + KERNEL_LOCK(); > s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + if (tp->t_flags & TF_DEAD) > + goto out; > > if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb && > SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) && > @@ -225,8 +260,7 @@ tcp_timer_rexmt(void *arg) > sin.sin_addr = tp->t_inpcb->inp_faddr; > in_pcbnotifyall(&tcbtable, sintosa(&sin), > tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc); > - splx(s); > - return; > + goto out; > } > > #ifdef TCP_SACK > @@ -378,20 +412,29 @@ tcp_timer_rexmt(void *arg) > > out: > splx(s); > + KERNEL_UNLOCK(); > } > > void > tcp_timer_persist(void *arg) > { > struct tcpcb *tp = arg; > + > + task_add(softnettq, &tp->t_timer[TCPT_PERSIST].tcpt_task); > +} > + > +void > +tcp_timer_persist_task(void *arg) > +{ > + struct tcpcb *tp = arg; > uint32_t rto; > int s; > > + KERNEL_LOCK(); > s = splsoftnet(); > if ((tp->t_flags & TF_DEAD) || > TCP_TIMER_ISARMED(tp, TCPT_REXMT)) { > - splx(s); > - return; > + goto out; > } > tcpstat.tcps_persisttimeo++; > /* > @@ -417,19 +460,27 @@ tcp_timer_persist(void *arg) > tp->t_force = 0; > out: > splx(s); > + KERNEL_UNLOCK(); > } > > void > tcp_timer_keep(void *arg) > { > struct tcpcb *tp = arg; > + > + task_add(softnettq, &tp->t_timer[TCPT_KEEP].tcpt_task); > +} > + > +void > +tcp_timer_keep_task(void *arg) > +{ > + struct tcpcb *tp = arg; > int s; > > + KERNEL_LOCK(); > s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + if (tp->t_flags & TF_DEAD) > + goto out; > > tcpstat.tcps_keeptimeo++; > if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) > @@ -458,8 +509,9 @@ tcp_timer_keep(void *arg) > TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl); > } else > TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle); > - > + out: > splx(s); > + KERNEL_UNLOCK(); > return; > > dropit: > @@ -467,19 +519,27 @@ tcp_timer_keep(void *arg) > tp = tcp_drop(tp, ETIMEDOUT); > > splx(s); > + KERNEL_UNLOCK(); > } > > void > tcp_timer_2msl(void *arg) > { > struct tcpcb *tp = arg; > + > + task_add(softnettq, &tp->t_timer[TCPT_2MSL].tcpt_task); > +} > + > +void > +tcp_timer_2msl_task(void *arg) > +{ > + struct tcpcb *tp = arg; > int s; > > + KERNEL_LOCK(); > s = splsoftnet(); > - if (tp->t_flags & TF_DEAD) { > - splx(s); > - return; > - } > + if (tp->t_flags & TF_DEAD) > + goto out; > > #ifdef TCP_SACK > tcp_timer_freesack(tp); > @@ -491,5 +551,7 @@ tcp_timer_2msl(void *arg) > else > tp = tcp_close(tp); > > + out: > splx(s); > + KERNEL_UNLOCK(); > } > Index: netinet/tcp_subr.c > =================================================================== > RCS file: /cvs/src/sys/netinet/tcp_subr.c,v > retrieving revision 1.154 > diff -u -p -r1.154 tcp_subr.c > --- netinet/tcp_subr.c 6 Sep 2016 00:04:15 -0000 1.154 > +++ netinet/tcp_subr.c 12 Sep 2016 11:41:30 -0000 > @@ -526,6 +526,7 @@ tcp_close(struct tcpcb *tp) > tcp_freeq(tp); > > tcp_canceltimers(tp); > + tcp_destroytimers(tp); > TCP_CLEAR_DELACK(tp); > syn_cache_cleanup(tp); > > @@ -542,7 +543,14 @@ tcp_close(struct tcpcb *tp) > (void) m_free(tp->t_template); > > tp->t_flags |= TF_DEAD; > - timeout_add(&tp->t_reap_to, 0); > + /* > + * If one of the timer tasks is already running, it must > + * be spinning on the KERNEL_LOCK(). So schedule a G/C > + * in one second, when we known the task will have released > + * its reference. > + */ > + KERNEL_ASSERT_LOCKED(); > + timeout_add_sec(&tp->t_reap_to, 1); > > inp->inp_ppcb = 0; > soisdisconnected(so); > Index: netinet/tcp_timer.h > =================================================================== > RCS file: /cvs/src/sys/netinet/tcp_timer.h,v > retrieving revision 1.13 > diff -u -p -r1.13 tcp_timer.h > --- netinet/tcp_timer.h 6 Jul 2011 23:44:20 -0000 1.13 > +++ netinet/tcp_timer.h 12 Sep 2016 11:43:35 -0000 > @@ -115,18 +115,25 @@ const char *tcptimers[] = > /* > * Init, arm, disarm, and test TCP timers. > */ > -#define TCP_TIMER_INIT(tp, timer) > \ > - timeout_set(&(tp)->t_timer[(timer)], tcp_timer_funcs[(timer)], tp) > +#define TCP_TIMER_INIT(tp, timer) do { > \ > + timeout_set(&(tp)->t_timer[(timer)].tcpt_timeout, \ > + tcp_timer_funcs[(timer)], tp); \ > + task_set(&(tp)->t_timer[(timer)].tcpt_task, \ > + tcp_timer_tasks[(timer)], tp); \ > +} while(0) > > #define TCP_TIMER_ARM(tp, timer, nticks) > \ > - timeout_add(&(tp)->t_timer[(timer)], (nticks) * (hz / PR_SLOWHZ)) > + timeout_add(&(tp)->t_timer[(timer)].tcpt_timeout, \ > + (nticks) * (hz / PR_SLOWHZ)) > > #define TCP_TIMER_DISARM(tp, timer) > \ > - timeout_del(&(tp)->t_timer[(timer)]) > + timeout_del(&(tp)->t_timer[(timer)].tcpt_timeout) > > #define TCP_TIMER_ISARMED(tp, timer) > \ > - timeout_pending(&(tp)->t_timer[(timer)]) > + timeout_pending(&(tp)->t_timer[(timer)].tcpt_timeout) > > +#define TCP_TIMER_DESTROY(tp, timer) > \ > + task_del(softnettq, &(tp)->t_timer[(timer)].tcpt_task) > /* > * Force a time value to be in a certain range. > */ > @@ -143,6 +150,7 @@ do { > \ > typedef void (*tcp_timer_func_t)(void *); > > extern const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS]; > +extern const tcp_timer_func_t tcp_timer_tasks[TCPT_NTIMERS]; > > extern int tcptv_keep_init; > extern int tcp_always_keepalive; /* assume SO_KEEPALIVE is always set */ > Index: netinet/tcp_var.h > =================================================================== > RCS file: /cvs/src/sys/netinet/tcp_var.h,v > retrieving revision 1.115 > diff -u -p -r1.115 tcp_var.h > --- netinet/tcp_var.h 20 Jul 2016 19:57:53 -0000 1.115 > +++ netinet/tcp_var.h 12 Sep 2016 11:42:38 -0000 > @@ -64,12 +64,17 @@ struct tcpqent { > struct mbuf *tcpqe_m; /* mbuf contains packet */ > }; > > +struct tcptimer { > + struct timeout tcpt_timeout; > + struct task tcpt_task; > +}; > + > /* > * Tcp control block, one per tcp; fields: > */ > struct tcpcb { > struct tcpqehead t_segq; /* sequencing queue */ > - struct timeout t_timer[TCPT_NTIMERS]; /* tcp timers */ > + struct tcptimer t_timer[TCPT_NTIMERS]; /* tcp timers */ > short t_state; /* state of this connection */ > short t_rxtshift; /* log(2) of rexmt exp. backoff */ > short t_rxtcur; /* current retransmit value */ > @@ -104,6 +109,7 @@ struct tcpcb { > struct mbuf *t_template; /* skeletal packet for transmit */ > struct inpcb *t_inpcb; /* back pointer to internet pcb */ > struct timeout t_delack_to; /* delayed ACK callback */ > + struct task t_delack_to_task; /* task for the ACK callback */ > /* > * The following fields are used as in the protocol specification. > * See RFC793, Dec. 1981, page 21. > @@ -215,9 +221,12 @@ struct tcpcb { > #ifdef _KERNEL > extern int tcp_delack_ticks; > void tcp_delack(void *); > +void tcp_delack_task(void *); > > -#define TCP_INIT_DELACK(tp) \ > - timeout_set(&(tp)->t_delack_to, tcp_delack, tp) > +#define TCP_INIT_DELACK(tp) do { \ > + timeout_set(&(tp)->t_delack_to, tcp_delack, tp); \ > + task_set(&(tp)->t_delack_to_task, tcp_delack_task, tp); \ > +} while (/* CONSTCOND */ 0) > > #define TCP_RESTART_DELACK(tp) > \ > timeout_add(&(tp)->t_delack_to, tcp_delack_ticks) > @@ -594,6 +603,7 @@ extern int tcp_syn_cache_active; /* acti > > int tcp_attach(struct socket *); > void tcp_canceltimers(struct tcpcb *); > +void tcp_destroytimers(struct tcpcb *); > struct tcpcb * > tcp_close(struct tcpcb *); > void tcp_reaper(void *); > Index: netinet/tcp_debug.c > =================================================================== > RCS file: /cvs/src/sys/netinet/tcp_debug.c,v > retrieving revision 1.23 > diff -u -p -r1.23 tcp_debug.c > --- netinet/tcp_debug.c 14 Mar 2015 03:38:52 -0000 1.23 > +++ netinet/tcp_debug.c 12 Sep 2016 09:58:40 -0000 > @@ -81,6 +81,7 @@ > #include <sys/mbuf.h> > #include <sys/socket.h> > #include <sys/protosw.h> > +#include <sys/task.h> > > #include <net/route.h> > > Index: sys/systm.h > =================================================================== > RCS file: /cvs/src/sys/sys/systm.h,v > retrieving revision 1.116 > diff -u -p -r1.116 systm.h > --- sys/systm.h 4 Sep 2016 09:22:29 -0000 1.116 > +++ sys/systm.h 12 Sep 2016 10:01:13 -0000 > @@ -287,6 +289,8 @@ struct uio; > int uiomove(void *, size_t, struct uio *); > > #if defined(_KERNEL) > +extern struct taskq *softnettq; /* Network task queue. */ > + > __returns_twice int setjmp(label_t *); > __dead void longjmp(label_t *); > #endif >