On 12/09/16(Mon) 13:54, Martin Pieuchot wrote:
> TCP timers end up calling ip_output(), which will soon require that the
> caller holds a rwlock.
>
> In order to do that, I diff below changes 'struct tcptimer' to become a
> tuple of (timeout, task). When a timeout fires, the corresponding task
> gets scheduled.
>
> I'm currently using the ``softnettq'' to schedule everything related to
> the network. This is the safest approach, because concurrent tasks are
> still serialized.
>
> I extended the G/C timeout to 1sec, because there's no reference count
> for 'struct tcpcb'. And the only race possible is described in the
> comment below:
>
> /*
> * If one of the timer tasks is already running, it must
> * be spinning on the KERNEL_LOCK(). So schedule a G/C
> * in one second, when we known the task will have released
> * its reference.
> */
>
> Since the first thing task are doing is checking for the ``TF_DEAD''
> flag this should be enough.
>
> Comments, oks?
So I heard a lot of comments about using a specific API for timed task.
I agreed with all of them but think it's a second step.
Apart from that, do you have any other concern? Ok?
> Index: netinet/tcp_timer.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_timer.c,v
> retrieving revision 1.49
> diff -u -p -r1.49 tcp_timer.c
> --- netinet/tcp_timer.c 7 Mar 2016 18:44:00 -0000 1.49
> +++ netinet/tcp_timer.c 12 Sep 2016 11:42:16 -0000
> @@ -71,6 +71,11 @@ void tcp_timer_persist(void *);
> void tcp_timer_keep(void *);
> void tcp_timer_2msl(void *);
>
> +void tcp_timer_rexmt_task(void *);
> +void tcp_timer_persist_task(void *);
> +void tcp_timer_keep_task(void *);
> +void tcp_timer_2msl_task(void *);
> +
> const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS] = {
> tcp_timer_rexmt,
> tcp_timer_persist,
> @@ -78,6 +83,13 @@ const tcp_timer_func_t tcp_timer_funcs[T
> tcp_timer_2msl,
> };
>
> +const tcp_timer_func_t tcp_timer_tasks[TCPT_NTIMERS] = {
> + tcp_timer_rexmt_task,
> + tcp_timer_persist_task,
> + tcp_timer_keep_task,
> + tcp_timer_2msl_task,
> +};
> +
> /*
> * Timer state initialization, called from tcp_init().
> */
> @@ -105,6 +117,14 @@ void
> tcp_delack(void *arg)
> {
> struct tcpcb *tp = arg;
> +
> + task_add(softnettq, &tp->t_delack_to_task);
> +}
> +
> +void
> +tcp_delack_task(void *arg)
> +{
> + struct tcpcb *tp = arg;
> int s;
>
> /*
> @@ -112,15 +132,15 @@ tcp_delack(void *arg)
> * for whatever reason, it will restart the delayed
> * ACK callout.
> */
> -
> + KERNEL_LOCK();
> s = splsoftnet();
> - if (tp->t_flags & TF_DEAD) {
> - splx(s);
> - return;
> - }
> + if (tp->t_flags & TF_DEAD)
> + goto out;
> tp->t_flags |= TF_ACKNOW;
> (void) tcp_output(tp);
> + out:
> splx(s);
> + KERNEL_UNLOCK();
> }
>
> /*
> @@ -144,8 +164,7 @@ tcp_slowtimo(void)
> * Cancel all timers for TCP tp.
> */
> void
> -tcp_canceltimers(tp)
> - struct tcpcb *tp;
> +tcp_canceltimers(struct tcpcb *tp)
> {
> int i;
>
> @@ -153,6 +172,15 @@ tcp_canceltimers(tp)
> TCP_TIMER_DISARM(tp, i);
> }
>
> +void
> +tcp_destroytimers(struct tcpcb *tp)
> +{
> + int i;
> +
> + for (i = 0; i < TCPT_NTIMERS; i++)
> + TCP_TIMER_DESTROY(tp, i);
> +}
> +
> int tcp_backoff[TCP_MAXRXTSHIFT + 1] =
> { 1, 2, 4, 8, 16, 32, 64, 64, 64, 64, 64, 64, 64 };
>
> @@ -191,14 +219,21 @@ void
> tcp_timer_rexmt(void *arg)
> {
> struct tcpcb *tp = arg;
> +
> + task_add(softnettq, &tp->t_timer[TCPT_REXMT].tcpt_task);
> +}
> +
> +void
> +tcp_timer_rexmt_task(void *arg)
> +{
> + struct tcpcb *tp = arg;
> uint32_t rto;
> int s;
>
> + KERNEL_LOCK();
> s = splsoftnet();
> - if (tp->t_flags & TF_DEAD) {
> - splx(s);
> - return;
> - }
> + if (tp->t_flags & TF_DEAD)
> + goto out;
>
> if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb &&
> SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) &&
> @@ -225,8 +260,7 @@ tcp_timer_rexmt(void *arg)
> sin.sin_addr = tp->t_inpcb->inp_faddr;
> in_pcbnotifyall(&tcbtable, sintosa(&sin),
> tp->t_inpcb->inp_rtableid, EMSGSIZE, tcp_mtudisc);
> - splx(s);
> - return;
> + goto out;
> }
>
> #ifdef TCP_SACK
> @@ -378,20 +412,29 @@ tcp_timer_rexmt(void *arg)
>
> out:
> splx(s);
> + KERNEL_UNLOCK();
> }
>
> void
> tcp_timer_persist(void *arg)
> {
> struct tcpcb *tp = arg;
> +
> + task_add(softnettq, &tp->t_timer[TCPT_PERSIST].tcpt_task);
> +}
> +
> +void
> +tcp_timer_persist_task(void *arg)
> +{
> + struct tcpcb *tp = arg;
> uint32_t rto;
> int s;
>
> + KERNEL_LOCK();
> s = splsoftnet();
> if ((tp->t_flags & TF_DEAD) ||
> TCP_TIMER_ISARMED(tp, TCPT_REXMT)) {
> - splx(s);
> - return;
> + goto out;
> }
> tcpstat.tcps_persisttimeo++;
> /*
> @@ -417,19 +460,27 @@ tcp_timer_persist(void *arg)
> tp->t_force = 0;
> out:
> splx(s);
> + KERNEL_UNLOCK();
> }
>
> void
> tcp_timer_keep(void *arg)
> {
> struct tcpcb *tp = arg;
> +
> + task_add(softnettq, &tp->t_timer[TCPT_KEEP].tcpt_task);
> +}
> +
> +void
> +tcp_timer_keep_task(void *arg)
> +{
> + struct tcpcb *tp = arg;
> int s;
>
> + KERNEL_LOCK();
> s = splsoftnet();
> - if (tp->t_flags & TF_DEAD) {
> - splx(s);
> - return;
> - }
> + if (tp->t_flags & TF_DEAD)
> + goto out;
>
> tcpstat.tcps_keeptimeo++;
> if (TCPS_HAVEESTABLISHED(tp->t_state) == 0)
> @@ -458,8 +509,9 @@ tcp_timer_keep(void *arg)
> TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepintvl);
> } else
> TCP_TIMER_ARM(tp, TCPT_KEEP, tcp_keepidle);
> -
> + out:
> splx(s);
> + KERNEL_UNLOCK();
> return;
>
> dropit:
> @@ -467,19 +519,27 @@ tcp_timer_keep(void *arg)
> tp = tcp_drop(tp, ETIMEDOUT);
>
> splx(s);
> + KERNEL_UNLOCK();
> }
>
> void
> tcp_timer_2msl(void *arg)
> {
> struct tcpcb *tp = arg;
> +
> + task_add(softnettq, &tp->t_timer[TCPT_2MSL].tcpt_task);
> +}
> +
> +void
> +tcp_timer_2msl_task(void *arg)
> +{
> + struct tcpcb *tp = arg;
> int s;
>
> + KERNEL_LOCK();
> s = splsoftnet();
> - if (tp->t_flags & TF_DEAD) {
> - splx(s);
> - return;
> - }
> + if (tp->t_flags & TF_DEAD)
> + goto out;
>
> #ifdef TCP_SACK
> tcp_timer_freesack(tp);
> @@ -491,5 +551,7 @@ tcp_timer_2msl(void *arg)
> else
> tp = tcp_close(tp);
>
> + out:
> splx(s);
> + KERNEL_UNLOCK();
> }
> Index: netinet/tcp_subr.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_subr.c,v
> retrieving revision 1.154
> diff -u -p -r1.154 tcp_subr.c
> --- netinet/tcp_subr.c 6 Sep 2016 00:04:15 -0000 1.154
> +++ netinet/tcp_subr.c 12 Sep 2016 11:41:30 -0000
> @@ -526,6 +526,7 @@ tcp_close(struct tcpcb *tp)
> tcp_freeq(tp);
>
> tcp_canceltimers(tp);
> + tcp_destroytimers(tp);
> TCP_CLEAR_DELACK(tp);
> syn_cache_cleanup(tp);
>
> @@ -542,7 +543,14 @@ tcp_close(struct tcpcb *tp)
> (void) m_free(tp->t_template);
>
> tp->t_flags |= TF_DEAD;
> - timeout_add(&tp->t_reap_to, 0);
> + /*
> + * If one of the timer tasks is already running, it must
> + * be spinning on the KERNEL_LOCK(). So schedule a G/C
> + * in one second, when we known the task will have released
> + * its reference.
> + */
> + KERNEL_ASSERT_LOCKED();
> + timeout_add_sec(&tp->t_reap_to, 1);
>
> inp->inp_ppcb = 0;
> soisdisconnected(so);
> Index: netinet/tcp_timer.h
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_timer.h,v
> retrieving revision 1.13
> diff -u -p -r1.13 tcp_timer.h
> --- netinet/tcp_timer.h 6 Jul 2011 23:44:20 -0000 1.13
> +++ netinet/tcp_timer.h 12 Sep 2016 11:43:35 -0000
> @@ -115,18 +115,25 @@ const char *tcptimers[] =
> /*
> * Init, arm, disarm, and test TCP timers.
> */
> -#define TCP_TIMER_INIT(tp, timer)
> \
> - timeout_set(&(tp)->t_timer[(timer)], tcp_timer_funcs[(timer)], tp)
> +#define TCP_TIMER_INIT(tp, timer) do {
> \
> + timeout_set(&(tp)->t_timer[(timer)].tcpt_timeout, \
> + tcp_timer_funcs[(timer)], tp); \
> + task_set(&(tp)->t_timer[(timer)].tcpt_task, \
> + tcp_timer_tasks[(timer)], tp); \
> +} while(0)
>
> #define TCP_TIMER_ARM(tp, timer, nticks)
> \
> - timeout_add(&(tp)->t_timer[(timer)], (nticks) * (hz / PR_SLOWHZ))
> + timeout_add(&(tp)->t_timer[(timer)].tcpt_timeout, \
> + (nticks) * (hz / PR_SLOWHZ))
>
> #define TCP_TIMER_DISARM(tp, timer)
> \
> - timeout_del(&(tp)->t_timer[(timer)])
> + timeout_del(&(tp)->t_timer[(timer)].tcpt_timeout)
>
> #define TCP_TIMER_ISARMED(tp, timer)
> \
> - timeout_pending(&(tp)->t_timer[(timer)])
> + timeout_pending(&(tp)->t_timer[(timer)].tcpt_timeout)
>
> +#define TCP_TIMER_DESTROY(tp, timer)
> \
> + task_del(softnettq, &(tp)->t_timer[(timer)].tcpt_task)
> /*
> * Force a time value to be in a certain range.
> */
> @@ -143,6 +150,7 @@ do {
> \
> typedef void (*tcp_timer_func_t)(void *);
>
> extern const tcp_timer_func_t tcp_timer_funcs[TCPT_NTIMERS];
> +extern const tcp_timer_func_t tcp_timer_tasks[TCPT_NTIMERS];
>
> extern int tcptv_keep_init;
> extern int tcp_always_keepalive; /* assume SO_KEEPALIVE is always set */
> Index: netinet/tcp_var.h
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_var.h,v
> retrieving revision 1.115
> diff -u -p -r1.115 tcp_var.h
> --- netinet/tcp_var.h 20 Jul 2016 19:57:53 -0000 1.115
> +++ netinet/tcp_var.h 12 Sep 2016 11:42:38 -0000
> @@ -64,12 +64,17 @@ struct tcpqent {
> struct mbuf *tcpqe_m; /* mbuf contains packet */
> };
>
> +struct tcptimer {
> + struct timeout tcpt_timeout;
> + struct task tcpt_task;
> +};
> +
> /*
> * Tcp control block, one per tcp; fields:
> */
> struct tcpcb {
> struct tcpqehead t_segq; /* sequencing queue */
> - struct timeout t_timer[TCPT_NTIMERS]; /* tcp timers */
> + struct tcptimer t_timer[TCPT_NTIMERS]; /* tcp timers */
> short t_state; /* state of this connection */
> short t_rxtshift; /* log(2) of rexmt exp. backoff */
> short t_rxtcur; /* current retransmit value */
> @@ -104,6 +109,7 @@ struct tcpcb {
> struct mbuf *t_template; /* skeletal packet for transmit */
> struct inpcb *t_inpcb; /* back pointer to internet pcb */
> struct timeout t_delack_to; /* delayed ACK callback */
> + struct task t_delack_to_task; /* task for the ACK callback */
> /*
> * The following fields are used as in the protocol specification.
> * See RFC793, Dec. 1981, page 21.
> @@ -215,9 +221,12 @@ struct tcpcb {
> #ifdef _KERNEL
> extern int tcp_delack_ticks;
> void tcp_delack(void *);
> +void tcp_delack_task(void *);
>
> -#define TCP_INIT_DELACK(tp) \
> - timeout_set(&(tp)->t_delack_to, tcp_delack, tp)
> +#define TCP_INIT_DELACK(tp) do { \
> + timeout_set(&(tp)->t_delack_to, tcp_delack, tp); \
> + task_set(&(tp)->t_delack_to_task, tcp_delack_task, tp); \
> +} while (/* CONSTCOND */ 0)
>
> #define TCP_RESTART_DELACK(tp)
> \
> timeout_add(&(tp)->t_delack_to, tcp_delack_ticks)
> @@ -594,6 +603,7 @@ extern int tcp_syn_cache_active; /* acti
>
> int tcp_attach(struct socket *);
> void tcp_canceltimers(struct tcpcb *);
> +void tcp_destroytimers(struct tcpcb *);
> struct tcpcb *
> tcp_close(struct tcpcb *);
> void tcp_reaper(void *);
> Index: netinet/tcp_debug.c
> ===================================================================
> RCS file: /cvs/src/sys/netinet/tcp_debug.c,v
> retrieving revision 1.23
> diff -u -p -r1.23 tcp_debug.c
> --- netinet/tcp_debug.c 14 Mar 2015 03:38:52 -0000 1.23
> +++ netinet/tcp_debug.c 12 Sep 2016 09:58:40 -0000
> @@ -81,6 +81,7 @@
> #include <sys/mbuf.h>
> #include <sys/socket.h>
> #include <sys/protosw.h>
> +#include <sys/task.h>
>
> #include <net/route.h>
>
> Index: sys/systm.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/systm.h,v
> retrieving revision 1.116
> diff -u -p -r1.116 systm.h
> --- sys/systm.h 4 Sep 2016 09:22:29 -0000 1.116
> +++ sys/systm.h 12 Sep 2016 10:01:13 -0000
> @@ -287,6 +289,8 @@ struct uio;
> int uiomove(void *, size_t, struct uio *);
>
> #if defined(_KERNEL)
> +extern struct taskq *softnettq; /* Network task queue. */
> +
> __returns_twice int setjmp(label_t *);
> __dead void longjmp(label_t *);
> #endif
>