Hi, There is a race in the tcp timers. As the may sleep to grab the netlock, they may still run after they have been disarmed. Deleting the timeout is not sufficient to cancel them. But the code from 4.4 BSD is assuming this.
So my solution is to add a flag for every timer to see whether it has been armed or canceled. Note that I can remove the TF_DEAD check as tcp_canceltimers() is called before the reaper timer is fired. Cancelation works reliably now. ok? bluhm Index: netinet/tcp_timer.c =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.c,v retrieving revision 1.62 diff -u -p -r1.62 tcp_timer.c --- netinet/tcp_timer.c 23 Jan 2018 21:41:17 -0000 1.62 +++ netinet/tcp_timer.c 2 Feb 2018 21:23:18 -0000 @@ -185,8 +185,11 @@ tcp_timer_rexmt(void *arg) uint32_t rto; NET_LOCK(); - if (tp->t_flags & TF_DEAD) + /* Ignore canceled timeouts or timeouts that have been rescheduled. */ + if (!ISSET((tp)->t_flags, TF_TMR_REXMT) || + timeout_pending(&tp->t_timer[TCPT_REXMT])) goto out; + CLR((tp)->t_flags, TF_TMR_REXMT); if ((tp->t_flags & TF_PMTUD_PEND) && tp->t_inpcb && SEQ_GEQ(tp->t_pmtud_th_seq, tp->snd_una) && @@ -370,10 +373,14 @@ tcp_timer_persist(void *arg) uint32_t rto; NET_LOCK(); - if ((tp->t_flags & TF_DEAD) || - TCP_TIMER_ISARMED(tp, TCPT_REXMT)) { + /* Ignore canceled timeouts or timeouts that have been rescheduled. */ + if (!ISSET((tp)->t_flags, TF_TMR_PERSIST) || + timeout_pending(&tp->t_timer[TCPT_PERSIST])) + goto out; + CLR((tp)->t_flags, TF_TMR_PERSIST); + + if (TCP_TIMER_ISARMED(tp, TCPT_REXMT)) goto out; - } tcpstat_inc(tcps_persisttimeo); /* * Hack: if the peer is dead/unreachable, we do not @@ -406,8 +413,11 @@ tcp_timer_keep(void *arg) struct tcpcb *tp = arg; NET_LOCK(); - if (tp->t_flags & TF_DEAD) + /* Ignore canceled timeouts or timeouts that have been rescheduled. */ + if (!ISSET((tp)->t_flags, TF_TMR_KEEP) || + timeout_pending(&tp->t_timer[TCPT_KEEP])) goto out; + CLR((tp)->t_flags, TF_TMR_KEEP); tcpstat_inc(tcps_keeptimeo); if (TCPS_HAVEESTABLISHED(tp->t_state) == 0) @@ -452,8 +462,11 @@ tcp_timer_2msl(void *arg) struct tcpcb *tp = arg; NET_LOCK(); - if (tp->t_flags & TF_DEAD) + /* Ignore canceled timeouts or timeouts that have been rescheduled. */ + if (!ISSET((tp)->t_flags, TF_TMR_2MSL) || + timeout_pending(&tp->t_timer[TCPT_2MSL])) goto out; + CLR((tp)->t_flags, TF_TMR_2MSL); tcp_timer_freesack(tp); Index: netinet/tcp_timer.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_timer.h,v retrieving revision 1.15 diff -u -p -r1.15 tcp_timer.h --- netinet/tcp_timer.h 23 Jan 2018 21:41:17 -0000 1.15 +++ netinet/tcp_timer.h 2 Feb 2018 21:23:18 -0000 @@ -120,13 +120,19 @@ const char *tcptimers[TCPT_NTIMERS] = timeout_set_proc(&(tp)->t_timer[(timer)], tcp_timer_funcs[(timer)], tp) #define TCP_TIMER_ARM(tp, timer, nticks) \ - timeout_add(&(tp)->t_timer[(timer)], (nticks) * (hz / PR_SLOWHZ)) +do { \ + SET((tp)->t_flags, TF_TIMER << (timer)); \ + timeout_add(&(tp)->t_timer[(timer)], (nticks) * (hz / PR_SLOWHZ)); \ +} while (0) #define TCP_TIMER_DISARM(tp, timer) \ - timeout_del(&(tp)->t_timer[(timer)]) +do { \ + CLR((tp)->t_flags, TF_TIMER << (timer)); \ + timeout_del(&(tp)->t_timer[(timer)]); \ +} while (0) #define TCP_TIMER_ISARMED(tp, timer) \ - timeout_pending(&(tp)->t_timer[(timer)]) + ISSET((tp)->t_flags, TF_TIMER << (timer)) /* * Force a time value to be in a certain range. Index: netinet/tcp_var.h =================================================================== RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_var.h,v retrieving revision 1.129 diff -u -p -r1.129 tcp_var.h --- netinet/tcp_var.h 23 Jan 2018 21:41:17 -0000 1.129 +++ netinet/tcp_var.h 2 Feb 2018 21:23:18 -0000 @@ -100,6 +100,12 @@ struct tcpcb { #define TF_NEEDOUTPUT 0x00800000 /* call tcp_output after tcp_input */ #define TF_BLOCKOUTPUT 0x01000000 /* avert tcp_output during tcp_input */ #define TF_NOPUSH 0x02000000 /* don't push */ +#define TF_TMR_REXMT 0x04000000 /* retransmit timer armed */ +#define TF_TMR_PERSIST 0x08000000 /* retransmit persistence timer armed */ +#define TF_TMR_KEEP 0x10000000 /* keep alive timer armed */ +#define TF_TMR_2MSL 0x20000000 /* 2*msl quiet time timer armed */ +#define TF_TMR_REAPER 0x40000000 /* delayed cleanup timer armed, dead */ +#define TF_TIMER TF_TMR_REXMT /* used to shift with TCPT values */ struct mbuf *t_template; /* skeletal packet for transmit */ struct inpcb *t_inpcb; /* back pointer to internet pcb */