On 24/10/17(Tue) 23:22, Job Snijders wrote: > Dear all, > > This patch builds upon the work shared in the following email. Mike's > patch is a prerequisite to apply this patch. > > Date: Tue, 24 Oct 2017 15:21:08 +0200 > From: Mike Belopuhov <m...@belopuhov.com> > Subject: Re: Refactor TCP partial ACK handling > > TCP_FACK was disabled by provos@ in June 1999. This patch removes > the TCP_FACK option and associated #if{,n}def code. > > TCP_FACK is an algorithm that decides that when something is lost, all > not SACKed packets until the most forward SACK are lost. It may be a > correct estimate, if network does not reorder packets. > > The algorithm described in RFC 6675 may be a better replacement. This > culling patch can provide guidance how and where to implement 6675.
I'm happy to see fewer #ifdef in these spaghetti. Especially now that some refactoring is welcome for future CC and MP works. ok mpi@ > diff --git share/man/man4/options.4 share/man/man4/options.4 > index c28d4e27896..737dc29efea 100644 > --- share/man/man4/options.4 > +++ share/man/man4/options.4 > @@ -445,11 +445,6 @@ TCP to adjust the transmission rate using this signal. > Both communication endpoints negotiate enabling > .Em ECN > functionality at the TCP connection establishment. > -.It Cd option TCP_FACK > -Turns on forward acknowledgements allowing a more precise estimate of > -outstanding data during the fast recovery phase by using > -.Em SACK > -information. > .It Cd option TCP_SIGNATURE > Turns on support for the TCP MD5 Signature option (RFC 2385). > This is used by > diff --git sys/conf/GENERIC sys/conf/GENERIC > index 6df800175ed..e385b45785c 100644 > --- sys/conf/GENERIC > +++ sys/conf/GENERIC > @@ -47,7 +47,6 @@ option FUSE # FUSE > option SOCKET_SPLICE # Socket Splicing for TCP and UDP > option TCP_ECN # Explicit Congestion Notification for > TCP > option TCP_SIGNATURE # TCP MD5 Signatures, for BGP routing > sessions > -#option TCP_FACK # Forward Acknowledgements for TCP > > option INET6 # IPv6 > option IPSEC # IPsec > diff --git sys/netinet/tcp_input.c sys/netinet/tcp_input.c > index 8d172e2905c..4321d85854c 100644 > --- sys/netinet/tcp_input.c > +++ sys/netinet/tcp_input.c > @@ -974,10 +974,6 @@ findpcb: > if (SEQ_GT(tp->snd_una, tp->snd_last)) > #endif > tp->snd_last = tp->snd_una; > -#ifdef TCP_FACK > - tp->snd_fack = tp->snd_una; > - tp->retran_data = 0; > -#endif > m_freem(m); > > /* > @@ -1566,18 +1562,7 @@ trimthenstep6: > */ > if (TCP_TIMER_ISARMED(tp, TCPT_REXMT) == 0) > tp->t_dupacks = 0; > -#ifdef TCP_FACK > - /* > - * In FACK, can enter fast rec. if the receiver > - * reports a reass. queue longer than 3 segs. > - */ > - else if (++tp->t_dupacks == tcprexmtthresh || > - ((SEQ_GT(tp->snd_fack, tcprexmtthresh * > - tp->t_maxseg + tp->snd_una)) && > - SEQ_GT(tp->snd_una, tp->snd_last))) { > -#else > else if (++tp->t_dupacks == tcprexmtthresh) { > -#endif /* TCP_FACK */ > tcp_seq onxt = tp->snd_nxt; > u_long win = > ulmin(tp->snd_wnd, tp->snd_cwnd) / > @@ -1603,15 +1588,6 @@ trimthenstep6: > #endif > tcpstat_inc(tcps_cwr_frecovery); > > tcpstat_inc(tcps_sack_recovery_episode); > -#ifdef TCP_FACK > - tp->t_dupacks = tcprexmtthresh; > - (void) tcp_output(tp); > - /* > - * During FR, snd_cwnd is held > - * constant for FACK. > - */ > - tp->snd_cwnd = tp->snd_ssthresh; > -#else > /* > * tcp_output() will send > * oldest SACK-eligible rtx. > @@ -1619,7 +1595,6 @@ trimthenstep6: > (void) tcp_output(tp); > tp->snd_cwnd = tp->snd_ssthresh+ > tp->t_maxseg * tp->t_dupacks; > -#endif /* TCP_FACK */ > goto drop; > } > TCP_TIMER_DISARM(tp, TCPT_REXMT); > @@ -1639,17 +1614,6 @@ trimthenstep6: > tp->snd_nxt = onxt; > goto drop; > } else if (tp->t_dupacks > tcprexmtthresh) { > -#ifdef TCP_FACK > - /* > - * while (awnd < cwnd) > - * sendsomething(); > - */ > - if (tp->sack_enable) { > - if (tp->snd_awnd < tp->snd_cwnd) > - tcp_output(tp); > - goto drop; > - } > -#endif /* TCP_FACK */ > tp->snd_cwnd += tp->t_maxseg; > (void) tcp_output(tp); > goto drop; > @@ -1685,11 +1649,6 @@ trimthenstep6: > tcp_seq_subtract(tp->snd_max, > th->th_ack); > tp->t_dupacks = 0; > -#ifdef TCP_FACK > - if (tp->sack_enable && > - SEQ_GT(th->th_ack, tp->snd_fack)) > - tp->snd_fack = th->th_ack; > -#endif > } > } else { > /* > @@ -1789,16 +1748,6 @@ trimthenstep6: > #endif > if (SEQ_LT(tp->snd_nxt, tp->snd_una)) > tp->snd_nxt = tp->snd_una; > -#ifdef TCP_FACK > - if (SEQ_GT(tp->snd_una, tp->snd_fack)) { > - tp->snd_fack = tp->snd_una; > - /* Update snd_awnd for partial ACK > - * without any SACK blocks. > - */ > - tp->snd_awnd = tcp_seq_subtract(tp->snd_nxt, > - tp->snd_fack) + tp->retran_data; > - } > -#endif > > switch (tp->t_state) { > > @@ -2467,11 +2416,6 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, > u_char *cp, int optlen) > continue; /* bad SACK fields */ > if (SEQ_LEQ(sack.end, tp->snd_una)) > continue; /* old block */ > -#ifdef TCP_FACK > - /* Updates snd_fack. */ > - if (SEQ_GT(sack.end, tp->snd_fack)) > - tp->snd_fack = sack.end; > -#endif > if (SEQ_GT(th->th_ack, tp->snd_una)) { > if (SEQ_LT(sack.start, th->th_ack)) > continue; > @@ -2520,16 +2464,6 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, > u_char *cp, int optlen) > } > if (SEQ_LEQ(sack.start, cur->start)) { > /* Data acks at least the beginning of hole */ > -#ifdef TCP_FACK > - if (SEQ_GT(sack.end, cur->rxmit)) > - tp->retran_data -= > - tcp_seq_subtract(cur->rxmit, > - cur->start); > - else > - tp->retran_data -= > - tcp_seq_subtract(sack.end, > - cur->start); > -#endif > if (SEQ_GEQ(sack.end, cur->end)) { > /* Acks entire hole, so delete hole */ > if (p != cur) { > @@ -2554,12 +2488,6 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, > u_char *cp, int optlen) > } > /* move end of hole backward */ > if (SEQ_GEQ(sack.end, cur->end)) { > -#ifdef TCP_FACK > - if (SEQ_GT(cur->rxmit, sack.start)) > - tp->retran_data -= > - tcp_seq_subtract(cur->rxmit, > - sack.start); > -#endif > cur->end = sack.start; > cur->rxmit = SEQ_MIN(cur->rxmit, cur->end); > cur->dups++; > @@ -2580,16 +2508,6 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, > u_char *cp, int optlen) > pool_get(&sackhl_pool, PR_NOWAIT); > if (temp == NULL) > goto done; /* ENOBUFS */ > -#ifdef TCP_FACK > - if (SEQ_GT(cur->rxmit, sack.end)) > - tp->retran_data -= > - tcp_seq_subtract(sack.end, > - sack.start); > - else if (SEQ_GT(cur->rxmit, sack.start)) > - tp->retran_data -= > - tcp_seq_subtract(cur->rxmit, > - sack.start); > -#endif > temp->next = cur->next; > temp->start = sack.end; > temp->end = cur->end; > @@ -2631,21 +2549,6 @@ tcp_sack_option(struct tcpcb *tp, struct tcphdr *th, > u_char *cp, int optlen) > } > } > done: > -#ifdef TCP_FACK > - /* > - * Update retran_data and snd_awnd. Go through the list of > - * holes. Increment retran_data by (hole->rxmit - hole->start). > - */ > - tp->retran_data = 0; > - cur = tp->snd_holes; > - while (cur) { > - tp->retran_data += cur->rxmit - cur->start; > - cur = cur->next; > - } > - tp->snd_awnd = tcp_seq_subtract(tp->snd_nxt, tp->snd_fack) + > - tp->retran_data; > -#endif > - > return; > } > > @@ -2705,11 +2608,9 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr > *th) > /* Turn off retx. timer (will start again next segment) */ > TCP_TIMER_DISARM(tp, TCPT_REXMT); > tp->t_rtttime = 0; > -#ifndef TCP_FACK > /* > * Partial window deflation. This statement relies on the > - * fact that tp->snd_una has not been updated yet. In FACK > - * hold snd_cwnd constant during fast recovery. > + * fact that tp->snd_una has not been updated yet. > */ > if (tp->snd_cwnd > (th->th_ack - tp->snd_una)) { > tp->snd_cwnd -= th->th_ack - tp->snd_una; > @@ -2718,11 +2619,6 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr > *th) > tp->snd_cwnd = tp->t_maxseg; > tp->snd_cwnd += tp->t_maxseg; > tp->t_flags |= TF_NEEDOUTPUT; > -#else > - /* Force call to tcp_output */ > - if (tp->snd_awnd < tp->snd_cwnd) > - tp->t_flags |= TF_NEEDOUTPUT; > -#endif > } > > /* > @@ -3703,11 +3599,6 @@ syn_cache_get(struct sockaddr *src, struct sockaddr > *dst, struct tcphdr *th, > tp->irs = sc->sc_irs; > tcp_sendseqinit(tp); > tp->snd_last = tp->snd_una; > -#ifdef TCP_FACK > - tp->snd_fack = tp->snd_una; > - tp->retran_data = 0; > - tp->snd_awnd = 0; > -#endif > #ifdef TCP_ECN > if (sc->sc_flags & SCF_ECN_PERMIT) { > tp->t_flags |= TF_ECN_PERMIT; > diff --git sys/netinet/tcp_output.c sys/netinet/tcp_output.c > index a0a88b5ec3d..a652ad1d80d 100644 > --- sys/netinet/tcp_output.c > +++ sys/netinet/tcp_output.c > @@ -130,17 +130,7 @@ tcp_sack_output(struct tcpcb *tp) > return (NULL); > p = tp->snd_holes; > while (p) { > -#ifndef TCP_FACK > if (p->dups >= tcprexmtthresh && SEQ_LT(p->rxmit, p->end)) { > -#else > - /* In FACK, if p->dups is less than tcprexmtthresh, but > - * snd_fack advances more than tcprextmtthresh * tp->t_maxseg, > - * tcp_input() will try fast retransmit. This forces output. > - */ > - if ((p->dups >= tcprexmtthresh || > - tp->t_dupacks == tcprexmtthresh) && > - SEQ_LT(p->rxmit, p->end)) { > -#endif /* TCP_FACK */ > if (SEQ_LT(p->rxmit, tp->snd_una)) {/* old SACK hole */ > p = p->next; > continue; > @@ -258,15 +248,6 @@ again: > if (tp->sack_enable && SEQ_LT(tp->snd_nxt, tp->snd_max)) > tcp_sack_adjust(tp); > off = tp->snd_nxt - tp->snd_una; > -#ifdef TCP_FACK > - /* Normally, sendable data is limited by off < tp->snd_cwnd. > - * But in FACK, sendable data is limited by snd_awnd < snd_cwnd, > - * regardless of offset. > - */ > - if (tp->sack_enable && (tp->t_dupacks > tcprexmtthresh)) > - win = tp->snd_wnd; > - else > -#endif > win = ulmin(tp->snd_wnd, tp->snd_cwnd); > > flags = tcp_outflags[tp->t_state]; > @@ -285,11 +266,8 @@ again: > sack_rxmit = 1; > /* Coalesce holes into a single retransmission */ > len = min(tp->t_maxseg, p->end - p->rxmit); > -#ifndef TCP_FACK > - /* in FACK, hold snd_cwnd constant during recovery */ > if (SEQ_LT(tp->snd_una, tp->snd_last)) > tp->snd_cwnd -= tp->t_maxseg; > -#endif > } > } > > @@ -329,17 +307,6 @@ again: > > if (!sack_rxmit) { > len = ulmin(so->so_snd.sb_cc, win) - off; > - > -#ifdef TCP_FACK > - /* > - * If we're in fast recovery (SEQ_GT(tp->snd_last, > tp->snd_una)), > - * and amount of outstanding data (snd_awnd) is >= snd_cwnd, > then > - * do not send data (like zero window conditions) > - */ > - if (tp->sack_enable && SEQ_GT(tp->snd_last, tp->snd_una) && > - len && (tp->snd_awnd >= tp->snd_cwnd)) > - len = 0; > -#endif /* TCP_FACK */ > } > > if (len < 0) { > @@ -794,9 +761,6 @@ send: > sendalot = 0; > th->th_seq = htonl(p->rxmit); > p->rxmit += len; > -#ifdef TCP_FACK > - tp->retran_data += len; > -#endif > tcpstat_pkt(tcps_sack_rexmits, tcps_sack_rexmit_bytes, len); > } > > @@ -1096,12 +1060,6 @@ send: > #endif /* INET6 */ > } > > -#ifdef TCP_FACK > - /* Update snd_awnd to reflect the new data that was sent. */ > - tp->snd_awnd = tcp_seq_subtract(tp->snd_max, tp->snd_fack) + > - tp->retran_data; > -#endif > - > if (error) { > out: > if (error == ENOBUFS) { > diff --git sys/netinet/tcp_timer.c sys/netinet/tcp_timer.c > index 9d2e75942cd..3452121da6d 100644 > --- sys/netinet/tcp_timer.c > +++ sys/netinet/tcp_timer.c > @@ -172,11 +172,6 @@ tcp_timer_freesack(struct tcpcb *tp) > pool_put(&sackhl_pool, p); > } > tp->snd_holes = 0; > -#ifdef TCP_FACK > - tp->snd_fack = tp->snd_una; > - tp->retran_data = 0; > - tp->snd_awnd = 0; > -#endif > } > > void > diff --git sys/netinet/tcp_usrreq.c sys/netinet/tcp_usrreq.c > index d7ee1ecc517..549149a0531 100644 > --- sys/netinet/tcp_usrreq.c > +++ sys/netinet/tcp_usrreq.c > @@ -269,11 +269,6 @@ tcp_usrreq(struct socket *so, int req, struct mbuf *m, > struct mbuf *nam, > tcp_set_iss_tsm(tp); > tcp_sendseqinit(tp); > tp->snd_last = tp->snd_una; > -#ifdef TCP_FACK > - tp->snd_fack = tp->snd_una; > - tp->retran_data = 0; > - tp->snd_awnd = 0; > -#endif > error = tcp_output(tp); > break; > > diff --git sys/netinet/tcp_var.h sys/netinet/tcp_var.h > index 4c1b37b2b68..4ca5fc0afe4 100644 > --- sys/netinet/tcp_var.h > +++ sys/netinet/tcp_var.h > @@ -119,12 +119,6 @@ struct tcpcb { > int sack_enable; /* enable SACK for this connection */ > int snd_numholes; /* number of holes seen by sender */ > struct sackhole *snd_holes; /* linked list of holes (sorted) */ > -#if 1 /*defined(TCP_FACK)*/ > - tcp_seq snd_fack; /* for FACK congestion control */ > - u_long snd_awnd; /* snd_nxt - snd_fack + */ > - /* retransmitted data */ > - int retran_data; /* amount of outstanding retx. data */ > -#endif /* TCP_FACK */ > tcp_seq snd_last; /* for use in fast recovery */ > /* receive sequence variables */ > u_long rcv_wnd; /* receive window */ > diff --git usr.bin/netstat/inet.c usr.bin/netstat/inet.c > index 468d514a22e..4cb82ea7379 100644 > --- usr.bin/netstat/inet.c > +++ usr.bin/netstat/inet.c > @@ -1496,9 +1496,6 @@ tcpcb_dump(u_long off) > p("%lu", snd_wnd, "\n "); > p("%d", sack_enable, ", "); > p("%d", snd_numholes, ", "); > - p("%u", snd_fack, ", "); > - p("%lu",snd_awnd, "\n "); > - p("%u", retran_data, ", "); > p("%u", snd_last, "\n "); > p("%u", irs, "\n "); > p("%u", rcv_nxt, ", "); >