Author: rscheff
Date: Sun Nov  8 18:47:05 2020
New Revision: 367492
URL: https://svnweb.freebsd.org/changeset/base/367492

Log:
  Prevent premature SACK block transmission during loss recovery
  
  Under specific conditions, a window update can be sent with
  outdated SACK information. Some clients react to this by
  subsequently delaying loss recovery, making TCP perform very
  poorly.
  
  Reported by:  chengc_netapp.com
  Reviewed by:  rrs, jtl
  MFC after:    2 weeks
  Sponsored by: NetApp, Inc.
  Differential Revision:        https://reviews.freebsd.org/D24237

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_reass.c
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c        Sun Nov  8 18:27:49 2020        
(r367491)
+++ head/sys/netinet/tcp_input.c        Sun Nov  8 18:47:05 2020        
(r367492)
@@ -1462,6 +1462,29 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, stru
 }
 
 void
+tcp_handle_wakeup(struct tcpcb *tp, struct socket *so)
+{
+       /*
+        * Since tp might be gone if the session entered
+        * the TIME_WAIT state before coming here, we need
+        * to check if the socket is still connected.
+        */
+       if ((so->so_state & SS_ISCONNECTED) == 0)
+               return;
+       INP_LOCK_ASSERT(tp->t_inpcb);
+       if (tp->t_flags & TF_WAKESOR) {
+               tp->t_flags &= ~TF_WAKESOR;
+               SOCKBUF_UNLOCK_ASSERT(&so->so_rcv);
+               sorwakeup(so);
+       }
+       if (tp->t_flags & TF_WAKESOW) {
+               tp->t_flags &= ~TF_WAKESOW;
+               SOCKBUF_UNLOCK_ASSERT(&so->so_snd);
+               sowwakeup(so);
+       }
+}
+
+void
 tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so,
     struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos)
 {
@@ -1811,7 +1834,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
                                else if (!tcp_timer_active(tp, TT_PERSIST))
                                        tcp_timer_activate(tp, TT_REXMT,
                                                      tp->t_rxtcur);
-                               sowwakeup(so);
+                               tp->t_flags |= TF_WAKESOW;
                                if (sbavail(&so->so_snd))
                                        (void) tp->t_fb->tfb_tcp_output(tp);
                                goto check_delack;
@@ -1876,8 +1899,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
                                m_adj(m, drop_hdrlen);  /* delayed header drop 
*/
                                sbappendstream_locked(&so->so_rcv, m, 0);
                        }
-                       /* NB: sorwakeup_locked() does an implicit unlock. */
-                       sorwakeup_locked(so);
+                       SOCKBUF_UNLOCK(&so->so_rcv);
+                       tp->t_flags |= TF_WAKESOR;
                        if (DELAY_ACK(tp, tlen)) {
                                tp->t_flags |= TF_DELACK;
                        } else {
@@ -2811,8 +2834,8 @@ process_ACK:
                                tp->snd_wnd = 0;
                        ourfinisacked = 0;
                }
-               /* NB: sowwakeup_locked() does an implicit unlock. */
-               sowwakeup_locked(so);
+               SOCKBUF_UNLOCK(&so->so_snd);
+               tp->t_flags |= TF_WAKESOW;
                m_freem(mfree);
                /* Detect una wraparound. */
                if (!IN_RECOVERY(tp->t_flags) &&
@@ -3033,8 +3056,8 @@ dodata:                                                   
/* XXX */
                                m_freem(m);
                        else
                                sbappendstream_locked(&so->so_rcv, m, 0);
-                       /* NB: sorwakeup_locked() does an implicit unlock. */
-                       sorwakeup_locked(so);
+                       SOCKBUF_UNLOCK(&so->so_rcv);
+                       tp->t_flags |= TF_WAKESOR;
                } else {
                        /*
                         * XXX: Due to the header drop above "th" is
@@ -3101,6 +3124,8 @@ dodata:                                                   
/* XXX */
        if (thflags & TH_FIN) {
                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
                        socantrcvmore(so);
+                       /* The socket upcall is handled by socantrcvmore. */
+                       tp->t_flags &= ~TF_WAKESOR;
                        /*
                         * If connection is half-synchronized
                         * (ie NEEDSYN flag on) then delay ACK,
@@ -3164,6 +3189,7 @@ check_delack:
                tp->t_flags &= ~TF_DELACK;
                tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
        }
+       tcp_handle_wakeup(tp, so);
        INP_WUNLOCK(tp->t_inpcb);
        return;
 
@@ -3197,6 +3223,7 @@ dropafterack:
        TCP_PROBE3(debug__input, tp, th, m);
        tp->t_flags |= TF_ACKNOW;
        (void) tp->t_fb->tfb_tcp_output(tp);
+       tcp_handle_wakeup(tp, so);
        INP_WUNLOCK(tp->t_inpcb);
        m_freem(m);
        return;
@@ -3204,6 +3231,7 @@ dropafterack:
 dropwithreset:
        if (tp != NULL) {
                tcp_dropwithreset(m, th, tp, tlen, rstreason);
+               tcp_handle_wakeup(tp, so);
                INP_WUNLOCK(tp->t_inpcb);
        } else
                tcp_dropwithreset(m, th, NULL, tlen, rstreason);
@@ -3219,8 +3247,10 @@ drop:
                          &tcp_savetcp, 0);
 #endif
        TCP_PROBE3(debug__input, tp, th, m);
-       if (tp != NULL)
+       if (tp != NULL) {
+               tcp_handle_wakeup(tp, so);
                INP_WUNLOCK(tp->t_inpcb);
+       }
        m_freem(m);
 }
 

Modified: head/sys/netinet/tcp_reass.c
==============================================================================
--- head/sys/netinet/tcp_reass.c        Sun Nov  8 18:27:49 2020        
(r367491)
+++ head/sys/netinet/tcp_reass.c        Sun Nov  8 18:47:05 2020        
(r367492)
@@ -959,7 +959,8 @@ new_entry:
                } else {
                        sbappendstream_locked(&so->so_rcv, m, 0);
                }
-               sorwakeup_locked(so);
+               SOCKBUF_UNLOCK(&so->so_rcv);
+               tp->t_flags |= TF_WAKESOR;
                return (flags);
        }
        if (tcp_new_limits) {
@@ -1107,6 +1108,7 @@ present:
 #ifdef TCP_REASS_LOGGING
        tcp_reass_log_dump(tp);
 #endif
-       sorwakeup_locked(so);
+       SOCKBUF_UNLOCK(&so->so_rcv);
+       tp->t_flags |= TF_WAKESOR;
        return (flags);
 }

Modified: head/sys/netinet/tcp_stacks/bbr.c
==============================================================================
--- head/sys/netinet/tcp_stacks/bbr.c   Sun Nov  8 18:27:49 2020        
(r367491)
+++ head/sys/netinet/tcp_stacks/bbr.c   Sun Nov  8 18:47:05 2020        
(r367492)
@@ -7876,8 +7876,8 @@ bbr_process_ack(struct mbuf *m, struct tcphdr *th, str
        acked_amount = min(acked, (int)sbavail(&so->so_snd));
        tp->snd_wnd -= acked_amount;
        mfree = sbcut_locked(&so->so_snd, acked_amount);
-       /* NB: sowwakeup_locked() does an implicit unlock. */
-       sowwakeup_locked(so);
+       SOCKBUF_UNLOCK(&so->so_snd);
+       tp->t_flags |= TF_WAKESOW;
        m_freem(mfree);
        if (SEQ_GT(th->th_ack, tp->snd_una)) {
                bbr_collapse_rtt(tp, bbr, TCP_REXMTVAL(tp));
@@ -8353,8 +8353,8 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, st
                                appended =
 #endif
                                        sbappendstream_locked(&so->so_rcv, m, 
0);
-                       /* NB: sorwakeup_locked() does an implicit unlock. */
-                       sorwakeup_locked(so);
+                       SOCKBUF_UNLOCK(&so->so_rcv);
+                       tp->t_flags |= TF_WAKESOR;
 #ifdef NETFLIX_SB_LIMITS
                        if (so->so_rcv.sb_shlim && appended != mcnt)
                                counter_fo_release(so->so_rcv.sb_shlim,
@@ -8414,6 +8414,8 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, st
        if (thflags & TH_FIN) {
                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
                        socantrcvmore(so);
+                       /* The socket upcall is handled by socantrcvmore. */
+                       tp->t_flags &= ~TF_WAKESOR;
                        /*
                         * If connection is half-synchronized (ie NEEDSYN
                         * flag on) then delay ACK, so it may be piggybacked
@@ -8604,8 +8606,8 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, 
                        sbappendstream_locked(&so->so_rcv, m, 0);
                ctf_calc_rwin(so, tp);
        }
-       /* NB: sorwakeup_locked() does an implicit unlock. */
-       sorwakeup_locked(so);
+       SOCKBUF_UNLOCK(&so->so_rcv);
+       tp->t_flags |= TF_WAKESOR;
 #ifdef NETFLIX_SB_LIMITS
        if (so->so_rcv.sb_shlim && mcnt != appended)
                counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended);
@@ -8796,7 +8798,7 @@ bbr_fastack(struct mbuf *m, struct tcphdr *th, struct 
                    &tcp_savetcp, 0);
 #endif
        /* Wake up the socket if we have room to write more */
-       sowwakeup(so);
+       tp->t_flags |= TF_WAKESOW;
        if (tp->snd_una == tp->snd_max) {
                /* Nothing left outstanding */
                bbr_log_progress_event(bbr, tp, ticks, PROGRESS_CLEAR, 
__LINE__);
@@ -11740,8 +11742,10 @@ bbr_do_segment(struct mbuf *m, struct tcphdr *th, stru
        }
        retval = bbr_do_segment_nounlock(m, th, so, tp,
                                         drop_hdrlen, tlen, iptos, 0, &tv);
-       if (retval == 0)
+       if (retval == 0) {
+               tcp_handle_wakeup(tp, so);
                INP_WUNLOCK(tp->t_inpcb);
+       }
 }
 
 /*

Modified: head/sys/netinet/tcp_stacks/rack.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack.c  Sun Nov  8 18:27:49 2020        
(r367491)
+++ head/sys/netinet/tcp_stacks/rack.c  Sun Nov  8 18:47:05 2020        
(r367492)
@@ -8344,8 +8344,8 @@ rack_process_ack(struct mbuf *m, struct tcphdr *th, st
                 */
                ourfinisacked = 1;
        }
-       /* NB: sowwakeup_locked() does an implicit unlock. */
-       sowwakeup_locked(so);
+       SOCKBUF_UNLOCK(&so->so_snd);
+       tp->t_flags |= TF_WAKESOW;
        m_freem(mfree);
        if (rack->r_ctl.rc_early_recovery == 0) {
                if (IN_RECOVERY(tp->t_flags)) {
@@ -8665,8 +8665,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s
                                appended =
 #endif
                                        sbappendstream_locked(&so->so_rcv, m, 
0);
-                       /* NB: sorwakeup_locked() does an implicit unlock. */
-                       sorwakeup_locked(so);
+                       SOCKBUF_UNLOCK(&so->so_rcv);
+                       tp->t_flags |= TF_WAKESOR;
 #ifdef NETFLIX_SB_LIMITS
                        if (so->so_rcv.sb_shlim && appended != mcnt)
                                counter_fo_release(so->so_rcv.sb_shlim,
@@ -8731,6 +8731,8 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s
        if (thflags & TH_FIN) {
                if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
                        socantrcvmore(so);
+                       /* The socket upcall is handled by socantrcvmore. */
+                       tp->t_flags &= ~TF_WAKESOR;
                        /*
                         * If connection is half-synchronized (ie NEEDSYN
                         * flag on) then delay ACK, so it may be piggybacked
@@ -8922,8 +8924,8 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th,
                        sbappendstream_locked(&so->so_rcv, m, 0);
                ctf_calc_rwin(so, tp);
        }
-       /* NB: sorwakeup_locked() does an implicit unlock. */
-       sorwakeup_locked(so);
+       SOCKBUF_UNLOCK(&so->so_rcv);
+       tp->t_flags |= TF_WAKESOR;
 #ifdef NETFLIX_SB_LIMITS
        if (so->so_rcv.sb_shlim && mcnt != appended)
                counter_fo_release(so->so_rcv.sb_shlim, mcnt - appended);
@@ -9140,7 +9142,7 @@ rack_fastack(struct mbuf *m, struct tcphdr *th, struct
                rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, __LINE__);
        }
        /* Wake up the socket if we have room to write more */
-       sowwakeup(so);
+       tp->t_flags |= TF_WAKESOW;
        if (sbavail(&so->so_snd)) {
                rack->r_wanted_output = 1;
        }
@@ -11188,8 +11190,10 @@ rack_do_segment(struct mbuf *m, struct tcphdr *th, str
                tcp_get_usecs(&tv);
        }
        if(rack_do_segment_nounlock(m, th, so, tp,
-                                   drop_hdrlen, tlen, iptos, 0, &tv) == 0)
+                                   drop_hdrlen, tlen, iptos, 0, &tv) == 0) {
+               tcp_handle_wakeup(tp, so);
                INP_WUNLOCK(tp->t_inpcb);
+       }
 }
 
 struct rack_sendmap *

Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack_bbr_common.c       Sun Nov  8 18:27:49 
2020        (r367491)
+++ head/sys/netinet/tcp_stacks/rack_bbr_common.c       Sun Nov  8 18:47:05 
2020        (r367492)
@@ -458,6 +458,7 @@ ctf_do_queued_segments(struct socket *so, struct tcpcb
                        /* We lost the tcpcb (maybe a RST came in)? */
                        return(1);
                }
+               tcp_handle_wakeup(tp, so);
        }
        return (0);
 }

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h  Sun Nov  8 18:27:49 2020        (r367491)
+++ head/sys/netinet/tcp_var.h  Sun Nov  8 18:47:05 2020        (r367492)
@@ -381,7 +381,7 @@ TAILQ_HEAD(tcp_funchead, tcp_function);
 #define        TF_NEEDFIN      0x00000800      /* send FIN (implicit state) */
 #define        TF_NOPUSH       0x00001000      /* don't push */
 #define        TF_PREVVALID    0x00002000      /* saved values for bad rxmit 
valid */
-#define        TF_UNUSED1      0x00004000      /* unused */
+#define        TF_WAKESOR      0x00004000      /* wake up receive socket */
 #define        TF_GPUTINPROG   0x00008000      /* Goodput measurement in 
progress */
 #define        TF_MORETOCOME   0x00010000      /* More data to be appended to 
sock */
 #define        TF_LQ_OVERFLOW  0x00020000      /* listen queue overflow */
@@ -393,9 +393,9 @@ TAILQ_HEAD(tcp_funchead, tcp_function);
 #define        TF_FORCEDATA    0x00800000      /* force out a byte */
 #define        TF_TSO          0x01000000      /* TSO enabled on this 
connection */
 #define        TF_TOE          0x02000000      /* this connection is offloaded 
*/
-#define        TF_UNUSED3      0x04000000      /* unused */
-#define        TF_UNUSED4      0x08000000      /* unused */
-#define        TF_UNUSED5      0x10000000      /* unused */
+#define        TF_WAKESOW      0x04000000      /* wake up send socket */
+#define        TF_UNUSED1      0x08000000      /* unused */
+#define        TF_UNUSED2      0x10000000      /* unused */
 #define        TF_CONGRECOVERY 0x20000000      /* congestion recovery mode */
 #define        TF_WASCRECOVERY 0x40000000      /* was in congestion recovery */
 #define        TF_FASTOPEN     0x80000000      /* TCP Fast Open indication */
@@ -931,7 +931,8 @@ char        *tcp_log_addrs(struct in_conninfo *, struct 
tcphd
            const void *);
 char   *tcp_log_vain(struct in_conninfo *, struct tcphdr *, void *,
            const void *);
-int     tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *, struct 
mbuf *);
+int     tcp_reass(struct tcpcb *, struct tcphdr *, tcp_seq *, int *,
+           struct mbuf *);
 void    tcp_reass_global_init(void);
 void    tcp_reass_flush(struct tcpcb *);
 void    tcp_dooptions(struct tcpopt *, u_char *, int, int);
@@ -955,6 +956,7 @@ void        hhook_run_tcp_est_in(struct tcpcb *tp,
 int     tcp_input(struct mbuf **, int *, int);
 int     tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *,
            struct tcpcb *, int);
+void    tcp_handle_wakeup(struct tcpcb *, struct socket *);
 void    tcp_do_segment(struct mbuf *, struct tcphdr *,
                        struct socket *, struct tcpcb *, int, int, uint8_t);
 
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to