Author: lstewart
Date: Thu Aug 25 13:33:32 2016
New Revision: 304803
URL: https://svnweb.freebsd.org/changeset/base/304803

Log:
  Pass the number of segments coalesced by LRO up the stack by repurposing the
  tso_segsz pkthdr field during RX processing, and use the information in TCP
  for more correct accounting and as a congestion control input. This is only
  a start, and an audit of other uses for the data is left as future work.
  
  Reviewed by:  gallatin, rrs
  Sponsored by: Netflix, Inc.
  Differential Revision:        https://reviews.freebsd.org/D7564

Modified:
  head/sys/netinet/cc/cc.h
  head/sys/netinet/cc/cc_newreno.c
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_lro.c
  head/sys/netinet/tcp_var.h
  head/sys/sys/mbuf.h

Modified: head/sys/netinet/cc/cc.h
==============================================================================
--- head/sys/netinet/cc/cc.h    Thu Aug 25 13:24:11 2016        (r304802)
+++ head/sys/netinet/cc/cc.h    Thu Aug 25 13:33:32 2016        (r304803)
@@ -86,6 +86,7 @@ struct cc_var {
                struct tcpcb            *tcp;
                struct sctp_nets        *sctp;
        } ccvc;
+       uint16_t        nsegs; /* # segments coalesced into current chain. */
 };
 
 /* cc_var flags. */

Modified: head/sys/netinet/cc/cc_newreno.c
==============================================================================
--- head/sys/netinet/cc/cc_newreno.c    Thu Aug 25 13:24:11 2016        (r304802)
+++ head/sys/netinet/cc/cc_newreno.c    Thu Aug 25 13:33:32 2016        (r304803)
@@ -137,7 +137,8 @@ newreno_ack_received(struct cc_var *ccv,
                         */
                        if (CCV(ccv, snd_nxt) == CCV(ccv, snd_max))
                                incr = min(ccv->bytes_this_ack,
-                                   V_tcp_abc_l_var * CCV(ccv, t_maxseg));
+                                   ccv->nsegs * V_tcp_abc_l_var *
+                                   CCV(ccv, t_maxseg));
                        else
                                incr = min(ccv->bytes_this_ack, CCV(ccv, 
t_maxseg));
                }

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c        Thu Aug 25 13:24:11 2016        (r304802)
+++ head/sys/netinet/tcp_input.c        Thu Aug 25 13:33:32 2016        (r304803)
@@ -300,10 +300,12 @@ hhook_run_tcp_est_in(struct tcpcb *tp, s
  * CC wrapper hook functions
  */
 void
-cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t type)
+cc_ack_received(struct tcpcb *tp, struct tcphdr *th, uint16_t nsegs,
+    uint16_t type)
 {
        INP_WLOCK_ASSERT(tp->t_inpcb);
 
+       tp->ccv->nsegs = nsegs;
        tp->ccv->bytes_this_ack = BYTES_THIS_ACK(tp, th);
        if (tp->snd_cwnd <= tp->snd_wnd)
                tp->ccv->flags |= CCF_CWND_LIMITED;
@@ -313,7 +315,7 @@ cc_ack_received(struct tcpcb *tp, struct
        if (type == CC_ACK) {
                if (tp->snd_cwnd > tp->snd_ssthresh) {
                        tp->t_bytes_acked += min(tp->ccv->bytes_this_ack,
-                            V_tcp_abc_l_var * tcp_maxseg(tp));
+                            nsegs * V_tcp_abc_l_var * tcp_maxseg(tp));
                        if (tp->t_bytes_acked >= tp->snd_cwnd) {
                                tp->t_bytes_acked -= tp->snd_cwnd;
                                tp->ccv->flags |= CCF_ABC_SENTAWND;
@@ -1502,6 +1504,7 @@ tcp_do_segment(struct mbuf *m, struct tc
        int thflags, acked, ourfinisacked, needoutput = 0, sack_changed;
        int rstreason, todrop, win;
        u_long tiwin;
+       uint16_t nsegs;
        char *s;
        struct in_conninfo *inc;
        struct mbuf *mfree;
@@ -1521,6 +1524,7 @@ tcp_do_segment(struct mbuf *m, struct tc
        inc = &tp->t_inpcb->inp_inc;
        tp->sackhint.last_sack_ack = 0;
        sack_changed = 0;
+       nsegs = max(1, m->m_pkthdr.lro_nsegs);
 
        /*
         * If this is either a state-changing packet or current state isn't
@@ -1759,7 +1763,7 @@ tcp_do_segment(struct mbuf *m, struct tc
                                /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */
                                hhook_run_tcp_est_in(tp, th, &to);
 
-                               TCPSTAT_INC(tcps_rcvackpack);
+                               TCPSTAT_ADD(tcps_rcvackpack, nsegs);
                                TCPSTAT_ADD(tcps_rcvackbyte, acked);
                                sbdrop(&so->so_snd, acked);
                                if (SEQ_GT(tp->snd_una, tp->snd_recover) &&
@@ -1772,7 +1776,7 @@ tcp_do_segment(struct mbuf *m, struct tc
                                 * typically means increasing the congestion
                                 * window.
                                 */
-                               cc_ack_received(tp, th, CC_ACK);
+                               cc_ack_received(tp, th, nsegs, CC_ACK);
 
                                tp->snd_una = th->th_ack;
                                /*
@@ -1838,7 +1842,7 @@ tcp_do_segment(struct mbuf *m, struct tc
                         * rcv_nxt.
                         */
                        tp->rcv_up = tp->rcv_nxt;
-                       TCPSTAT_INC(tcps_rcvpack);
+                       TCPSTAT_ADD(tcps_rcvpack, nsegs);
                        TCPSTAT_ADD(tcps_rcvbyte, tlen);
 #ifdef TCPDEBUG
                        if (so->so_options & SO_DEBUG)
@@ -2570,7 +2574,8 @@ tcp_do_segment(struct mbuf *m, struct tc
                                        tp->t_dupacks = 0;
                                else if (++tp->t_dupacks > tcprexmtthresh ||
                                     IN_FASTRECOVERY(tp->t_flags)) {
-                                       cc_ack_received(tp, th, CC_DUPACK);
+                                       cc_ack_received(tp, th, nsegs,
+                                           CC_DUPACK);
                                        if ((tp->t_flags & TF_SACK_PERMIT) &&
                                            IN_FASTRECOVERY(tp->t_flags)) {
                                                int awnd;
@@ -2620,7 +2625,8 @@ tcp_do_segment(struct mbuf *m, struct tc
                                        }
                                        /* Congestion signal before ack. */
                                        cc_cong_signal(tp, th, CC_NDUPACK);
-                                       cc_ack_received(tp, th, CC_DUPACK);
+                                       cc_ack_received(tp, th, nsegs,
+                                           CC_DUPACK);
                                        tcp_timer_activate(tp, TT_REXMT, 0);
                                        tp->t_rtttime = 0;
                                        if (tp->t_flags & TF_SACK_PERMIT) {
@@ -2654,7 +2660,8 @@ tcp_do_segment(struct mbuf *m, struct tc
                                         * segment. Restore the original
                                         * snd_cwnd after packet transmission.
                                         */
-                                       cc_ack_received(tp, th, CC_DUPACK);
+                                       cc_ack_received(tp, th, nsegs,
+                                           CC_DUPACK);
                                        u_long oldcwnd = tp->snd_cwnd;
                                        tcp_seq oldsndmax = tp->snd_max;
                                        u_int sent;
@@ -2756,7 +2763,7 @@ process_ACK:
                KASSERT(acked >= 0, ("%s: acked unexepectedly negative "
                    "(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__,
                    tp->snd_una, th->th_ack, tp, m));
-               TCPSTAT_INC(tcps_rcvackpack);
+               TCPSTAT_ADD(tcps_rcvackpack, nsegs);
                TCPSTAT_ADD(tcps_rcvackbyte, acked);
 
                /*
@@ -2821,7 +2828,7 @@ process_ACK:
                 * control related information. This typically means increasing
                 * the congestion window.
                 */
-               cc_ack_received(tp, th, CC_ACK);
+               cc_ack_received(tp, th, nsegs, CC_ACK);
 
                SOCKBUF_LOCK(&so->so_snd);
                if (acked > sbavail(&so->so_snd)) {

Modified: head/sys/netinet/tcp_lro.c
==============================================================================
--- head/sys/netinet/tcp_lro.c  Thu Aug 25 13:24:11 2016        (r304802)
+++ head/sys/netinet/tcp_lro.c  Thu Aug 25 13:33:32 2016        (r304803)
@@ -392,6 +392,7 @@ tcp_lro_flush(struct lro_ctrl *lc, struc
 #endif
        }
 
+       le->m_head->m_pkthdr.lro_nsegs = le->append_cnt + 1;
        (*lc->ifp->if_input)(lc->ifp, le->m_head);
        lc->lro_queued += le->append_cnt + 1;
        lc->lro_flushed++;

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h  Thu Aug 25 13:24:11 2016        (r304802)
+++ head/sys/netinet/tcp_var.h  Thu Aug 25 13:33:32 2016        (r304803)
@@ -784,7 +784,7 @@ void        tcp_pulloutofband(struct socket *,
 void   tcp_xmit_timer(struct tcpcb *, int);
 void   tcp_newreno_partial_ack(struct tcpcb *, struct tcphdr *);
 void   cc_ack_received(struct tcpcb *tp, struct tcphdr *th,
-                           uint16_t type);
+                           uint16_t nsegs, uint16_t type);
 void   cc_conn_init(struct tcpcb *tp);
 void   cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
 void   cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);

Modified: head/sys/sys/mbuf.h
==============================================================================
--- head/sys/sys/mbuf.h Thu Aug 25 13:24:11 2016        (r304802)
+++ head/sys/sys/mbuf.h Thu Aug 25 13:33:32 2016        (r304803)
@@ -174,6 +174,7 @@ struct pkthdr {
 #define        PH_vt           PH_per
 #define        vt_nrecs        sixteen[0]
 #define        tso_segsz       PH_per.sixteen[1]
+#define        lro_nsegs       tso_segsz
 #define        csum_phsum      PH_per.sixteen[2]
 #define        csum_data       PH_per.thirtytwo[1]
 
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to