Author: tuexen
Date: Mon Sep  2 19:04:02 2019
New Revision: 351725
URL: https://svnweb.freebsd.org/changeset/base/351725

Log:
  This patch improves the DSACK handling to conform with RFC 2883.
  The lowest SACK block is used when multiple blocks would be eligible as
  DSACK blocks. SACK blocks get reordered, while the ordering of SACK blocks
  not relevant in the DSACK context is maintained.
  
  Reviewed by:          rrs@, tuexen@
  Obtained from:                Richard Scheffenegger
  MFC after:            1 week
  Differential Revision:        https://reviews.freebsd.org/D21038

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_sack.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==============================================================================
--- head/sys/netinet/tcp_input.c        Mon Sep  2 18:32:08 2019        
(r351724)
+++ head/sys/netinet/tcp_input.c        Mon Sep  2 19:04:02 2019        
(r351725)
@@ -1486,7 +1486,6 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, stru
        } else {
                tp->rfbuf_cnt += tlen;  /* add up */
        }
-
        return (newsize);
 }
 

Modified: head/sys/netinet/tcp_sack.c
==============================================================================
--- head/sys/netinet/tcp_sack.c Mon Sep  2 18:32:08 2019        (r351724)
+++ head/sys/netinet/tcp_sack.c Mon Sep  2 19:04:02 2019        (r351725)
@@ -149,7 +149,109 @@ SYSCTL_INT(_net_inet_tcp_sack, OID_AUTO, globalholes, 
     &VNET_NAME(tcp_sack_globalholes), 0,
     "Global number of TCP SACK holes currently allocated");
 
+
 /*
+ * This function will find overlaps with the currently stored sackblocks
+ * and add any overlap as a dsack block upfront
+ */
+void
+tcp_update_dsack_list(struct tcpcb *tp, tcp_seq rcv_start, tcp_seq rcv_end)
+{
+       struct sackblk head_blk,mid_blk,saved_blks[MAX_SACK_BLKS];
+       int i, j, n, identical;
+       tcp_seq start, end;
+
+       INP_WLOCK_ASSERT(tp->t_inpcb);
+
+       KASSERT(SEQ_LT(rcv_start, rcv_end), ("rcv_start < rcv_end"));
+
+       if (tp->t_inpcb->inp_socket->so_options & SO_DEBUG) {
+               log(LOG_DEBUG, "\nDSACK update: %d..%d, rcv_nxt: %u\n",
+               rcv_start, rcv_end, tp->rcv_nxt);
+       }
+
+       if (SEQ_LT(rcv_end, tp->rcv_nxt) ||
+           ((rcv_end == tp->rcv_nxt) &&
+            (tp->rcv_numsacks > 0 ) &&
+            (tp->sackblks[0].end == tp->rcv_nxt))) {
+               saved_blks[0].start = rcv_start;
+               saved_blks[0].end = rcv_end;
+       } else {
+               saved_blks[0].start = saved_blks[0].end = 0;
+       }
+
+       head_blk.start = head_blk.end = 0;
+       mid_blk.start = rcv_start;
+       mid_blk.end = rcv_end;
+       identical = 0;
+
+       for (i = 0; i < tp->rcv_numsacks; i++) {
+               start = tp->sackblks[i].start;
+               end = tp->sackblks[i].end;
+               if (SEQ_LT(rcv_end, start)) {
+                       /* pkt left to sack blk */
+                       continue;
+               }
+               if (SEQ_GT(rcv_start, end)) {
+                       /* pkt right to sack blk */
+                       continue;
+               }
+               if (SEQ_GT(tp->rcv_nxt, end)) {
+                       if ((SEQ_MAX(rcv_start, start) != SEQ_MIN(rcv_end, 
end)) &&
+                           (SEQ_GT(head_blk.start, SEQ_MAX(rcv_start, start)) 
||
+                           (head_blk.start == head_blk.end))) {
+                               head_blk.start = SEQ_MAX(rcv_start, start);
+                               head_blk.end = SEQ_MIN(rcv_end, end);
+                       }
+                       continue;
+               }
+               if (((head_blk.start == head_blk.end) ||
+                    SEQ_LT(start, head_blk.start)) &&
+                    (SEQ_GT(end, rcv_start) &&
+                     SEQ_LEQ(start, rcv_end))) {
+                       head_blk.start = start;
+                       head_blk.end = end;
+               }
+               mid_blk.start = SEQ_MIN(mid_blk.start, start);
+               mid_blk.end = SEQ_MAX(mid_blk.end, end);
+               if ((mid_blk.start == start) &&
+                   (mid_blk.end == end))
+                       identical = 1;
+       }
+       if (SEQ_LT(head_blk.start, head_blk.end)) {
+               /* store overlapping range */
+               saved_blks[0].start = SEQ_MAX(rcv_start, head_blk.start);
+               saved_blks[0].end   = SEQ_MIN(rcv_end, head_blk.end);
+       }
+       n = 1;
+       /*
+        * Second, if not ACKed, store the SACK block that
+        * overlaps with the DSACK block unless it is identical
+        */
+       if ((SEQ_LT(tp->rcv_nxt, mid_blk.end) &&
+           !((mid_blk.start == saved_blks[0].start) &&
+           (mid_blk.end == saved_blks[0].end))) ||
+           identical == 1) {
+               saved_blks[n].start = mid_blk.start;
+               saved_blks[n++].end = mid_blk.end;
+       }
+       for (j = 0; (j < tp->rcv_numsacks) && (j < MAX_SACK_BLKS-1); j++) {
+               if (((SEQ_LT(tp->sackblks[j].end, mid_blk.start) ||
+                     SEQ_GT(tp->sackblks[j].start, mid_blk.end)) &&
+                   (SEQ_GT(tp->sackblks[j].start, tp->rcv_nxt))))
+               saved_blks[n++] = tp->sackblks[j];
+       }
+       j = 0;
+       for (i = 0; i < n; i++) {
+               /* we can end up with a stale inital entry */
+               if (SEQ_LT(saved_blks[i].start, saved_blks[i].end)) {
+                       tp->sackblks[j++] = saved_blks[i];
+               }
+       }
+       tp->rcv_numsacks = j;
+}
+
+/*
  * This function is called upon receipt of new valid data (while not in
  * header prediction mode), and it updates the ordered list of sacks.
  */
@@ -170,9 +272,16 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta
        /* Check arguments. */
        KASSERT(SEQ_LEQ(rcv_start, rcv_end), ("rcv_start <= rcv_end"));
 
-       /* SACK block for the received segment. */
-       head_blk.start = rcv_start;
-       head_blk.end = rcv_end;
+       if ((rcv_start == rcv_end) &&
+           (tp->rcv_numsacks >= 1) &&
+           (rcv_end == tp->sackblks[0].end)) {
+               /* retaining DSACK block below rcv_nxt (todrop) */
+               head_blk = tp->sackblks[0];
+       } else {
+               /* SACK block for the received segment. */
+               head_blk.start = rcv_start;
+               head_blk.end = rcv_end;
+       }
 
        /*
         * Merge updated SACK blocks into head_blk, and save unchanged SACK
@@ -266,6 +375,10 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta
                 */
                if (num_saved >= MAX_SACK_BLKS)
                        num_saved--;
+       }
+       if ((rcv_start == rcv_end) &&
+           (rcv_start == tp->sackblks[0].end)) {
+               num_head = 1;
        }
        if (num_saved > 0) {
                /*

Modified: head/sys/netinet/tcp_stacks/rack.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack.c  Mon Sep  2 18:32:08 2019        
(r351724)
+++ head/sys/netinet/tcp_stacks/rack.c  Mon Sep  2 19:04:02 2019        
(r351725)
@@ -1783,6 +1783,14 @@ rack_drop_checks(struct tcpopt *to, struct mbuf *m, st
                        TCPSTAT_INC(tcps_rcvpartduppack);
                        TCPSTAT_ADD(tcps_rcvpartdupbyte, todrop);
                }
+               if (tp->t_flags & TF_SACK_PERMIT) {
+                       /*
+                        * record the left, to-be-dropped edge of data
+                        * here, for use as dsack block further down
+                        */
+                       tcp_update_sack_list(tp, th->th_seq,
+                           th->th_seq + todrop);
+               }
                *drop_hdrlen += todrop; /* drop from the top afterwards */
                th->th_seq += todrop;
                tlen -= todrop;
@@ -4900,7 +4908,8 @@ dodata:                           /* XXX */
                    (TCPS_HAVEESTABLISHED(tp->t_state) ||
                    tfo_syn)) {
                        if (DELAY_ACK(tp, tlen) || tfo_syn) {
-                               rack_timer_cancel(tp, rack, 
rack->r_ctl.rc_rcvtime, __LINE__);
+                               rack_timer_cancel(tp, rack,
+                                   rack->r_ctl.rc_rcvtime, __LINE__);
                                tp->t_flags |= TF_DELACK;
                        } else {
                                rack->r_wanted_output++;
@@ -4934,18 +4943,29 @@ dodata:                         /* XXX */
                         * DSACK actually handled in the fastpath
                         * above.
                         */
-                       tcp_update_sack_list(tp, save_start, save_start + 
save_tlen);
+                       tcp_update_sack_list(tp, save_start,
+                           save_start + save_tlen);
                } else if ((tlen > 0) && SEQ_GT(tp->rcv_nxt, save_rnxt)) {
                        /*
                         * Cleaning sackblks by using zero length
                         * update.
                         */
-                       tcp_update_sack_list(tp, save_start, save_start);
+                       if ((tp->rcv_numsacks >= 1) &&
+                           (tp->sackblks[0].end == save_start)) {
+                               /* partial overlap, recorded at todrop above */
+                               tcp_update_sack_list(tp, tp->sackblks[0].start,
+                                   tp->sackblks[0].end);
+                       } else {
+                               tcp_update_dsack_list(tp, save_start,
+                                   save_start + save_tlen);
+                       }
                } else if ((tlen > 0) && (tlen >= save_tlen)) {
                        /* Update of sackblks. */
-                       tcp_update_sack_list(tp, save_start, save_start + 
save_tlen);
+                       tcp_update_dsack_list(tp, save_start,
+                           save_start + save_tlen);
                } else if (tlen > 0) {
-                       tcp_update_sack_list(tp, save_start, save_start+tlen);
+                       tcp_update_dsack_list(tp, save_start,
+                           save_start + tlen);
                }
        } else {
                m_freem(m);
@@ -4967,7 +4987,8 @@ dodata:                           /* XXX */
                         * now.
                         */
                        if (tp->t_flags & TF_NEEDSYN) {
-                               rack_timer_cancel(tp, rack, 
rack->r_ctl.rc_rcvtime, __LINE__);
+                               rack_timer_cancel(tp, rack,
+                                   rack->r_ctl.rc_rcvtime, __LINE__);
                                tp->t_flags |= TF_DELACK;
                        } else {
                                tp->t_flags |= TF_ACKNOW;
@@ -4984,7 +5005,8 @@ dodata:                           /* XXX */
                        tp->t_starttime = ticks;
                        /* FALLTHROUGH */
                case TCPS_ESTABLISHED:
-                       rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, 
__LINE__);
+                       rack_timer_cancel(tp, rack,
+                           rack->r_ctl.rc_rcvtime, __LINE__);
                        tcp_state_change(tp, TCPS_CLOSE_WAIT);
                        break;
 
@@ -4993,7 +5015,8 @@ dodata:                           /* XXX */
                         * acked so enter the CLOSING state.
                         */
                case TCPS_FIN_WAIT_1:
-                       rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, 
__LINE__);
+                       rack_timer_cancel(tp, rack,
+                           rack->r_ctl.rc_rcvtime, __LINE__);
                        tcp_state_change(tp, TCPS_CLOSING);
                        break;
 
@@ -5003,7 +5026,8 @@ dodata:                           /* XXX */
                         * other standard timers.
                         */
                case TCPS_FIN_WAIT_2:
-                       rack_timer_cancel(tp, rack, rack->r_ctl.rc_rcvtime, 
__LINE__);
+                       rack_timer_cancel(tp, rack,
+                           rack->r_ctl.rc_rcvtime, __LINE__);
                        INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
                        tcp_twstart(tp);
                        return (1);
@@ -5012,7 +5036,8 @@ dodata:                           /* XXX */
        /*
         * Return any desired output.
         */
-       if ((tp->t_flags & TF_ACKNOW) || (sbavail(&so->so_snd) > (tp->snd_max - 
tp->snd_una))) {
+       if ((tp->t_flags & TF_ACKNOW) ||
+           (sbavail(&so->so_snd) > (tp->snd_max - tp->snd_una))) {
                rack->r_wanted_output++;
        }
        INP_WLOCK_ASSERT(tp->t_inpcb);

Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.c
==============================================================================
--- head/sys/netinet/tcp_stacks/rack_bbr_common.c       Mon Sep  2 18:32:08 
2019        (r351724)
+++ head/sys/netinet/tcp_stacks/rack_bbr_common.c       Mon Sep  2 19:04:02 
2019        (r351725)
@@ -495,7 +495,8 @@ ctf_drop_checks(struct tcpopt *to, struct mbuf *m, str
                 * DSACK - add SACK block for dropped range
                 */
                if (tp->t_flags & TF_SACK_PERMIT) {
-                       tcp_update_sack_list(tp, th->th_seq, th->th_seq + tlen);
+                       tcp_update_sack_list(tp, th->th_seq,
+                           th->th_seq + todrop);
                        /*
                         * ACK now, as the next in-sequence segment
                         * will clear the DSACK block again

Modified: head/sys/netinet/tcp_var.h
==============================================================================
--- head/sys/netinet/tcp_var.h  Mon Sep  2 18:32:08 2019        (r351724)
+++ head/sys/netinet/tcp_var.h  Mon Sep  2 19:04:02 2019        (r351725)
@@ -939,6 +939,7 @@ uint32_t tcp_new_ts_offset(struct in_conninfo *);
 tcp_seq         tcp_new_isn(struct in_conninfo *);
 
 int     tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
+void    tcp_update_dsack_list(struct tcpcb *, tcp_seq, tcp_seq);
 void    tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq 
rcv_lastend);
 void    tcp_clean_dsack_blocks(struct tcpcb *tp);
 void    tcp_clean_sackreport(struct tcpcb *tp);
_______________________________________________
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to