The branch main has been updated by rscheff:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e2c6a6d29b163f936c6f9a8020021f711768808d

commit e2c6a6d29b163f936c6f9a8020021f711768808d
Author:     Richard Scheffenegger <rsch...@freebsd.org>
AuthorDate: 2023-10-09 09:55:14 +0000
Commit:     Richard Scheffenegger <rsch...@freebsd.org>
CommitDate: 2023-10-09 10:37:20 +0000

    tcp: include RFC6675 IsLost() in pipe calculation
    
    Add more accounting while processing SACK data, to
    keep track of when a packet is deemed lost using
    the RFC6675 guidance.
    
    Together with PRR (RFC6972) this allows a sender to
    retransmit presumed lost packets faster, and loss
    recovery to complete earlier.
    
    Reviewed By: cc, rrs, #transport
    Sponsored by: NetApp, Inc.
    Differential Revision: https://reviews.freebsd.org/D39299
---
 sys/netinet/tcp_input.c |  3 +-
 sys/netinet/tcp_sack.c  | 77 ++++++++++++++++++++++++++++++++++++++++++++-----
 sys/netinet/tcp_var.h   |  2 ++
 3 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c
index 11ec7d2d74d2..ba11503955dc 100644
--- a/sys/netinet/tcp_input.c
+++ b/sys/netinet/tcp_input.c
@@ -4048,7 +4048,8 @@ tcp_compute_pipe(struct tcpcb *tp)
        if (tp->t_fb->tfb_compute_pipe == NULL) {
                return (tp->snd_max - tp->snd_una +
                        tp->sackhint.sack_bytes_rexmit -
-                       tp->sackhint.sacked_bytes);
+                       tp->sackhint.sacked_bytes -
+                       tp->sackhint.lost_bytes);
        } else {
                return((*tp->t_fb->tfb_compute_pipe)(tp));
        }
diff --git a/sys/netinet/tcp_sack.c b/sys/netinet/tcp_sack.c
index fca4b99ee83b..9a01ff684cbb 100644
--- a/sys/netinet/tcp_sack.c
+++ b/sys/netinet/tcp_sack.c
@@ -555,6 +555,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, 
tcp_seq th_ack)
        int i, j, num_sack_blks, sack_changed;
        int delivered_data, left_edge_delta;
 
+       tcp_seq loss_hiack = 0;
+       int loss_thresh = 0;
+       int loss_sblks = 0;
+       int notlost_bytes = 0;
+
        INP_WLOCK_ASSERT(tptoinpcb(tp));
 
        num_sack_blks = 0;
@@ -637,6 +642,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq 
th_ack)
                 */
                tp->snd_fack = SEQ_MAX(tp->snd_una, th_ack);
                tp->sackhint.sacked_bytes = 0;  /* reset */
+               tp->sackhint.hole_bytes = 0;
        }
        /*
         * In the while-loop below, incoming SACK blocks (sack_blocks[]) and
@@ -661,13 +667,17 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, 
tcp_seq th_ack)
                 */
                if (((temp = TAILQ_LAST(&tp->snd_holes, sackhole_head)) != 
NULL) &&
                    SEQ_LEQ(tp->snd_fack,temp->end)) {
+                       tp->sackhint.hole_bytes -= temp->end - temp->start;
                        temp->start = SEQ_MAX(tp->snd_fack, 
SEQ_MAX(tp->snd_una, th_ack));
                        temp->end = sblkp->start;
                        temp->rxmit = temp->start;
                        delivered_data += sblkp->end - sblkp->start;
+                       tp->sackhint.hole_bytes += temp->end - temp->start;
+                       KASSERT(tp->sackhint.hole_bytes >= 0,
+                           ("sackhint hole bytes >= 0"));
                        tp->snd_fack = sblkp->end;
-                       sblkp--;
                        sack_changed = 1;
+                       sblkp--;
                } else {
                        /*
                         * Append a new SACK hole at the tail.  If the
@@ -678,10 +688,11 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, 
tcp_seq th_ack)
                        temp = tcp_sackhole_insert(tp, 
tp->snd_fack,sblkp->start,NULL);
                        if (temp != NULL) {
                                delivered_data += sblkp->end - sblkp->start;
+                               tp->sackhint.hole_bytes += temp->end - 
temp->start;
                                tp->snd_fack = sblkp->end;
+                               sack_changed = 1;
                                /* Go to the previous sack block. */
                                sblkp--;
-                               sack_changed = 1;
                        } else {
                                /*
                                 * We failed to add a new hole based on the 
current
@@ -709,11 +720,29 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, 
tcp_seq th_ack)
                sack_changed = 1;
        }
        cur = TAILQ_LAST(&tp->snd_holes, sackhole_head); /* Last SACK hole. */
+       loss_hiack = tp->snd_fack;
+
        /*
         * Since the incoming sack blocks are sorted, we can process them
         * making one sweep of the scoreboard.
         */
-       while (sblkp >= sack_blocks  && cur != NULL) {
+       while (cur != NULL) {
+               if (!(sblkp >= sack_blocks)) {
+                       if (((loss_sblks >= tcprexmtthresh) ||
+                           (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) 
+                               break;
+                       loss_thresh += loss_hiack - cur->end;
+                       loss_hiack = cur->start;
+                       loss_sblks++;
+                       if (!((loss_sblks >= tcprexmtthresh) ||
+                           (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg))) {
+                               notlost_bytes += cur->end - cur->start;
+                       } else {
+                               break;
+                       }
+                       cur = TAILQ_PREV(cur, sackhole_head, scblink);
+                       continue;
+               }
                if (SEQ_GEQ(sblkp->start, cur->end)) {
                        /*
                         * SACKs data beyond the current hole.  Go to the
@@ -727,6 +756,12 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, 
tcp_seq th_ack)
                         * SACKs data before the current hole.  Go to the
                         * previous hole.
                         */
+                       loss_thresh += loss_hiack - cur->end;
+                       loss_hiack = cur->start;
+                       loss_sblks++;
+                       if (!((loss_sblks >= tcprexmtthresh) ||
+                           (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg)))
+                               notlost_bytes += cur->end - cur->start;
                        cur = TAILQ_PREV(cur, sackhole_head, scblink);
                        continue;
                }
@@ -742,6 +777,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq 
th_ack)
                                delivered_data += (cur->end - cur->start);
                                temp = cur;
                                cur = TAILQ_PREV(cur, sackhole_head, scblink);
+                               tp->sackhint.hole_bytes -= temp->end - 
temp->start;
                                tcp_sackhole_remove(tp, temp);
                                /*
                                 * The sack block may ack all or part of the
@@ -752,6 +788,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq 
th_ack)
                        } else {
                                /* Move start of hole forward. */
                                delivered_data += (sblkp->end - cur->start);
+                               tp->sackhint.hole_bytes -= sblkp->end - 
cur->start;
                                cur->start = sblkp->end;
                                cur->rxmit = SEQ_MAX(cur->rxmit, cur->start);
                        }
@@ -760,6 +797,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq 
th_ack)
                        if (SEQ_GEQ(sblkp->end, cur->end)) {
                                /* Move end of hole backward. */
                                delivered_data += (cur->end - sblkp->start);
+                               tp->sackhint.hole_bytes -= cur->end - 
sblkp->start;
                                cur->end = sblkp->start;
                                cur->rxmit = SEQ_MIN(cur->rxmit, cur->end);
                                if ((tp->t_flags & TF_LRD) && 
SEQ_GEQ(cur->rxmit, cur->end))
@@ -778,6 +816,13 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, 
tcp_seq th_ack)
                                                    (SEQ_MIN(temp->rxmit,
                                                    temp->end) - temp->start);
                                        }
+                                       tp->sackhint.hole_bytes -= sblkp->end - 
sblkp->start;
+                                       loss_thresh += loss_hiack - temp->end;
+                                       loss_hiack = temp->start;
+                                       loss_sblks++;
+                                       if (!((loss_sblks >= tcprexmtthresh) ||
+                                           (loss_thresh > 
(tcprexmtthresh-1)*tp->t_maxseg)))
+                                               notlost_bytes += temp->end - 
temp->start;
                                        cur->end = sblkp->start;
                                        cur->rxmit = SEQ_MIN(cur->rxmit,
                                            cur->end);
@@ -794,11 +839,25 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, 
tcp_seq th_ack)
                 * we're done with the sack block or the sack hole.
                 * Accordingly, we advance one or the other.
                 */
-               if (SEQ_LEQ(sblkp->start, cur->start))
+               if (SEQ_LEQ(sblkp->start, cur->start)) {
+                       loss_thresh += loss_hiack - cur->end;
+                       loss_hiack = cur->start;
+                       loss_sblks++;
+                       if (!((loss_sblks >= tcprexmtthresh) ||
+                           (loss_thresh > (tcprexmtthresh-1)*tp->t_maxseg)))
+                               notlost_bytes += cur->end - cur->start;
                        cur = TAILQ_PREV(cur, sackhole_head, scblink);
-               else
+               } else {
                        sblkp--;
+               }
        }
+
+       KASSERT(!(TAILQ_EMPTY(&tp->snd_holes) && (tp->sackhint.hole_bytes != 
0)),
+           ("SACK scoreboard empty, but accounting non-zero\n"));
+
+       KASSERT(notlost_bytes <= tp->sackhint.hole_bytes,
+           ("SACK: more bytes marked notlost than in scoreboard holes"));
+
        if (!(to->to_flags & TOF_SACK))
                /*
                 * If this ACK did not contain any
@@ -811,6 +870,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq 
th_ack)
                sack_changed = 0;
        tp->sackhint.delivered_data = delivered_data;
        tp->sackhint.sacked_bytes += delivered_data - left_edge_delta;
+       tp->sackhint.lost_bytes = tp->sackhint.hole_bytes - notlost_bytes;
        KASSERT((delivered_data >= 0), ("delivered_data < 0"));
        KASSERT((tp->sackhint.sacked_bytes >= 0), ("sacked_bytes < 0"));
        return (sack_changed);
@@ -845,6 +905,7 @@ tcp_free_sackholes(struct tcpcb *tp)
 void
 tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
 {
+       struct sackhole *temp;
        int num_segs = 1;
        u_int maxseg = tcp_maxseg(tp);
 
@@ -891,8 +952,10 @@ tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th)
                highdata = SEQ_MIN(highdata, tp->snd_recover);
                if (th->th_ack != highdata) {
                        tp->snd_fack = th->th_ack;
-                       (void)tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack,
-                           highdata - maxseg), highdata, NULL);
+                       if ((temp = tcp_sackhole_insert(tp, SEQ_MAX(th->th_ack,
+                           highdata - maxseg), highdata, NULL)) != NULL)
+                               tp->sackhint.hole_bytes += temp->end -
+                                                               temp->start;
                }
        }
        (void) tcp_output(tp);
diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h
index cd5712132522..11509a87c6e7 100644
--- a/sys/netinet/tcp_var.h
+++ b/sys/netinet/tcp_var.h
@@ -128,6 +128,8 @@ struct sackhint {
        uint32_t        recover_fs;     /* Flight Size at the start of Loss 
recovery */
        uint32_t        prr_delivered;  /* Total bytes delivered using PRR */
        uint32_t        prr_out;        /* Bytes sent during IN_RECOVERY */
+       uint32_t        hole_bytes;     /* current number of bytes in 
scoreboard holes */
+       uint32_t        lost_bytes;     /* number of rfc6675 IsLost() bytes */
 };
 
 #define SEGQ_EMPTY(tp) TAILQ_EMPTY(&(tp)->t_segq)

Reply via email to