svn commit: r365501 - head/sys/netinet/tcp_stacks
Author: rrs Date: Wed Sep 9 11:11:50 2020 New Revision: 365501 URL: https://svnweb.freebsd.org/changeset/base/365501 Log: So it turns out that syzkaller hit another crash. It has to do with switching stacks with a SENT_FIN outstanding. Both rack and bbr will only send a FIN if all data is ack'd so this must be enforced. Also if the previous stack sent the FIN we need to make sure in rack that when we manufacture the "unknown" sends that we include the proper HAS_FIN bits. Note for BBR we take a simpler approach and just refuse to switch. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D26269 Modified: head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Wed Sep 9 09:08:09 2020 (r365500) +++ head/sys/netinet/tcp_stacks/bbr.c Wed Sep 9 11:11:50 2020 (r365501) @@ -10281,6 +10281,8 @@ bbr_handoff_ok(struct tcpcb *tp) */ return (EAGAIN); } + if (tp->t_flags & TF_SENTFIN) + return (EINVAL); if ((tp->t_flags & TF_SACK_PERMIT) || bbr_sack_not_required) { return (0); } Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Wed Sep 9 09:08:09 2020 (r365500) +++ head/sys/netinet/tcp_stacks/rack.c Wed Sep 9 11:11:50 2020 (r365501) @@ -10451,7 +10451,12 @@ rack_init(struct tcpcb *tp) rsm->r_rtr_cnt = 1; rsm->r_rtr_bytes = 0; rsm->r_start = tp->snd_una; - rsm->r_end = tp->snd_max; + if (tp->t_flags & TF_SENTFIN) { + rsm->r_end = tp->snd_max - 1; + rsm->r_flags |= RACK_HAS_FIN; + } else { + rsm->r_end = tp->snd_max; + } rsm->usec_orig_send = us_cts; rsm->r_dupack = 0; insret = RB_INSERT(rack_rb_tree_head, >r_ctl.rc_mtree, rsm); @@ -10518,8 +10523,21 @@ rack_handoff_ok(struct tcpcb *tp) if ((tp->t_state == TCPS_SYN_SENT) || (tp->t_state == TCPS_SYN_RECEIVED)) { /* -* We really don't know you have to get to ESTAB or beyond -* to tell. 
+* We really don't know if you support sack, +* you have to get to ESTAB or beyond to tell. +*/ + return (EAGAIN); + } + if ((tp->t_flags & TF_SENTFIN) && ((tp->snd_max - tp->snd_una) > 1)) { + /* +* Rack will only send a FIN after all data is acknowledged. +* So in this case we have more data outstanding. We can't +* switch stacks until either all data and only the FIN +* is left (in which case rack_init() now knows how +* to deal with that) all is acknowledged and we +* are only left with incoming data, though why you +* would want to switch to rack after all data is acknowledged +* I have no idea (rrs)! */ return (EAGAIN); } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r363725 - head/sys/netinet
Author: rrs Date: Fri Jul 31 10:03:32 2020 New Revision: 363725 URL: https://svnweb.freebsd.org/changeset/base/363725 Log: The recent changes to move the ref count increment back from the end of the function created an issue. If one of the routines returns NULL during setup we have inp's with extra references (which is why the increment was at the end). Also the stack switch return code was being ignored and actually has meaning: if the stack cannot take over, it should return NULL. Fix both of these situations by being sure to test the return code and, of course, in any case of returning NULL (there are 3) make sure we properly reduce the ref count. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D25903 Modified: head/sys/netinet/tcp_subr.c Modified: head/sys/netinet/tcp_subr.c == --- head/sys/netinet/tcp_subr.c Fri Jul 31 07:37:08 2020(r363724) +++ head/sys/netinet/tcp_subr.c Fri Jul 31 10:03:32 2020(r363725) @@ -1713,6 +1713,7 @@ tcp_newtcpcb(struct inpcb *inp) if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); + in_pcbrele_wlocked(inp); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); @@ -1723,6 +1724,7 @@ tcp_newtcpcb(struct inpcb *inp) if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) { if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp, 1); + in_pcbrele_wlocked(inp); refcount_release(&tp->t_fb->tfb_refcnt); uma_zfree(V_tcpcb_zone, tm); return (NULL); @@ -1783,7 +1785,12 @@ tcp_newtcpcb(struct inpcb *inp) tcp_log_tcpcbinit(tp); #endif if (tp->t_fb->tfb_tcp_fb_init) { - (*tp->t_fb->tfb_tcp_fb_init)(tp); + if ((*tp->t_fb->tfb_tcp_fb_init)(tp)) { + refcount_release(&tp->t_fb->tfb_refcnt); + in_pcbrele_wlocked(inp); + uma_zfree(V_tcpcb_zone, tm); + return (NULL); + } } #ifdef STATS if (V_tcp_perconn_stats_enable == 1) ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to 
"svn-src-head-unsubscr...@freebsd.org"
svn commit: r362234 - head/sys/netinet/tcp_stacks
Author: rrs Date: Tue Jun 16 18:16:45 2020 New Revision: 362234 URL: https://svnweb.freebsd.org/changeset/base/362234 Log: So in doing final checks on OCA firmware with all the latest tweaks the dup-ack checking packet drill script was failing with a number of unexpected acks. So it turns out if you have the default recvwin set up to 1Meg (like OCA's do) and you have no window scaling (like the dupack checking code) then we have another case where we are always trying to update the rwnd and sending an ack when we should not. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D25298 Modified: head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Tue Jun 16 17:45:23 2020 (r362233) +++ head/sys/netinet/tcp_stacks/bbr.c Tue Jun 16 18:16:45 2020 (r362234) @@ -12157,8 +12157,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeva * have gotten more data into the socket buffer to * send. */ - recwin = min(max(sbspace(&so->so_rcv), 0), - TCP_MAXWIN << tp->rcv_scale); + recwin = lmin(lmax(sbspace(&so->so_rcv), 0), + (long)TCP_MAXWIN << tp->rcv_scale); if ((bbr_window_update_needed(tp, so, recwin, maxseg) == 0) && ((tcp_outflags[tp->t_state] & TH_RST) == 0) && ((sbavail(sb) + ((tcp_outflags[tp->t_state] & TH_FIN) ? 1 : 0)) <= @@ -12839,8 +12839,8 @@ recheck_resend: ipoptlen == 0) tso = 1; - recwin = min(max(sbspace(&so->so_rcv), 0), - TCP_MAXWIN << tp->rcv_scale); + recwin = lmin(lmax(sbspace(&so->so_rcv), 0), + (long)TCP_MAXWIN << tp->rcv_scale); /* * Sender silly window avoidance. 
We transmit under the following * conditions when len is non-zero: Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Tue Jun 16 17:45:23 2020 (r362233) +++ head/sys/netinet/tcp_stacks/rack.c Tue Jun 16 18:16:45 2020 (r362234) @@ -12750,7 +12750,8 @@ again: flags &= ~TH_FIN; } } - recwin = sbspace(&so->so_rcv); + recwin = lmin(lmax(sbspace(&so->so_rcv), 0), + (long)TCP_MAXWIN << tp->rcv_scale); /* * Sender silly window avoidance. We transmit under the following @@ -13656,8 +13657,6 @@ send: if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) && recwin < (long)(tp->rcv_adv - tp->rcv_nxt)) recwin = (long)(tp->rcv_adv - tp->rcv_nxt); - if (recwin > (long)TCP_MAXWIN << tp->rcv_scale) - recwin = (long)TCP_MAXWIN << tp->rcv_scale; } /* ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r362225 - head/sys/netinet/tcp_stacks
Author: rrs Date: Tue Jun 16 12:26:23 2020 New Revision: 362225 URL: https://svnweb.freebsd.org/changeset/base/362225 Log: So it turns out rack has a shortcoming in dup-ack counting. It counts the dupacks but then does not properly respond to them. This is because a few missing bits are not present. BBR actually does properly respond (though it also sends a TLP which is interesting and maybe something to fix).. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D25294 Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Tue Jun 16 12:21:55 2020 (r362224) +++ head/sys/netinet/tcp_stacks/rack.c Tue Jun 16 12:26:23 2020 (r362225) @@ -4588,7 +4588,7 @@ activate_rxt: goto activate_rxt; } /* Convert from ms to usecs */ - if (rsm->r_flags & RACK_SACK_PASSED) { + if ((rsm->r_flags & RACK_SACK_PASSED) || (rsm->r_dupack >= DUP_ACK_THRESHOLD)) { if ((tp->t_flags & TF_SENTFIN) && ((tp->snd_max - tp->snd_una) == 1) && (rsm->r_flags & RACK_HAS_FIN)) { @@ -6237,7 +6237,7 @@ rack_log_output(struct tcpcb *tp, struct tcpopt *to, i * or FIN if seq_out is adding more on and a FIN is present * (and we are not resending). 
*/ - if ((th_flags & TH_SYN) && (seq_out == tp->iss)) + if ((th_flags & TH_SYN) && (seq_out == tp->iss)) len++; if (th_flags & TH_FIN) len++; @@ -8190,6 +8190,7 @@ rack_strike_dupack(struct tcp_rack *rack) rsm->r_dupack++; if (rsm->r_dupack >= DUP_ACK_THRESHOLD) { rack->r_wanted_output = 1; + rack->r_timer_override = 1; rack_log_retran_reason(rack, rsm, __LINE__, 1, 3); } else { rack_log_retran_reason(rack, rsm, __LINE__, 0, 3); @@ -11359,7 +11360,8 @@ check_it: if (rsm->r_flags & RACK_ACKED) { return (NULL); } - if ((rsm->r_flags & RACK_SACK_PASSED) == 0) { + if (((rsm->r_flags & RACK_SACK_PASSED) == 0) && + (rsm->r_dupack < DUP_ACK_THRESHOLD)) { /* Its not yet ready */ return (NULL); } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r362113 - in head/sys/netinet: . tcp_stacks
Author: rrs Date: Fri Jun 12 19:56:19 2020 New Revision: 362113 URL: https://svnweb.freebsd.org/changeset/base/362113 Log: So it turns out with the right window scaling you can get the code in all stacks to always want to do a window update, even when no data can be sent. Now in cases where you are not pacing that's probably ok, you just send an extra window update or two. However with bbr (and rack if it's paced) every time the pacer goes off it's going to send a "window update". Also in testing bbr I have found that if we are not responding to data right away we end up staying in startup but incorrectly holding a pacing gain of 192 (a loss). This is because the idle window code does not restrict itself to only work with PROBE_BW. In all other states you don't want it doing a PROBE_BW state change. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D25247 Modified: head/sys/netinet/tcp_output.c head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Fri Jun 12 18:41:12 2020 (r362112) +++ head/sys/netinet/tcp_output.c Fri Jun 12 19:56:19 2020 (r362113) @@ -655,7 +655,10 @@ after_sack_rexmit: adv = recwin; if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) { oldwin = (tp->rcv_adv - tp->rcv_nxt); - adv -= oldwin; + if (adv > oldwin) + adv -= oldwin; + else + adv = 0; } else oldwin = 0; Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Fri Jun 12 18:41:12 2020 (r362112) +++ head/sys/netinet/tcp_stacks/bbr.c Fri Jun 12 19:56:19 2020 (r362113) @@ -8078,7 +8078,7 @@ bbr_restart_after_idle(struct tcp_bbr *bbr, uint32_t c bbr->r_ctl.rc_bbr_hptsi_gain = bbr->r_ctl.rc_startup_pg; bbr->r_ctl.rc_bbr_cwnd_gain = bbr->r_ctl.rc_startup_pg; bbr_log_type_statechange(bbr, cts, __LINE__); - } else { + } else if (bbr->rc_bbr_state == BBR_STATE_PROBE_BW) { bbr_substate_change(bbr, cts, __LINE__, 1); } } @@ -12000,21 +12000,27 @@ 
bbr_window_update_needed(struct tcpcb *tp, struct sock * "adv" is the amount we could increase the window, taking into * account that we are limited by TCP_MAXWIN << tp->rcv_scale. */ - uint32_t adv; + int32_t adv; int32_t oldwin; - adv = min(recwin, TCP_MAXWIN << tp->rcv_scale); + adv = recwin; if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) { oldwin = (tp->rcv_adv - tp->rcv_nxt); - adv -= oldwin; + if (adv > oldwin) + adv -= oldwin; + else { + /* We can't increase the window */ + adv = 0; + } } else oldwin = 0; /* -* If the new window size ends up being the same as the old size -* when it is scaled, then don't force a window update. +* If the new window size ends up being the same as or less +* than the old size when it is scaled, then don't force +* a window update. */ - if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale) + if (oldwin >> tp->rcv_scale >= (adv + oldwin) >> tp->rcv_scale) return (0); if (adv >= (2 * maxseg) && Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Fri Jun 12 18:41:12 2020 (r362112) +++ head/sys/netinet/tcp_stacks/rack.c Fri Jun 12 19:56:19 2020 (r362113) @@ -12845,18 +12845,24 @@ again: int32_t adv; int oldwin; - adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale); + adv = recwin; if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) { oldwin = (tp->rcv_adv - tp->rcv_nxt); - adv -= oldwin; + if (adv > oldwin) + adv -= oldwin; + else { + /* We can't increase the window */ + adv = 0; + } } else oldwin = 0; /* -* If the new window size ends up being the same as the old -* size when it is scaled, then don't force a window update. +
svn commit: r361926 - in head/sys/netinet: . tcp_stacks
Author: rrs Date: Mon Jun 8 11:48:07 2020 New Revision: 361926 URL: https://svnweb.freebsd.org/changeset/base/361926 Log: An important statistic in determining if a server process (or client) is being delayed is to know the time to first byte in and time to first byte out. Currently we have no way to know these; all we have is t_starttime. That (t_starttime) tells us what time the 3-way handshake completed. We don't know when the first request came in or how quickly we responded. Nor, from a client perspective, do we know how long it took from when we sent out the first byte until the server responded. This small change adds the ability to track the TTFBs. This will show up in BB logging, which can then be pulled for later analysis. Note that currently all three values are tracked via the ticks variable. This provides a very rough estimate (with hz=1000 it is 1ms). A follow-on set of work will be to change all three of these values into something with a much finer resolution (either microseconds or nanoseconds), though we may want to make the resolution configurable so that on lower powered machines we could still use the much cheaper ticks variable. Sponsored by: Netflix Inc. 
Differential Revision:https://reviews.freebsd.org/D24902 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_log_buf.c head/sys/netinet/tcp_log_buf.h head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_usrreq.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cMon Jun 8 09:39:48 2020 (r361925) +++ head/sys/netinet/tcp_input.cMon Jun 8 11:48:07 2020 (r361926) @@ -1841,6 +1841,15 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru tcp_clean_sackreport(tp); TCPSTAT_INC(tcps_preddat); tp->rcv_nxt += tlen; + if (tlen && + ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) && + (tp->t_fbyte_in == 0)) { + tp->t_fbyte_in = ticks; + if (tp->t_fbyte_in == 0) + tp->t_fbyte_in = 1; + if (tp->t_fbyte_out && tp->t_fbyte_in) + tp->t_flags2 |= TF2_FBYTES_COMPLETE; + } /* * Pull snd_wl1 up to prevent seq wrap relative to * th_seq. @@ -3016,6 +3025,15 @@ dodata: /* XXX */ else tp->t_flags |= TF_ACKNOW; tp->rcv_nxt += tlen; + if (tlen && + ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) && + (tp->t_fbyte_in == 0)) { + tp->t_fbyte_in = ticks; + if (tp->t_fbyte_in == 0) + tp->t_fbyte_in = 1; + if (tp->t_fbyte_out && tp->t_fbyte_in) + tp->t_flags2 |= TF2_FBYTES_COMPLETE; + } thflags = th->th_flags & TH_FIN; TCPSTAT_INC(tcps_rcvpack); TCPSTAT_ADD(tcps_rcvbyte, tlen); Modified: head/sys/netinet/tcp_log_buf.c == --- head/sys/netinet/tcp_log_buf.c Mon Jun 8 09:39:48 2020 (r361925) +++ head/sys/netinet/tcp_log_buf.c Mon Jun 8 11:48:07 2020 (r361926) @@ -1693,6 +1693,9 @@ retry: COPY_STAT(snd_numholes); COPY_STAT(snd_scale); COPY_STAT(rcv_scale); + COPY_STAT_T(flags2); + COPY_STAT_T(fbyte_in); + COPY_STAT_T(fbyte_out); #undef COPY_STAT #undef COPY_STAT_T log_buf->tlb_flex1 = 0; Modified: head/sys/netinet/tcp_log_buf.h == --- head/sys/netinet/tcp_log_buf.h Mon Jun 8 09:39:48 2020 (r361925) +++ head/sys/netinet/tcp_log_buf.h Mon Jun 8 11:48:07 2020 (r361926) @@ -32,7 +32,7 @@ #defineTCP_LOG_REASON_LEN 32 
#defineTCP_LOG_TAG_LEN 32 -#defineTCP_LOG_BUF_VER (8) +#defineTCP_LOG_BUF_VER (9) /* * Because the (struct tcp_log_buffer) includes 8-byte uint64_t's, it requires @@ -143,6 +143,7 @@ struct tcp_log_buffer uint32_ttlb_rttvar; /* TCPCB t_rttvar */ uint32_ttlb_rcv_up; /* TCPCB rcv_up */ uint32_ttlb_rcv_adv;/* TCPCB rcv_adv */ + uint32_t
svn commit: r361752 - head/sys/netinet
Author: rrs Date: Wed Jun 3 14:16:40 2020 New Revision: 361752 URL: https://svnweb.freebsd.org/changeset/base/361752 Log: We should never allow either the broadcast address or INADDR_ANY to be connected to or sent to. This was found when working with Michael Tuexen and syzkaller. Syzkaller seems to want to use either of these two addresses to connect to at times. And it really is an error to do so, so let's not allow that behavior. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D24852 Modified: head/sys/netinet/tcp_usrreq.c Modified: head/sys/netinet/tcp_usrreq.c == --- head/sys/netinet/tcp_usrreq.c Wed Jun 3 14:07:31 2020 (r361751) +++ head/sys/netinet/tcp_usrreq.c Wed Jun 3 14:16:40 2020 (r361752) @@ -552,6 +552,10 @@ tcp_usr_connect(struct socket *so, struct sockaddr *na if (sinp->sin_family == AF_INET && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) return (EAFNOSUPPORT); + if ((sinp->sin_family == AF_INET) && + ((ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) || +(sinp->sin_addr.s_addr == INADDR_ANY))) + return(EAFNOSUPPORT); if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0) return (error); @@ -652,6 +656,11 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *n error = EAFNOSUPPORT; goto out; } + if ((ntohl(sin.sin_addr.s_addr) == INADDR_BROADCAST) || + (sin.sin_addr.s_addr == INADDR_ANY)) { + error = EAFNOSUPPORT; + goto out; + } if ((error = prison_remote_ip4(td->td_ucred, &sin.sin_addr)) != 0) goto out; @@ -1019,6 +1028,13 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf goto out; } if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) { + if (m) + m_freem(m); + error = EAFNOSUPPORT; + goto out; + } + if ((ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) || + (sinp->sin_addr.s_addr == INADDR_ANY)) { if (m) m_freem(m); error = EAFNOSUPPORT; ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r361751 - in head/sys/netinet: . tcp_stacks
Author: rrs Date: Wed Jun 3 14:07:31 2020 New Revision: 361751 URL: https://svnweb.freebsd.org/changeset/base/361751 Log: This fixes a couple of syzkaller crashes. Most of them have to do with TFO. Even the default stack had one of the issues: 1) We need to make sure for rack that we don't advance snd_nxt beyond iss when we are not doing fast open. We otherwise can get a bunch of SYN's sent out incorrectly with the seq number advancing. 2) When we complete the 3-way handshake we should not ever append to reassembly if the tlen is 0. If TFO is enabled, prior to this fix we could still call the reassembly. Note this affects all three stacks. 3) Rack like its cousin BBR should track if a SYN is on a send map entry. 4) Both bbr and rack need to only consider len incremented on a SYN if the starting seq is iss, otherwise we don't increment len which may mean we return without adding a sendmap entry. This work was done in collaboration with Michael Tuexen, thanks for all the testing! Sponsored by: Netflix Inc Differential Revision:https://reviews.freebsd.org/D25000 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_stacks/tcp_rack.h Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.c Wed Jun 3 13:51:53 2020 (r361750) +++ head/sys/netinet/tcp_input.c Wed Jun 3 14:07:31 2020 (r361751) @@ -2989,7 +2989,7 @@ dodata: /* XXX */ */ tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) && IS_FASTOPEN(tp->t_flags)); - if ((tlen || (thflags & TH_FIN) || tfo_syn) && + if ((tlen || (thflags & TH_FIN) || (tfo_syn && tlen > 0)) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; tcp_seq save_rnxt = tp->rcv_nxt; Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Wed Jun 3 13:51:53 2020 (r361750) +++ head/sys/netinet/tcp_stacks/bbr.c Wed Jun 3 14:07:31 2020 (r361751) @@ -6028,7 +6028,7 @@ bbr_log_output(struct tcp_bbr *bbr, struct tcpcb *tp, * 
or FIN if seq_out is adding more on and a FIN is present * (and we are not resending). */ - if (th_flags & TH_SYN) + if ((th_flags & TH_SYN) && (tp->iss == seq_out)) len++; if (th_flags & TH_FIN) len++; @@ -8369,7 +8369,7 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, st */ tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) && IS_FASTOPEN(tp->t_flags)); - if ((tlen || (thflags & TH_FIN) || tfo_syn) && + if ((tlen || (thflags & TH_FIN) || (tfo_syn && tlen > 0)) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; tcp_seq save_rnxt = tp->rcv_nxt; Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Wed Jun 3 13:51:53 2020 (r361750) +++ head/sys/netinet/tcp_stacks/rack.c Wed Jun 3 14:07:31 2020 (r361751) @@ -6237,7 +6237,7 @@ rack_log_output(struct tcpcb *tp, struct tcpopt *to, i * or FIN if seq_out is adding more on and a FIN is present * (and we are not resending). */ - if (th_flags & TH_SYN) + if ((th_flags & TH_SYN) && (seq_out == tp->iss)) len++; if (th_flags & TH_FIN) len++; @@ -6280,6 +6280,7 @@ again: rsm->usec_orig_send = us_cts; if (th_flags & TH_SYN) { /* The data space is one beyond snd_una */ + rsm->r_flags |= RACK_HAS_SIN; rsm->r_start = seq_out + 1; rsm->r_end = rsm->r_start + (len - 1); } else { @@ -8724,7 +8725,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s */ tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) && IS_FASTOPEN(tp->t_flags)); - if ((tlen || (thflags & TH_FIN) || tfo_syn) && + if ((tlen || (thflags & TH_FIN) || (tfo_syn && tlen > 0)) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; tcp_seq save_rnxt = tp->rcv_nxt; @@ -12563,8 +12564,10 @@ again: len = 0; } /* Without fast-open there should never be data sent on a SYN */ - if ((flags & TH_SYN) &&
svn commit: r361080 - head/sys/netinet/tcp_stacks
Author: rrs Date: Fri May 15 14:00:12 2020 New Revision: 361080 URL: https://svnweb.freebsd.org/changeset/base/361080 Log: This fixes several skyzaller issues found with the help of Michael Tuexen. There was some accounting errors with TCPFO for bbr and also for both rack and bbr there was a FO case where we should be jumping to the just_return_nolock label to exit instead of returning 0. This of course caused no timer to be running and thus the stuck sessions. Reported by: Michael Tuexen and Skyzaller Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D24852 Modified: head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_stacks/rack_bbr_common.c Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Fri May 15 13:53:10 2020 (r361079) +++ head/sys/netinet/tcp_stacks/bbr.c Fri May 15 14:00:12 2020 (r361080) @@ -4975,6 +4975,15 @@ bbr_remxt_tmr(struct tcpcb *tp) rsm->r_flags &= ~(BBR_ACKED | BBR_SACK_PASSED | BBR_WAS_SACKPASS); bbr_log_type_rsmclear(bbr, cts, rsm, old_flags, __LINE__); } else { + if ((tp->t_state < TCPS_ESTABLISHED) && + (rsm->r_start == tp->snd_una)) { + /* +* Special case for TCP FO. Where +* we sent more data beyond the snd_max. +* We don't mark that as lost and stop here. +*/ + break; + } if ((rsm->r_flags & BBR_MARKED_LOST) == 0) { bbr->r_ctl.rc_lost += rsm->r_end - rsm->r_start; bbr->r_ctl.rc_lost_bytes += rsm->r_end - rsm->r_start; @@ -12315,7 +12324,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeva (tp->t_state == TCPS_SYN_SENT)) && SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent */ (tp->t_rxtshift == 0)) {/* not a retransmit */ - return (0); + len = 0; + goto just_return_nolock; } /* * Before sending anything check for a state update. 
For hpts @@ -14286,6 +14296,7 @@ nomore: (hw_tls == 0) && (len > 0) && ((flags & TH_RST) == 0) && + ((flags & TH_SYN) == 0) && (IN_RECOVERY(tp->t_flags) == 0) && (bbr->rc_in_persist == 0) && (tot_len < bbr->r_ctl.rc_pace_max_segs)) { Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Fri May 15 13:53:10 2020 (r361079) +++ head/sys/netinet/tcp_stacks/rack.c Fri May 15 14:00:12 2020 (r361080) @@ -3873,6 +3873,7 @@ skip_measurement: * the next send will trigger us picking up the missing data. */ if (rack->r_ctl.rc_first_appl && + TCPS_HAVEESTABLISHED(tp->t_state) && rack->r_ctl.rc_app_limited_cnt && (SEQ_GT(rack->r_ctl.rc_first_appl->r_start, th_ack)) && ((rack->r_ctl.rc_first_appl->r_start - th_ack) > @@ -11741,6 +11742,13 @@ rack_start_gp_measurement(struct tcpcb *tp, struct tcp struct rack_sendmap *my_rsm = NULL; struct rack_sendmap fe; + if (tp->t_state < TCPS_ESTABLISHED) { + /* +* We don't start any measurements if we are +* not at least established. +*/ + return; + } tp->t_flags |= TF_GPUTINPROG; rack->r_ctl.rc_gp_lowrtt = 0x; rack->r_ctl.rc_gp_high_rwnd = rack->rc_tp->snd_wnd; @@ -12109,8 +12117,10 @@ rack_output(struct tcpcb *tp) ((tp->t_state == TCPS_SYN_RECEIVED) || (tp->t_state == TCPS_SYN_SENT)) && SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent */ - (tp->t_rxtshift == 0)) /* not a retransmit */ - return (0); + (tp->t_rxtshift == 0)) { /* not a retransmit */ + cwnd_to_use = rack->r_ctl.cwnd_to_use = tp->snd_cwnd; + goto just_return_nolock; + } /* * Determine length of data that should be transmitted, and flags * that will be used. If there is some data or critical controls Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.c == --- head/sys/netinet/tcp_stacks/rack_bbr_common.c Fri May 15 13:53:10 2020(r361079) +++ head/sys/netinet/tcp_stacks/rack_bbr_common.c Fri May 15 14:00:12 2020
svn commit: r360798 - head/sys/netinet/tcp_stacks
Author: rrs Date: Thu May 7 20:29:38 2020 New Revision: 360798 URL: https://svnweb.freebsd.org/changeset/base/360798 Log: When in the SYN-SENT state bbr and rack will not properly send an ACK but instead start the D-ACK timer. This causes so_reuseport_lb_test to fail since it slows down how quickly the program runs until the timeout occurs and fails the test Sponsored by: Netflix inc. Differential Revision:https://reviews.freebsd.org/D24747 Modified: head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Thu May 7 20:27:32 2020 (r360797) +++ head/sys/netinet/tcp_stacks/bbr.c Thu May 7 20:29:38 2020 (r360798) @@ -4078,6 +4078,7 @@ bbr_cong_signal(struct tcpcb *tp, struct tcphdr *th, u */ #define DELAY_ACK(tp, bbr, nsegs) \ (((tp->t_flags & TF_RXWIN0SENT) == 0) &&\ +((tp->t_flags & TF_DELACK) == 0) &&\ ((bbr->bbr_segs_rcvd + nsegs) < tp->t_delayed_ack) && \ (tp->t_delayed_ack || (tp->t_flags & TF_NEEDSYN))) @@ -8992,7 +8993,7 @@ bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, str * If there's data, delay ACK; if there's also a FIN ACKNOW * will be turned on later. */ - if (DELAY_ACK(tp, bbr, 1) && tlen != 0 && (tfo_partial == 0)) { + if (DELAY_ACK(tp, bbr, 1) && tlen != 0 && !tfo_partial) { bbr->bbr_segs_rcvd += 1; tp->t_flags |= TF_DELACK; bbr_timer_cancel(bbr, __LINE__, bbr->r_ctl.rc_rcvtime); Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Thu May 7 20:27:32 2020 (r360797) +++ head/sys/netinet/tcp_stacks/rack.c Thu May 7 20:29:38 2020 (r360798) @@ -9320,7 +9320,15 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, st * If there's data, delay ACK; if there's also a FIN ACKNOW * will be turned on later. 
*/ - rack_handle_delayed_ack(tp, rack, tlen, tfo_partial); + if (DELAY_ACK(tp, tlen) && tlen != 0 && !tfo_partial) { + rack_timer_cancel(tp, rack, + rack->r_ctl.rc_rcvtime, __LINE__); + tp->t_flags |= TF_DELACK; + } else { + rack->r_wanted_output = 1; + tp->t_flags |= TF_ACKNOW; + rack->rc_dack_toggle = 0; + } if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) && (V_tcp_do_ecn == 1)) { tp->t_flags2 |= TF2_ECN_PERMIT; ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r360776 - head/sys/netinet/tcp_stacks
Author: rrs Date: Thu May 7 10:46:02 2020 New Revision: 360776 URL: https://svnweb.freebsd.org/changeset/base/360776 Log: NF has an internal option that changes the tcp_mcopy_m routine slightly (has a few extra arguments). Recently that changed to only have one arg extra so that two ifdefs around the call are no longer needed. Lets take out the extra ifdef and arg. Sponsored by: Netflix Inc Differential Revision: https://reviews.freebsd.org/D24736 Modified: head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Thu May 7 08:58:08 2020 (r360775) +++ head/sys/netinet/tcp_stacks/bbr.c Thu May 7 10:46:02 2020 (r360776) @@ -13420,9 +13420,6 @@ send: #endif orig_len = len; m->m_next = tcp_m_copym( -#ifdef NETFLIX_COPY_ARGS - tp, -#endif mb, moff, , if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Thu May 7 08:58:08 2020 (r360775) +++ head/sys/netinet/tcp_stacks/rack.c Thu May 7 10:46:02 2020 (r360776) @@ -13353,9 +13353,6 @@ send: else msb = sb; m->m_next = tcp_m_copym( -#ifdef NETFLIX_COPY_ARGS - tp, -#endif mb, moff, , if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb, ((rsm == NULL) ? hw_tls : 0) ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r360644 - head/sys/netinet/tcp_stacks
Author: rrs Date: Mon May 4 23:02:58 2020 New Revision: 360644 URL: https://svnweb.freebsd.org/changeset/base/360644 Log: This fixes two issues found by ankitrahej...@gmail.com 1) When BBR retransmits the syn it was messing up the snd_max 2) When we need to send a RST we might not send it when we should Reported by: ankitrahej...@gmail.com Sponsored by: Netflix.com Differential Revision: https://reviews.freebsd.org/D24693 Modified: head/sys/netinet/tcp_stacks/bbr.c Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Mon May 4 22:59:39 2020 (r360643) +++ head/sys/netinet/tcp_stacks/bbr.c Mon May 4 23:02:58 2020 (r360644) @@ -12159,6 +12159,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeva recwin = min(max(sbspace(>so_rcv), 0), TCP_MAXWIN << tp->rcv_scale); if ((bbr_window_update_needed(tp, so, recwin, maxseg) == 0) && + ((tcp_outflags[tp->t_state] & TH_RST) == 0) && ((sbavail(sb) + ((tcp_outflags[tp->t_state] & TH_FIN) ? 1 : 0)) <= (tp->snd_max - tp->snd_una))) { /* @@ -12916,9 +12917,13 @@ recheck_resend: if (tp->t_flags & TF_ACKNOW) { goto send; } - if (((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) { + if (flags & TH_RST) { + /* Always send a RST if one is due */ goto send; } + if ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0) { + goto send; + } /* * If our state indicates that FIN should be sent and we have not * yet done so, then we need to send. @@ -14029,7 +14034,11 @@ out: } if (flags & (TH_SYN | TH_FIN) && (rsm == NULL)) { if (flags & TH_SYN) { - tp->snd_max++; + /* +* Smack the snd_max to iss + 1 +* if its a FO we will add len below. +*/ + tp->snd_max = tp->iss + 1; } if ((flags & TH_FIN) && ((tp->t_flags & TF_SENTFIN) == 0)) { tp->snd_max++; ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r360639 - head/sys/netinet/tcp_stacks
Author: rrs Date: Mon May 4 20:28:53 2020 New Revision: 360639 URL: https://svnweb.freebsd.org/changeset/base/360639 Log: This commit brings things into sync with the advancements that have been made in rack and adds a few fixes in BBR. This also removes any possibility of incorrectly doing OOB data the stacks do not support it. Should fix the skyzaller crashes seen in the past. Still to fix is the BBR issue just reported this weekend with the SYN and on sending a RST. Note that this version of rack can now do pacing as well. Sponsored by:Netflix Inc Differential Revision:https://reviews.freebsd.org/D24576 Modified: head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_stacks/rack_bbr_common.c head/sys/netinet/tcp_stacks/rack_bbr_common.h head/sys/netinet/tcp_stacks/tcp_bbr.h head/sys/netinet/tcp_stacks/tcp_rack.h Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Mon May 4 20:19:57 2020 (r360638) +++ head/sys/netinet/tcp_stacks/bbr.c Mon May 4 20:28:53 2020 (r360639) @@ -1,7 +1,5 @@ /*- - * Copyright (c) 2016-9 - * Netflix Inc. - * All rights reserved. + * Copyright (c) 2016-2020 Netflix, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -72,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -1853,28 +1852,6 @@ bbr_init_sysctls(void) _clear_lost, 0, sysctl_bbr_clear_lost, "IU", "Clear lost counters"); } -static inline int32_t -bbr_progress_timeout_check(struct tcp_bbr *bbr) -{ - if (bbr->rc_tp->t_maxunacktime && bbr->rc_tp->t_acktime && - TSTMP_GT(ticks, bbr->rc_tp->t_acktime)) { - if uint32_t)ticks - bbr->rc_tp->t_acktime)) >= bbr->rc_tp->t_maxunacktime) { - /* -* There is an assumption here that the caller will -* drop the connection, so we increment the -* statistics. 
-*/ - bbr_log_progress_event(bbr, bbr->rc_tp, ticks, PROGRESS_DROP, __LINE__); - BBR_STAT_INC(bbr_progress_drops); -#ifdef NETFLIX_STATS - KMOD_TCPSTAT_INC(tcps_progdrops); -#endif - return (1); - } - } - return (0); -} - static void bbr_counter_destroy(void) { @@ -1884,6 +1861,8 @@ bbr_counter_destroy(void) COUNTER_ARRAY_FREE(bbr_state_lost, BBR_MAX_STAT); COUNTER_ARRAY_FREE(bbr_state_time, BBR_MAX_STAT); COUNTER_ARRAY_FREE(bbr_state_resend, BBR_MAX_STAT); + counter_u64_free(bbr_nohdwr_pacing_enobuf); + counter_u64_free(bbr_hdwr_pacing_enobuf); counter_u64_free(bbr_flows_whdwr_pacing); counter_u64_free(bbr_flows_nohdwr_pacing); @@ -4643,7 +4622,8 @@ bbr_timeout_tlp(struct tcpcb *tp, struct tcp_bbr *bbr, /* Its not time yet */ return (0); } - if (bbr_progress_timeout_check(bbr)) { + if (ctf_progress_timeout_check(tp, true)) { + bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); return (1); } @@ -4815,9 +4795,8 @@ bbr_timeout_delack(struct tcpcb *tp, struct tcp_bbr *b } /* - * Persists timer, here we simply need to setup the - * FORCE-DATA flag the output routine will send - * the one byte send. + * Here we send a KEEP-ALIVE like probe to the + * peer, we do not send data. * * We only return 1, saying don't proceed, if all timers * are stopped (destroyed PCB?). @@ -4845,7 +4824,8 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr * /* * Have we exceeded the user specified progress time? 
*/ - if (bbr_progress_timeout_check(bbr)) { + if (ctf_progress_timeout_check(tp, true)) { + bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__); tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); goto out; } @@ -4859,6 +4839,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr * (ticks - tp->t_rcvtime >= tcp_maxpersistidle || ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) { KMOD_TCPSTAT_INC(tcps_persistdrop); + tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX); tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT); goto out; } @@ -4875,6 +4856,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr * if (tp->t_state > TCPS_CLOSE_WAIT && (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) { KMOD_TCPSTAT_INC(tcps_persistdrop); +
svn commit: r360638 - head/sys/netinet
Author: rrs Date: Mon May 4 20:19:57 2020 New Revision: 360638 URL: https://svnweb.freebsd.org/changeset/base/360638 Log: Adjust the fb to have a way to ask the underlying stack if it can support the PRUS option (OOB). And then have the new function call that to validate and give the correct error response if needed to the user (rack and bbr do not support obsoleted OOB data). Sponsoered by: Netflix Inc. Differential Revision: https://reviews.freebsd.org/D24574 Modified: head/sys/netinet/tcp_usrreq.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_usrreq.c == --- head/sys/netinet/tcp_usrreq.c Mon May 4 18:40:56 2020 (r360637) +++ head/sys/netinet/tcp_usrreq.c Mon May 4 20:19:57 2020 (r360638) @@ -133,6 +133,8 @@ static void tcp_disconnect(struct tcpcb *); static voidtcp_usrclosed(struct tcpcb *); static voidtcp_fill_info(struct tcpcb *, struct tcp_info *); +static int tcp_pru_options_support(struct tcpcb *tp, int flags); + #ifdef TCPDEBUG #defineTCPDEBUG0 int ostate = 0 #defineTCPDEBUG1() ostate = tp ? tp->t_state : 0 @@ -979,6 +981,15 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf goto out; } tp = intotcpcb(inp); + if (flags & PRUS_OOB) { + if ((error = tcp_pru_options_support(tp, PRUS_OOB)) != 0) { + if (control) + m_freem(control); + if (m && (flags & PRUS_NOTREADY) == 0) + m_freem(m); + goto out; + } + } TCPDEBUG1(); if (nam != NULL && tp->t_state < TCPS_SYN_SENT) { switch (nam->sa_family) { @@ -1362,6 +1373,24 @@ tcp_usr_close(struct socket *so) NET_EPOCH_EXIT(et); } +static int +tcp_pru_options_support(struct tcpcb *tp, int flags) +{ + /* +* If the specific TCP stack has a pru_options +* specified then it does not always support +* all the PRU_XX options and we must ask it. +* If the function is not specified then all +* of the PRU_XX options are supported. +*/ + int ret = 0; + + if (tp->t_fb->tfb_pru_options) { + ret = (*tp->t_fb->tfb_pru_options)(tp, flags); + } + return (ret); +} + /* * Receive out-of-band data. 
*/ @@ -1381,6 +1410,10 @@ tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int goto out; } tp = intotcpcb(inp); + error = tcp_pru_options_support(tp, PRUS_OOB); + if (error) { + goto out; + } TCPDEBUG1(); if ((so->so_oobmark == 0 && (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) || Modified: head/sys/netinet/tcp_var.h == --- head/sys/netinet/tcp_var.h Mon May 4 18:40:56 2020(r360637) +++ head/sys/netinet/tcp_var.h Mon May 4 20:19:57 2020(r360638) @@ -345,6 +345,7 @@ struct tcp_function_block { void(*tfb_tcp_rexmit_tmr)(struct tcpcb *); int (*tfb_tcp_handoff_ok)(struct tcpcb *); void(*tfb_tcp_mtu_chg)(struct tcpcb *); + int (*tfb_pru_options)(struct tcpcb *, int); volatile uint32_t tfb_refcnt; uint32_t tfb_flags; uint8_t tfb_id; ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r360385 - head/sys/netinet
Author: rrs Date: Mon Apr 27 16:30:29 2020 New Revision: 360385 URL: https://svnweb.freebsd.org/changeset/base/360385 Log: This change does a small prepratory step in getting the latest rack and bbr in from the NF repo. When those come in the OOB data handling will be fixed where Skyzaller crashes. Differential Revision:https://reviews.freebsd.org/D24575 Modified: head/sys/netinet/tcp.h head/sys/netinet/tcp_log_buf.h head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp.h == --- head/sys/netinet/tcp.h Mon Apr 27 16:12:32 2020(r360384) +++ head/sys/netinet/tcp.h Mon Apr 27 16:30:29 2020(r360385) @@ -181,6 +181,9 @@ struct tcphdr { #defineTCP_CONGESTION 64 /* get/set congestion control algorithm */ #defineTCP_CCALGOOPT 65 /* get/set cc algorithm specific options */ #define TCP_DELACK 72 /* socket option for delayed ack */ +#define TCP_FIN_IS_RST 73 /* A fin from the peer is treated has a RST */ +#define TCP_LOG_LIMIT 74 /* Limit to number of records in tcp-log */ +#define TCP_SHARED_CWND_ALLOWED 75 /* Use of a shared cwnd is allowed */ #defineTCP_KEEPINIT128 /* N, time to establish connection */ #defineTCP_KEEPIDLE256 /* L,N,X start keeplives after this period */ #defineTCP_KEEPINTVL 512 /* L,N interval between keepalives */ @@ -190,10 +193,11 @@ struct tcphdr { #defineTCP_PCAP_IN 4096/* number of input packets to keep */ #define TCP_FUNCTION_BLK 8192 /* Set the tcp function pointers to the specified stack */ /* Options for Rack and BBR */ +#define TCP_RACK_MBUF_QUEUE 1050 /* Do we allow mbuf queuing if supported */ #define TCP_RACK_PROP1051 /* RACK proportional rate reduction (bool) */ #define TCP_RACK_TLP_REDUCE 1052 /* RACK TLP cwnd reduction (bool) */ #define TCP_RACK_PACE_REDUCE 1053 /* RACK Pacing reduction factor (divisor) */ -#define TCP_RACK_PACE_MAX_SEG 1054 /* Max segments in a pace */ +#define TCP_RACK_PACE_MAX_SEG 1054 /* Max TSO size we will send */ #define TCP_RACK_PACE_ALWAYS 1055 /* Use the always pace method */ 
#define TCP_RACK_PROP_RATE1056 /* The proportional reduction rate */ #define TCP_RACK_PRR_SENDALOT 1057 /* Allow PRR to send more than one seg */ @@ -236,7 +240,7 @@ struct tcphdr { #define TCP_RACK_IDLE_REDUCE_HIGH 1092 /* Reduce the highest cwnd seen to IW on idle */ #define TCP_RACK_MIN_PACE 1093/* Do we enforce rack min pace time */ #define TCP_RACK_MIN_PACE_SEG 1094/* If so what is the seg threshould */ -#define TCP_RACK_GP_INCREASE 1094/* After 4.1 its the GP increase */ +#define TCP_RACK_GP_INCREASE 1094/* After 4.1 its the GP increase in older rack */ #define TCP_RACK_TLP_USE 1095 #define TCP_BBR_ACK_COMP_ALG 1096/* Not used */ #define TCP_BBR_TMR_PACE_OH1096/* Recycled in 4.2 */ @@ -248,7 +252,8 @@ struct tcphdr { #define TCP_BBR_PROBE_RTT_GAIN 1101 #define TCP_BBR_PROBE_RTT_LEN 1102 #define TCP_BBR_SEND_IWND_IN_TSO 1103 /* Do we burst out whole iwin size chunks at start? */ -#define TCP_BBR_USE_RACK_CHEAT 1104/* Do we use the rack cheat for pacing rxt's */ +#define TCP_BBR_USE_RACK_RR 1104 /* Do we use the rack rapid recovery for pacing rxt's */ +#define TCP_BBR_USE_RACK_CHEAT TCP_BBR_USE_RACK_RR /* Compat. 
*/ #define TCP_BBR_HDWR_PACE 1105/* Enable/disable hardware pacing */ #define TCP_BBR_UTTER_MAX_TSO 1106/* Do we enforce an utter max TSO size */ #define TCP_BBR_EXTRA_STATE1107/* Special exit-persist catch up */ @@ -256,6 +261,24 @@ struct tcphdr { #define TCP_BBR_MIN_TOPACEOUT 1109/* Do we suspend pacing until */ #define TCP_BBR_TSTMP_RAISES 1110/* Can a timestamp measurement raise the b/w */ #define TCP_BBR_POLICER_DETECT /* Turn on/off google mode policer detection */ +#define TCP_BBR_RACK_INIT_RATE 1112/* Set an initial pacing rate for when we have no b/w in kbits per sec */ +#define TCP_RACK_RR_CONF 1113 /* Rack rapid recovery configuration control*/ +#define TCP_RACK_CHEAT_NOT_CONF_RATE TCP_RACK_RR_CONF +#define TCP_RACK_GP_INCREASE_CA 1114 /* GP increase for Congestion Avoidance */ +#define TCP_RACK_GP_INCREASE_SS 1115 /* GP increase for Slow Start */ +#define TCP_RACK_GP_INCREASE_REC 1116 /* GP increase for Recovery */ +#define TCP_RACK_FORCE_MSEG1117/* Override to use the user set max-seg value */ +#define TCP_RACK_PACE_RATE_CA 1118 /* Pacing rate for Congestion Avoidance */ +#define TCP_RACK_PACE_RATE_SS 1119 /* Pacing rate for Slow Start */ +#define TCP_RACK_PACE_RATE_REC 1120 /* Pacing rate for Recovery */ +#define TCP_NO_PRR 1122 /* If pacing, don't use prr */ +#define TCP_RACK_NONRXT_CFG_RATE 1123 /* In recovery does a non-rxt use the cfg rate */ +#define TCP_SHARED_CWND_ENABLE
svn commit: r358332 - in head/sys: net netinet
Author: rrs Date: Wed Feb 26 13:48:33 2020 New Revision: 358332 URL: https://svnweb.freebsd.org/changeset/base/358332 Log: This commit expands tcp_ratelimit to be able to handle cards like the mlx-c5 and c6 that require a "setup" routine before the tcp_ratelimit code can declare and use a rate. I add the setup routine to if_var as well as fix tcp_ratelimit to call it. I also revisit the rates so that in the case of a mlx card of type c5/6 we will use about 100 rates concentrated in the range where the most gain can be had (1-200Mbps). Note that I have tested these on a c5 and they work and perform well. In fact in an unloaded system they pace right to the correct rate (great job mlx!). There will be a further commit here from Hans that will add the respective changes to the mlx driver to support this work (which I was testing with). Sponsored by: Netflix Inc. Differential Revision:ttps://reviews.freebsd.org/D23647 Modified: head/sys/net/if_var.h head/sys/netinet/tcp_ratelimit.c head/sys/netinet/tcp_ratelimit.h Modified: head/sys/net/if_var.h == --- head/sys/net/if_var.h Wed Feb 26 13:23:52 2020(r358331) +++ head/sys/net/if_var.h Wed Feb 26 13:48:33 2020(r358332) @@ -252,6 +252,7 @@ union if_snd_tag_query_params { */ #define RT_IS_FIXED_TABLE 0x0004 /* A fixed table is attached */ #define RT_IS_UNUSABLE 0x0008/* It is not usable for this */ +#define RT_IS_SETUP_REQ 0x0010/* The interface setup must be called before use */ struct if_ratelimit_query_results { const uint64_t *rate_table; /* Pointer to table if present */ @@ -268,8 +269,8 @@ typedef int (if_snd_tag_query_t)(struct m_snd_tag *, u typedef void (if_snd_tag_free_t)(struct m_snd_tag *); typedef void (if_ratelimit_query_t)(struct ifnet *, struct if_ratelimit_query_results *); +typedef int (if_ratelimit_setup_t)(struct ifnet *, uint64_t, uint32_t); - /* * Structure defining a network interface. 
*/ @@ -368,7 +369,7 @@ struct ifnet { if_init_fn_tif_init;/* Init routine */ int (*if_resolvemulti) /* validate/resolve multicast */ (struct ifnet *, struct sockaddr **, struct sockaddr *); - if_qflush_fn_t if_qflush; /* flush any queue */ + if_qflush_fn_t if_qflush; /* flush any queue */ if_transmit_fn_t if_transmit; /* initiate output routine */ void(*if_reassign) /* reassign to vnet routine */ @@ -411,6 +412,7 @@ struct ifnet { if_snd_tag_query_t *if_snd_tag_query; if_snd_tag_free_t *if_snd_tag_free; if_ratelimit_query_t *if_ratelimit_query; + if_ratelimit_setup_t *if_ratelimit_setup; /* Ethernet PCP */ uint8_t if_pcp; @@ -555,7 +557,7 @@ struct ifaddr { u_int ifa_refcnt; /* references to this structure */ counter_u64_t ifa_ipackets; - counter_u64_t ifa_opackets; + counter_u64_t ifa_opackets; counter_u64_t ifa_ibytes; counter_u64_t ifa_obytes; struct epoch_context ifa_epoch_ctx; @@ -769,7 +771,7 @@ void if_setstartfn(if_t ifp, void (*)(if_t)); void if_settransmitfn(if_t ifp, if_transmit_fn_t); void if_setqflushfn(if_t ifp, if_qflush_fn_t); void if_setgetcounterfn(if_t ifp, if_get_counter_t); - + /* Revisit the below. These are inline functions originally */ int drbr_inuse_drv(if_t ifp, struct buf_ring *br); struct mbuf* drbr_dequeue_drv(if_t ifp, struct buf_ring *br); Modified: head/sys/netinet/tcp_ratelimit.c == --- head/sys/netinet/tcp_ratelimit.cWed Feb 26 13:23:52 2020 (r358331) +++ head/sys/netinet/tcp_ratelimit.cWed Feb 26 13:48:33 2020 (r358332) @@ -66,45 +66,199 @@ __FBSDID("$FreeBSD$"); * For the purposes of each send, what is the size * of an ethernet frame. */ -#ifndef ETHERNET_SEGMENT_SIZE -#define ETHERNET_SEGMENT_SIZE 1500 -#endif MALLOC_DEFINE(M_TCPPACE, "tcp_hwpace", "TCP Hardware pacing memory"); #ifdef RATELIMIT +/* + * The following preferred table will seem weird to + * the casual viewer. Why do we not have any rates below + * 1Mbps? Why do we have a rate at 1.44Mbps called common? 
+ * Why do the rates cluster in the 1-100Mbps range more + * than others? Why does the table jump around at the beginnign + * and then be more consistently raising? + * + * Let me try to answer those questions. A lot of + * this is dependant on the hardware. We have three basic + * supporters of rate limiting + * + * Chelsio - Supporting 16 configurable rates. + * Mlx - c4 supporting 13 fixed rates. + * Mlx - c5 & c6 supporting 127 configurable rates. + * + * The c4 is why we have a common rate that is available + * in all rate tables. This is a
svn commit: r357823 - head/sys/netinet
Author: rrs Date: Wed Feb 12 15:26:56 2020 New Revision: 357823 URL: https://svnweb.freebsd.org/changeset/base/357823 Log: Lets get the real correct version.. gessh. I need more coffee evidently. Sponsored by: Netflix Modified: head/sys/netinet/tcp_ratelimit.c Modified: head/sys/netinet/tcp_ratelimit.c == --- head/sys/netinet/tcp_ratelimit.cWed Feb 12 14:50:13 2020 (r357822) +++ head/sys/netinet/tcp_ratelimit.cWed Feb 12 15:26:56 2020 (r357823) @@ -49,9 +49,11 @@ __FBSDID("$FreeBSD$"); #include #include #include -#define TCPSTATES /* for logging */ +#include +#include #include #include +#define TCPSTATES /* for logging */ #include #ifdef INET6 #include @@ -284,7 +286,7 @@ rs_defer_destroy(struct tcp_rate_set *rs) /* Set flag to only defer once. */ rs->rs_flags |= RS_FUNERAL_SCHD; - epoch_call(net_epoch, >rs_epoch_ctx, rs_destroy); + NET_EPOCH_CALL(rs_destroy, >rs_epoch_ctx); } #ifdef INET @@ -878,7 +880,7 @@ rt_setup_rate(struct inpcb *inp, struct ifnet *ifp, ui struct epoch_tracker et; int err; - epoch_enter_preempt(net_epoch_preempt, ); + NET_EPOCH_ENTER(et); use_real_interface: CK_LIST_FOREACH(rs, _rs, next) { /* @@ -911,14 +913,14 @@ use_real_interface: */ if (rs->rs_disable && error) *error = ENODEV; - epoch_exit_preempt(net_epoch_preempt, ); + NET_EPOCH_EXIT(et); return (NULL); } if ((rs == NULL) || (rs->rs_disable != 0)) { if (rs->rs_disable && error) *error = ENOSPC; - epoch_exit_preempt(net_epoch_preempt, ); + NET_EPOCH_EXIT(et); return (NULL); } if (rs->rs_flags & RS_IS_DEFF) { @@ -929,7 +931,7 @@ use_real_interface: if (tifp == NULL) { if (rs->rs_disable && error) *error = ENOTSUP; - epoch_exit_preempt(net_epoch_preempt, ); + NET_EPOCH_EXIT(et); return (NULL); } goto use_real_interface; @@ -938,7 +940,7 @@ use_real_interface: ((rs->rs_flows_using + 1) > rs->rs_flow_limit)) { if (error) *error = ENOSPC; - epoch_exit_preempt(net_epoch_preempt, ); + NET_EPOCH_EXIT(et); return (NULL); } rte = tcp_find_suitable_rate(rs, bytes_per_sec, flags); @@ -962,7 +964,7 
@@ use_real_interface: */ atomic_add_64(>rs_flows_using, 1); } - epoch_exit_preempt(net_epoch_preempt, ); + NET_EPOCH_EXIT(et); return (rte); } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r357818 - in head/sys/netinet: . cc
Author: rrs Date: Wed Feb 12 13:31:36 2020 New Revision: 357818 URL: https://svnweb.freebsd.org/changeset/base/357818 Log: White space cleanup -- remove trailing tab's or spaces from any line. Sponsored by: Netflix Inc. Modified: head/sys/netinet/cc/cc_cdg.c head/sys/netinet/cc/cc_dctcp.c head/sys/netinet/cc/cc_htcp.c head/sys/netinet/icmp6.h head/sys/netinet/if_ether.c head/sys/netinet/igmp.c head/sys/netinet/in.c head/sys/netinet/in.h head/sys/netinet/in_mcast.c head/sys/netinet/in_pcb.c head/sys/netinet/in_pcb.h head/sys/netinet/in_proto.c head/sys/netinet/in_rmx.c head/sys/netinet/ip_divert.c head/sys/netinet/ip_dummynet.h head/sys/netinet/ip_fastfwd.c head/sys/netinet/ip_fw.h head/sys/netinet/ip_icmp.c head/sys/netinet/ip_id.c head/sys/netinet/ip_input.c head/sys/netinet/ip_mroute.c head/sys/netinet/ip_options.c head/sys/netinet/ip_reass.c head/sys/netinet/raw_ip.c head/sys/netinet/siftr.c head/sys/netinet/tcp.h head/sys/netinet/tcp_fastopen.c head/sys/netinet/tcp_fsm.h head/sys/netinet/tcp_input.c head/sys/netinet/tcp_log_buf.c head/sys/netinet/tcp_log_buf.h head/sys/netinet/tcp_lro.c head/sys/netinet/tcp_lro.h head/sys/netinet/tcp_output.c head/sys/netinet/tcp_ratelimit.c head/sys/netinet/tcp_ratelimit.h head/sys/netinet/tcp_reass.c head/sys/netinet/tcp_sack.c head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_syncache.c head/sys/netinet/tcp_timer.c head/sys/netinet/tcp_timer.h head/sys/netinet/tcp_usrreq.c head/sys/netinet/tcp_var.h head/sys/netinet/udp.h head/sys/netinet/udp_usrreq.c head/sys/netinet/udp_var.h head/sys/netinet/udplite.h Modified: head/sys/netinet/cc/cc_cdg.c == --- head/sys/netinet/cc/cc_cdg.cWed Feb 12 13:07:09 2020 (r357817) +++ head/sys/netinet/cc/cc_cdg.cWed Feb 12 13:31:36 2020 (r357818) @@ -607,7 +607,7 @@ cdg_ack_received(struct cc_var *ccv, uint16_t ack_type congestion = prob_backoff(qdiff_max); else if (cdg_data->max_qtrend > 0) congestion = prob_backoff(cdg_data->max_qtrend); - + /* Update estimate of queue state. 
*/ if (cdg_data->min_qtrend > 0 && cdg_data->max_qtrend <= 0) { Modified: head/sys/netinet/cc/cc_dctcp.c == --- head/sys/netinet/cc/cc_dctcp.c Wed Feb 12 13:07:09 2020 (r357817) +++ head/sys/netinet/cc/cc_dctcp.c Wed Feb 12 13:31:36 2020 (r357818) @@ -274,9 +274,9 @@ dctcp_cong_signal(struct cc_var *ccv, uint32_t type) dctcp_data->bytes_total = 0; dctcp_data->save_sndnxt = CCV(ccv, snd_nxt); } else - CCV(ccv, snd_ssthresh) = + CCV(ccv, snd_ssthresh) = max((cwin - (((uint64_t)cwin * - dctcp_data->alpha) >> (DCTCP_SHIFT+1))), + dctcp_data->alpha) >> (DCTCP_SHIFT+1))), 2 * mss); CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); ENTER_CONGRECOVERY(CCV(ccv, t_flags)); Modified: head/sys/netinet/cc/cc_htcp.c == --- head/sys/netinet/cc/cc_htcp.c Wed Feb 12 13:07:09 2020 (r357817) +++ head/sys/netinet/cc/cc_htcp.c Wed Feb 12 13:31:36 2020 (r357818) @@ -364,7 +364,7 @@ htcp_post_recovery(struct cc_var *ccv) pipe = tcp_compute_pipe(ccv->ccvc.tcp); else pipe = CCV(ccv, snd_max) - ccv->curack; - + if (pipe < CCV(ccv, snd_ssthresh)) /* * Ensure that cwnd down not collape to 1 MSS under Modified: head/sys/netinet/icmp6.h == --- head/sys/netinet/icmp6.hWed Feb 12 13:07:09 2020(r357817) +++ head/sys/netinet/icmp6.hWed Feb 12 13:31:36 2020(r357818) @@ -344,7 +344,7 @@ struct nd_opt_mtu { /* MTU option */ #defineND_OPT_NONCE_LEN((1 * 8) - 2) #if ((ND_OPT_NONCE_LEN + 2) % 8) != 0 #error "(ND_OPT_NONCE_LEN + 2) must be a multiple of 8." -#endif +#endif struct nd_opt_nonce { /* nonce option */ u_int8_tnd_opt_nonce_type; u_int8_tnd_opt_nonce_len; @@ -607,7 +607,7 @@ struct icmp6stat { * for netinet6 code, it is already available in icp6s_outhist[].
svn commit: r357817 - head/sys/netinet
Author: rrs Date: Wed Feb 12 13:07:09 2020 New Revision: 357817 URL: https://svnweb.freebsd.org/changeset/base/357817 Log: Whitespace, remove from three files trailing white space (leftover presents from emacs). Sponsored by: Netflix Inc. Modified: head/sys/netinet/tcp_hpts.c head/sys/netinet/tcp_hpts.h head/sys/netinet/tcp_ratelimit.c Modified: head/sys/netinet/tcp_hpts.c == --- head/sys/netinet/tcp_hpts.c Wed Feb 12 13:04:19 2020(r357816) +++ head/sys/netinet/tcp_hpts.c Wed Feb 12 13:07:09 2020(r357817) @@ -33,7 +33,7 @@ __FBSDID("$FreeBSD$"); * Some notes about usage. * * The tcp_hpts system is designed to provide a high precision timer - * system for tcp. Its main purpose is to provide a mechanism for + * system for tcp. Its main purpose is to provide a mechanism for * pacing packets out onto the wire. It can be used in two ways * by a given TCP stack (and those two methods can be used simultaneously). * @@ -59,22 +59,22 @@ __FBSDID("$FreeBSD$"); * to prevent output processing until the time alotted has gone by. * Of course this is a bare bones example and the stack will probably * have more consideration then just the above. - * + * * Now the second function (actually two functions I guess :D) - * the tcp_hpts system provides is the ability to either abort - * a connection (later) or process input on a connection. + * the tcp_hpts system provides is the ability to either abort + * a connection (later) or process input on a connection. * Why would you want to do this? To keep processor locality * and or not have to worry about untangling any recursive * locks. The input function now is hooked to the new LRO - * system as well. + * system as well. * * In order to use the input redirection function the - * tcp stack must define an input function for + * tcp stack must define an input function for * tfb_do_queued_segments(). This function understands * how to dequeue a array of packets that were input and - * knows how to call the correct processing routine. 
+ * knows how to call the correct processing routine. * - * Locking in this is important as well so most likely the + * Locking in this is important as well so most likely the * stack will need to define the tfb_do_segment_nounlock() * splitting tfb_do_segment() into two parts. The main processing * part that does not unlock the INP and returns a value of 1 or 0. @@ -83,7 +83,7 @@ __FBSDID("$FreeBSD$"); * The remains of tfb_do_segment() then become just a simple call * to the tfb_do_segment_nounlock() function and check the return * code and possibly unlock. - * + * * The stack must also set the flag on the INP that it supports this * feature i.e. INP_SUPPORTS_MBUFQ. The LRO code recoginizes * this flag as well and will queue packets when it is set. @@ -99,11 +99,11 @@ __FBSDID("$FreeBSD$"); * * There is a common functions within the rack_bbr_common code * version i.e. ctf_do_queued_segments(). This function - * knows how to take the input queue of packets from - * tp->t_in_pkts and process them digging out - * all the arguments, calling any bpf tap and + * knows how to take the input queue of packets from + * tp->t_in_pkts and process them digging out + * all the arguments, calling any bpf tap and * calling into tfb_do_segment_nounlock(). The common - * function (ctf_do_queued_segments()) requires that + * function (ctf_do_queued_segments()) requires that * you have defined the tfb_do_segment_nounlock() as * described above. * @@ -113,9 +113,9 @@ __FBSDID("$FreeBSD$"); * a stack wants to drop a connection it calls: * * tcp_set_inp_to_drop(tp, ETIMEDOUT) - * - * To schedule the tcp_hpts system to call - * + * + * To schedule the tcp_hpts system to call + * *tcp_drop(tp, drop_reason) * * at a future point. 
This is quite handy to prevent locking @@ -284,7 +284,7 @@ sysctl_net_inet_tcp_hpts_max_sleep(SYSCTL_HANDLER_ARGS error = sysctl_handle_int(oidp, , 0, req); if (error == 0 && req->newptr) { if ((new < (NUM_OF_HPTSI_SLOTS / 4)) || - (new > HPTS_MAX_SLEEP_ALLOWED)) + (new > HPTS_MAX_SLEEP_ALLOWED)) error = EINVAL; else hpts_sleep_max = new; @@ -311,7 +311,7 @@ tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb int ticks_to_run, int idx) { union tcp_log_stackspecific log; - + memset(_bbr, 0, sizeof(log.u_bbr)); log.u_bbr.flex1 = hpts->p_nxt_slot; log.u_bbr.flex2 = hpts->p_cur_slot; @@ -616,7 +616,7 @@ tcp_hpts_remove_locked_input(struct tcp_hpts_entry *hp * Valid values in the flags are * HPTS_REMOVE_OUTPUT - remove from the output of the hpts. * HPTS_REMOVE_INPUT - remove from the input of the hpts. - * Note that you can use one or both values
svn commit: r357816 - head/sys/netinet
Author: rrs Date: Wed Feb 12 13:04:19 2020 New Revision: 357816 URL: https://svnweb.freebsd.org/changeset/base/357816 Log: This small fix makes it so we properly follow the RFC and only enable ECN when both the CWR and ECE bits are set within the SYN packet. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D23645 Modified: head/sys/netinet/tcp_syncache.c Modified: head/sys/netinet/tcp_syncache.c == --- head/sys/netinet/tcp_syncache.c Wed Feb 12 12:40:06 2020 (r357815) +++ head/sys/netinet/tcp_syncache.c Wed Feb 12 13:04:19 2020 (r357816) @@ -1668,7 +1668,8 @@ skip_alloc: sc->sc_peer_mss = to->to_mss; /* peer mss may be zero */ if (ltflags & TF_NOOPT) sc->sc_flags |= SCF_NOOPT; - if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn) + if (((th->th_flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) && + V_tcp_do_ecn) sc->sc_flags |= SCF_ECN; if (V_tcp_syncookies) ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r357815 - head/sys/netinet/tcp_stacks
Author: rrs Date: Wed Feb 12 12:40:06 2020 New Revision: 357815 URL: https://svnweb.freebsd.org/changeset/base/357815 Log: Remove all trailing white space from the BBR/Rack fold. Bits left around by emacs (thanks emacs). Modified: head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_stacks/rack_bbr_common.c head/sys/netinet/tcp_stacks/rack_bbr_common.h head/sys/netinet/tcp_stacks/sack_filter.c head/sys/netinet/tcp_stacks/tcp_bbr.h head/sys/netinet/tcp_stacks/tcp_rack.h Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Wed Feb 12 12:36:55 2020 (r357814) +++ head/sys/netinet/tcp_stacks/bbr.c Wed Feb 12 12:40:06 2020 (r357815) @@ -208,7 +208,7 @@ static int32_t bbr_min_measurements_req = 1;/* We nee * to prevent it from being ok * to have no measurements). */ static int32_t bbr_no_pacing_until = 4; - + static int32_t bbr_min_usec_delta = 2; /* 20,000 usecs */ static int32_t bbr_min_peer_delta = 20;/* 20 units */ static int32_t bbr_delta_percent = 150;/* 15.0 % */ @@ -380,9 +380,9 @@ static int32_t bbr_rto_max_sec = 4; /* 4 seconds */ static int32_t bbr_hptsi_per_second = 1000; /* - * For hptsi under bbr_cross_over connections what is delay + * For hptsi under bbr_cross_over connections what is delay * target 7ms (in usec) combined with a seg_max of 2 - * gets us close to identical google behavior in + * gets us close to identical google behavior in * TSO size selection (possibly more 1MSS sends). 
*/ static int32_t bbr_hptsi_segments_delay_tar = 7000; @@ -596,9 +596,9 @@ activate_rxt: rsm = TAILQ_FIRST(>r_ctl.rc_tmap); if (rsm) { idx = rsm->r_rtr_cnt - 1; - if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], bbr->r_ctl.rc_tlp_rxt_last_time)) + if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], bbr->r_ctl.rc_tlp_rxt_last_time)) tstmp_touse = rsm->r_tim_lastsent[idx]; - else + else tstmp_touse = bbr->r_ctl.rc_tlp_rxt_last_time; if (TSTMP_GT(tstmp_touse, cts)) time_since_sent = cts - tstmp_touse; @@ -673,9 +673,9 @@ activate_rxt: } time_since_sent = 0; idx = rsm->r_rtr_cnt - 1; - if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], bbr->r_ctl.rc_tlp_rxt_last_time)) + if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], bbr->r_ctl.rc_tlp_rxt_last_time)) tstmp_touse = rsm->r_tim_lastsent[idx]; - else + else tstmp_touse = bbr->r_ctl.rc_tlp_rxt_last_time; if (TSTMP_GT(tstmp_touse, cts)) time_since_sent = cts - tstmp_touse; @@ -695,11 +695,11 @@ activate_rxt: } if ((bbr->rc_tlp_rtx_out == 1) && (rsm->r_start == bbr->r_ctl.rc_last_tlp_seq)) { - /* -* Second retransmit of the same TLP + /* +* Second retransmit of the same TLP * lets not. */ - bbr->rc_tlp_rtx_out = 0; + bbr->rc_tlp_rtx_out = 0; goto activate_rxt; } if (rsm->r_start != bbr->r_ctl.rc_last_tlp_seq) { @@ -766,7 +766,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb prev_delay = bbr->r_ctl.rc_last_delay_val; if (bbr->r_ctl.rc_last_delay_val && (slot == 0)) { - /* + /* * If a previous pacer delay was in place we * are not coming from the output side (where * we calculate a delay, more likely a timer). @@ -777,7 +777,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb delay_calc = cts - bbr->rc_pacer_started; if (delay_calc <= slot) slot -= delay_calc; - } + } } /* Do we have early to make up for by pushing out the pacing time? */ if (bbr->r_agg_early_set) { @@ -804,8 +804,8 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb if (bbr->rc_in_persist == 0) { delayed_ack = bbr_delack_time; } else { - /* -* We are in persists and
svn commit: r357814 - head/sys/netinet/tcp_stacks
Author: rrs Date: Wed Feb 12 12:36:55 2020 New Revision: 357814 URL: https://svnweb.freebsd.org/changeset/base/357814 Log: Now that all of the stats framework is in FreeBSD the bits that disabled stats when netflix-stats is not defined are no longer needed. Let's remove these bits so that we will properly use stats per its definition in BBR and Rack. Sponsored by: Netflix Inc Differential Revision:https://reviews.freebsd.org/D23088 Modified: head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_stacks/rack_bbr_common.h Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Wed Feb 12 12:23:46 2020 (r357813) +++ head/sys/netinet/tcp_stacks/rack.c Wed Feb 12 12:36:55 2020 (r357814) @@ -1681,7 +1681,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r tp->t_stats_gput_prev); tp->t_flags &= ~TF_GPUTINPROG; tp->t_stats_gput_prev = gput; - +#ifdef NETFLIX_PEAKRATE if (tp->t_maxpeakrate) { /* * We update t_peakrate_thr. This gives us roughly @@ -1689,6 +1689,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r */ tcp_update_peakrate_thr(tp); } +#endif } #endif if (tp->snd_cwnd > tp->snd_ssthresh) { Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.h == --- head/sys/netinet/tcp_stacks/rack_bbr_common.h Wed Feb 12 12:23:46 2020(r357813) +++ head/sys/netinet/tcp_stacks/rack_bbr_common.h Wed Feb 12 12:36:55 2020(r357814) @@ -27,11 +27,6 @@ * __FBSDID("$FreeBSD$"); */ -/* XXXLAS: Couple STATS to NETFLIX_STATS until stats(3) is fully upstreamed. */ -#ifndefNETFLIX_STATS -#undef STATS -#endif - /* Common defines and such used by both RACK and BBR */ /* Special values for mss accounting array */ #define TCP_MSS_ACCT_JUSTRET 0 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r356417 - in head/sys/netinet: . tcp_stacks
Author: rrs Date: Mon Jan 6 15:29:14 2020 New Revision: 356417 URL: https://svnweb.freebsd.org/changeset/base/356417 Log: This catches rack up in the recent changes to ECN and also commonizes the functions that both the freebsd and rack stack uses. Sponsored by:Netflix Inc Differential Revision:https://reviews.freebsd.org/D23052 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cMon Jan 6 13:21:10 2020 (r356416) +++ head/sys/netinet/tcp_input.cMon Jan 6 15:29:14 2020 (r356417) @@ -514,7 +514,7 @@ cc_post_recovery(struct tcpcb *tp, struct tcphdr *th) (tlen <= tp->t_maxseg) && \ (V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN))) -static void inline +void inline cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos) { INP_WLOCK_ASSERT(tp->t_inpcb); Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Mon Jan 6 13:21:10 2020 (r356416) +++ head/sys/netinet/tcp_stacks/rack.c Mon Jan 6 15:29:14 2020 (r356417) @@ -7715,6 +7715,10 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr TCPSTAT_INC(tcps_ecn_ect1); break; } + + /* Process a packet differently from RFC3168. */ + cc_ecnpkt_handler(tp, th, iptos); + /* Congestion experienced. 
*/ if (thflags & TH_ECE) { rack_cong_signal(tp, th, CC_ECN); Modified: head/sys/netinet/tcp_var.h == --- head/sys/netinet/tcp_var.h Mon Jan 6 13:21:10 2020(r356416) +++ head/sys/netinet/tcp_var.h Mon Jan 6 15:29:14 2020(r356417) @@ -891,6 +891,7 @@ voidcc_ack_received(struct tcpcb *tp, struct tcphdr * uint16_t nsegs, uint16_t type); void cc_conn_init(struct tcpcb *tp); void cc_post_recovery(struct tcpcb *tp, struct tcphdr *th); +voidcc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos); void cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type); #ifdef TCP_HHOOK void hhook_run_tcp_est_in(struct tcpcb *tp, ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r356414 - head/sys/netinet
Author: rrs Date: Mon Jan 6 12:48:06 2020 New Revision: 356414 URL: https://svnweb.freebsd.org/changeset/base/356414 Log: This change adds a small feature to the tcp logging code. Basically a connection can now have a separate tag added to the id. Obtained from:Lawrence Stewart Sponsored by: Netflix Inc Differential Revision:https://reviews.freebsd.org/D22866 Modified: head/sys/netinet/tcp_log_buf.c head/sys/netinet/tcp_log_buf.h head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_log_buf.c == --- head/sys/netinet/tcp_log_buf.c Mon Jan 6 10:52:13 2020 (r356413) +++ head/sys/netinet/tcp_log_buf.c Mon Jan 6 12:48:06 2020 (r356414) @@ -43,7 +43,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include /* Must come after qmath.h and tree.h */ #include #include @@ -78,6 +78,7 @@ static u_long tcp_log_auto_ratio = 0; static volatile u_long tcp_log_auto_ratio_cur = 0; static uint32_t tcp_log_auto_mode = TCP_LOG_STATE_TAIL; static bool tcp_log_auto_all = false; +static uint32_t tcp_disable_all_bb_logs = 0; RB_PROTOTYPE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp) @@ -111,6 +112,10 @@ SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_tcpc SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_version, CTLFLAG_RD, _log_version, 0, "Version of log formats exported"); +SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, disable_all, CTLFLAG_RW, +_disable_all_bb_logs, TCP_LOG_STATE_HEAD_AUTO, +"Disable all BB logging for all connections"); + SYSCTL_ULONG(_net_inet_tcp_bb, OID_AUTO, log_auto_ratio, CTLFLAG_RW, _log_auto_ratio, 0, "Do auto capturing for 1 out of N sessions"); @@ -156,7 +161,18 @@ SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, freed, #ifdef INVARIANTS #defineTCPLOG_DEBUG_RINGBUF #endif +/* Number of requests to consider a PBCID "active". */ +#defineACTIVE_REQUEST_COUNT10 +/* Statistic tracking for "active" PBCIDs. 
*/ +static counter_u64_t tcp_log_pcb_ids_cur; +static counter_u64_t tcp_log_pcb_ids_tot; + +SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_cur, CTLFLAG_RD, +_log_pcb_ids_cur, "Number of pcb IDs allocated in the system"); +SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_tot, CTLFLAG_RD, +_log_pcb_ids_tot, "Total number of pcb IDs that have been allocated"); + struct tcp_log_mem { STAILQ_ENTRY(tcp_log_mem) tlm_queue; @@ -240,10 +256,14 @@ struct tcp_log_id_bucket * (struct tcp_log_id_bucket *) and (char *) interchangeably. */ chartlb_id[TCP_LOG_ID_LEN]; + chartlb_tag[TCP_LOG_TAG_LEN]; RB_ENTRY(tcp_log_id_bucket) tlb_rb; struct tcp_log_id_head tlb_head; struct mtx tlb_mtx; volatile u_int tlb_refcnt; + volatile u_int tlb_reqcnt; + uint32_ttlb_loglimit; + uint8_t tlb_logstate; }; struct tcp_log_id_node @@ -285,6 +305,7 @@ tcp_log_selectauto(void) * this session. */ if (tcp_log_auto_ratio && + (tcp_disable_all_bb_logs == 0) && (atomic_fetchadd_long(_log_auto_ratio_cur, 1) % tcp_log_auto_ratio) == 0) return (true); @@ -337,6 +358,7 @@ tcp_log_remove_bucket(struct tcp_log_id_bucket *tlb) #endif } TCPID_BUCKET_LOCK_DESTROY(tlb); + counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1); uma_zfree(tcp_log_bucket_zone, tlb); } @@ -484,7 +506,53 @@ tcp_log_grow_tlb(char *tlb_id, struct tcpcb *tp) #endif } +static void +tcp_log_increment_reqcnt(struct tcp_log_id_bucket *tlb) +{ + + atomic_fetchadd_int(>tlb_reqcnt, 1); +} + /* + * Associate the specified tag with a particular TCP log ID. + * Called with INPCB locked. Returns with it unlocked. + * Returns 0 on success or EOPNOTSUPP if the connection has no TCP log ID. 
+ */ +int +tcp_log_set_tag(struct tcpcb *tp, char *tag) +{ + struct tcp_log_id_bucket *tlb; + int tree_locked; + + INP_WLOCK_ASSERT(tp->t_inpcb); + + tree_locked = TREE_UNLOCKED; + tlb = tp->t_lib; + if (tlb == NULL) { + INP_WUNLOCK(tp->t_inpcb); + return (EOPNOTSUPP); + } + + TCPID_BUCKET_REF(tlb); + INP_WUNLOCK(tp->t_inpcb); + TCPID_BUCKET_LOCK(tlb); + strlcpy(tlb->tlb_tag, tag, TCP_LOG_TAG_LEN); + if (!tcp_log_unref_bucket(tlb, _locked, NULL)) + TCPID_BUCKET_UNLOCK(tlb); + + if (tree_locked == TREE_WLOCKED) { + TCPID_TREE_WLOCK_ASSERT(); + TCPID_TREE_WUNLOCK(); + } else if (tree_locked == TREE_RLOCKED) { + TCPID_TREE_RLOCK_ASSERT(); + TCPID_TREE_RUNLOCK(); + } else +
svn commit: r355859 - in head/sys/netinet: . tcp_stacks
Author: rrs Date: Tue Dec 17 16:08:07 2019 New Revision: 355859 URL: https://svnweb.freebsd.org/changeset/base/355859 Log: This commit is a bit of a re-arrange of deck chairs. It gets both rack and bbr ready for the completion of the STATs framework in FreeBSD. For now if you don't have both NF_stats and stats on it disables them. As soon as the rest of the stats framework lands we can remove that restriction and then just uses stats when defined. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D22479 Modified: head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_stacks/rack_bbr_common.c head/sys/netinet/tcp_stacks/rack_bbr_common.h head/sys/netinet/tcp_stacks/sack_filter.c head/sys/netinet/tcp_stacks/tcp_bbr.h head/sys/netinet/tcp_stacks/tcp_rack.h head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Tue Dec 17 15:56:48 2019 (r355858) +++ head/sys/netinet/tcp_stacks/bbr.c Tue Dec 17 16:08:07 2019 (r355859) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2016-2019 + * Copyright (c) 2016-9 * Netflix Inc. * All rights reserved. * @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include "opt_ratelimit.h" #include "opt_kern_tls.h" #include +#include #include #include #ifdef TCP_HHOOK @@ -57,9 +58,9 @@ __FBSDID("$FreeBSD$"); #endif #include #include +#ifdef STATS #include #include -#ifdef NETFLIX_STATS #include /* Must come after qmath.h and tree.h */ #endif #include @@ -161,9 +162,8 @@ static int32_t bbr_num_pktepo_for_del_limit = BBR_NUM_ static int32_t bbr_hardware_pacing_limit = 8000; static int32_t bbr_quanta = 3; /* How much extra quanta do we get? 
*/ static int32_t bbr_no_retran = 0; -static int32_t bbr_tcp_map_entries_limit = 1500; -static int32_t bbr_tcp_map_split_limit = 256; + static int32_t bbr_error_base_paceout = 1; /* usec to pace */ static int32_t bbr_max_net_error_cnt = 10; /* Should the following be dynamic too -- loss wise */ @@ -3381,8 +3381,8 @@ bbr_alloc(struct tcp_bbr *bbr) static struct bbr_sendmap * bbr_alloc_full_limit(struct tcp_bbr *bbr) { - if ((bbr_tcp_map_entries_limit > 0) && - (bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) { + if ((V_tcp_map_entries_limit > 0) && + (bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) { BBR_STAT_INC(bbr_alloc_limited); if (!bbr->alloc_limit_reported) { bbr->alloc_limit_reported = 1; @@ -3402,8 +3402,8 @@ bbr_alloc_limit(struct tcp_bbr *bbr, uint8_t limit_typ if (limit_type) { /* currently there is only one limit type */ - if (bbr_tcp_map_split_limit > 0 && - bbr->r_ctl.rc_num_split_allocs >= bbr_tcp_map_split_limit) { + if (V_tcp_map_split_limit > 0 && + bbr->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) { BBR_STAT_INC(bbr_split_limited); if (!bbr->alloc_limit_reported) { bbr->alloc_limit_reported = 1; @@ -3685,7 +3685,7 @@ bbr_ack_received(struct tcpcb *tp, struct tcp_bbr *bbr uint32_t cwnd, target_cwnd, saved_bytes, maxseg; int32_t meth; -#ifdef NETFLIX_STATS +#ifdef STATS if ((tp->t_flags & TF_GPUTINPROG) && SEQ_GEQ(th->th_ack, tp->gput_ack)) { /* @@ -6510,7 +6510,7 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct } TCPSTAT_INC(tcps_rttupdated); tp->t_rttupdated++; -#ifdef NETFLIX_STATS +#ifdef STATS stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt_ticks)); #endif /* @@ -8490,6 +8490,7 @@ dodata: /* XXX */ return (0); } } + #endif if (DELAY_ACK(tp, bbr, nsegs) || tfo_syn) { bbr->bbr_segs_rcvd += max(1, nsegs); @@ -8698,6 +8699,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, * reassembly queue and we have enough buffer space to take it. 
*/ nsegs = max(1, m->m_pkthdr.lro_nsegs); + #ifdef NETFLIX_SB_LIMITS if (so->so_rcv.sb_shlim) { mcnt = m_memcnt(m); @@ -8746,6 +8748,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, newsize, so, NULL)) so->so_rcv.sb_flags &= ~SB_AUTOSIZE; m_adj(m, drop_hdrlen); /* delayed header drop */ + #ifdef NETFLIX_SB_LIMITS appended = #endif @@ -11561,7 +11564,7 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr * the scale is zero.
svn commit: r354013 - head/sys/netinet/tcp_stacks
Author: rrs Date: Thu Oct 24 05:54:30 2019 New Revision: 354013 URL: https://svnweb.freebsd.org/changeset/base/354013 Log: Fix a small bug in bbr when running under a VM. Basically, what happens is that the pacer is more delayed in calling in, so we remove the stack from the pacer and recalculate how much time is left after all data has been acknowledged. However, the comparison was backwards, so we end up with a negative value in the last_pacing_delay time, which causes us to add a huge value to the next pacing time, thus stalling the connection. Reported by: vm2.fina...@gmail.com Modified: head/sys/netinet/tcp_stacks/bbr.c Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Thu Oct 24 04:12:38 2019 (r354012) +++ head/sys/netinet/tcp_stacks/bbr.c Thu Oct 24 05:54:30 2019 (r354013) @@ -11814,12 +11814,13 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr uint32_t del; del = lcts - bbr->rc_pacer_started; - if (del > bbr->r_ctl.rc_last_delay_val) { + if (bbr->r_ctl.rc_last_delay_val > del) { BBR_STAT_INC(bbr_force_timer_start); bbr->r_ctl.rc_last_delay_val -= del; bbr->rc_pacer_started = lcts; } else { /* We are late */ + bbr->r_ctl.rc_last_delay_val = 0; BBR_STAT_INC(bbr_force_output); (void)tp->t_fb->tfb_tcp_output(tp); } @@ -12278,8 +12279,9 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeva * We are early setup to adjust * our slot time. 
*/ + uint64_t merged_val; + bbr->r_ctl.rc_agg_early += (bbr->r_ctl.rc_last_delay_val - delay_calc); - bbr->r_ctl.rc_last_delay_val = 0; bbr->r_agg_early_set = 1; if (bbr->r_ctl.rc_hptsi_agg_delay) { if (bbr->r_ctl.rc_hptsi_agg_delay >= bbr->r_ctl.rc_agg_early) { @@ -12292,9 +12294,13 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeva bbr->r_ctl.rc_hptsi_agg_delay = 0; } } + merged_val = bbr->rc_pacer_started; + merged_val <<= 32; + merged_val |= bbr->r_ctl.rc_last_delay_val; bbr_log_pacing_delay_calc(bbr, inp->inp_hpts_calls, -bbr->r_ctl.rc_agg_early, cts, 3, 0, +bbr->r_ctl.rc_agg_early, cts, delay_calc, merged_val, bbr->r_agg_early_set, 3); + bbr->r_ctl.rc_last_delay_val = 0; BBR_STAT_INC(bbr_early); delay_calc = 0; } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r353490 - head/sys/netinet/tcp_stacks
Author: rrs Date: Mon Oct 14 13:10:29 2019 New Revision: 353490 URL: https://svnweb.freebsd.org/changeset/base/353490 Log: if_hw_tsomaxsegsize needs to be initialized to zero, just like in bbr.c and tcp_output.c Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Mon Oct 14 13:04:04 2019 (r353489) +++ head/sys/netinet/tcp_stacks/rack.c Mon Oct 14 13:10:29 2019 (r353490) @@ -8115,7 +8115,7 @@ rack_output(struct tcpcb *tp) struct mbuf *m; struct mbuf *mb; uint32_t if_hw_tsomaxsegcount = 0; - uint32_t if_hw_tsomaxsegsize; + uint32_t if_hw_tsomaxsegsize = 0; int32_t maxseg; long tot_len_this_send = 0; struct ip *ip = NULL; ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r353156 - in head/sys: netinet sys
Author: rrs Date: Sun Oct 6 22:29:02 2019 New Revision: 353156 URL: https://svnweb.freebsd.org/changeset/base/353156 Log: Brad Davis identified a problem with the new LRO code, VLAN's no longer worked. The problem was that the defines used the same space as the VLAN id. This commit does three things. 1) Move the LRO used fields to the PH_per fields. This is safe since the entire PH_per is used for IP reassembly which LRO code will not hit. 2) Remove old unused pace fields that are not used in mbuf.h 3) The VLAN processing is not in the mbuf queueing code. Consequently if a VLAN submits to Rack or BBR we need to bypass the mbuf queueing for now until rack_bbr_common is updated to handle the VLAN properly. Reported by: Brad Davis Modified: head/sys/netinet/tcp_lro.c head/sys/sys/mbuf.h Modified: head/sys/netinet/tcp_lro.c == --- head/sys/netinet/tcp_lro.c Sun Oct 6 22:18:03 2019(r353155) +++ head/sys/netinet/tcp_lro.c Sun Oct 6 22:29:02 2019(r353156) @@ -875,7 +875,14 @@ tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *l /* Now lets lookup the inp first */ CURVNET_SET(lc->ifp->if_vnet); - if (tcplro_stacks_wanting_mbufq == 0) + /* +* XXXRRS Currently the common input handler for +* mbuf queuing cannot handle VLAN Tagged. This needs +* to be fixed and the or condition removed (i.e. the +* common code should do the right lookup for the vlan +* tag and anything else that the vlan_input() does). 
+*/ + if ((tcplro_stacks_wanting_mbufq == 0) || (le->m_head->m_flags & M_VLANTAG)) goto skip_lookup; INP_INFO_RLOCK_ET(_tcbinfo, et); switch (le->eh_type) { Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Sun Oct 6 22:18:03 2019(r353155) +++ head/sys/sys/mbuf.h Sun Oct 6 22:29:02 2019(r353156) @@ -194,18 +194,13 @@ struct pkthdr { }; #defineether_vtag PH_per.sixteen[0] #definePH_vt PH_per -#definevt_nrecssixteen[0] -#definetso_segsz PH_per.sixteen[1] -#definelro_nsegs tso_segsz -#definecsum_phsum PH_per.sixteen[2] -#definecsum_data PH_per.thirtytwo[1] -#define lro_lenPH_per.sixteen[0] /* inbound during LRO */ -#define lro_csum PH_per.sixteen[1] /* inbound during LRO */ -#define pace_thoff PH_loc.sixteen[0] -#define pace_tlen PH_loc.sixteen[1] -#define pace_drphdrlen PH_loc.sixteen[2] -#define pace_tos PH_loc.eight[6] -#define pace_lock PH_loc.eight[7] +#definevt_nrecssixteen[0]/* mld and v6-ND */ +#definetso_segsz PH_per.sixteen[1] /* inbound after LRO */ +#definelro_nsegs tso_segsz /* inbound after LRO */ +#definecsum_data PH_per.thirtytwo[1] /* inbound from hardware up */ +#define lro_lenPH_loc.sixteen[0] /* inbound during LRO (no reassembly) */ +#define lro_csum PH_loc.sixteen[1] /* inbound during LRO (no reassembly) */ +/* Note PH_loc is used during IP reassembly (all 8 bytes as a ptr) */ /* * Description of external storage mapped into mbuf; valid only if M_EXT is ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r352661 - head/sys/netinet/tcp_stacks
Author: rrs Date: Tue Sep 24 20:36:43 2019 New Revision: 352661 URL: https://svnweb.freebsd.org/changeset/base/352661 Log: Let's put (void) in the empty parameter lists of a few functions to keep older platforms that are stuck with gcc (ppc) happy. The changes are needed in both bbr and rack. Obtained from:Michael Tuexen (mtuexen@) Modified: head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Tue Sep 24 20:11:55 2019 (r352660) +++ head/sys/netinet/tcp_stacks/bbr.c Tue Sep 24 20:36:43 2019 (r352661) @@ -1174,7 +1174,7 @@ sysctl_bbr_clear_lost(SYSCTL_HANDLER_ARGS) } static void -bbr_init_sysctls() +bbr_init_sysctls(void) { struct sysctl_oid *bbr_probertt; struct sysctl_oid *bbr_hptsi; @@ -1875,7 +1875,7 @@ bbr_progress_timeout_check(struct tcp_bbr *bbr) } static void -bbr_counter_destroy() +bbr_counter_destroy(void) { COUNTER_ARRAY_FREE(bbr_stat_arry, BBR_STAT_SIZE); COUNTER_ARRAY_FREE(bbr_opts_arry, BBR_OPTS_SIZE); Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Tue Sep 24 20:11:55 2019 (r352660) +++ head/sys/netinet/tcp_stacks/rack.c Tue Sep 24 20:36:43 2019 (r352661) @@ -514,7 +514,7 @@ sysctl_rack_clear(SYSCTL_HANDLER_ARGS) static void -rack_init_sysctls() +rack_init_sysctls(void) { struct sysctl_oid *rack_counters; struct sysctl_oid *rack_attack; @@ -1512,7 +1512,7 @@ rack_log_sad(struct tcp_rack *rack, int event) #endif static void -rack_counter_destroy() +rack_counter_destroy(void) { counter_u64_free(rack_badfr); counter_u64_free(rack_badfr_bytes); ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys
Hmm It looks like BBR needs an update too since it calls the inpcb detach of the ratelimit function too… I may need to reassess this since it should use only the tcp_ratelimit interfaces… but for now an simple ifdef will work make sure to pick up r352660 (and actually it might be best to include ratelimit.. it costs little and makes it so if you do get a nic that supports rate limiting you will be able to take advantage of it) R > On Sep 24, 2019, at 1:06 PM, Randall Stewart wrote: > > Right > > Thats because GENERIC does not add the optional TCP stacks. > > Ok the problem is fixed with r352659 > > The tcp_ratelimit.h had a mixed up ifdef > > i.e. > > #ifdef RATELIMIT > #ifdef _KERNEL > > definitions > > #else > > macro definitions that return error > #endif > #endif > > > Which should have been the opposite > > #ifdef _KERNEL > #ifdef RATELIMIT > > definitions > > #else > > > macros def’s returning errors > > #endif > #endif > > Reversing that will fix the issue if you add the extra stacks but fail to add > RATELIMIT > > R > >> On Sep 24, 2019, at 1:01 PM, Li-Wen Hsu wrote: >> >> I mean the head (r352657) world and GENERIC kernel can be built >> successfully on 12.0-R, which is we guaranteed. >> Also the LINT kernel build is fine on CI: >> https://ci.freebsd.org/job/FreeBSD-head-amd64-LINT/13781/ >> >> So I was curious about the build environment of that build failure. >> >> Best, >> Li-Wen >> >> On Tue, Sep 24, 2019 at 9:55 PM Randall Stewart wrote: >>> >>> 12.0R would not have BBR .. its only in head… hmm it could be a issue with >>> TCP_RATELIMIT not defined >>> though I did compile GENERIC without the extra stacks (and without rate >>> limit and hpts) and that >>> compiled ok.. >>> >>> R >>> >>>> On Sep 24, 2019, at 12:49 PM, Li-Wen Hsu wrote: >>>> >>>> On Tue, Sep 24, 2019 at 9:29 PM O. 
Hartmann wrote: >>>>> >>>>> -BEGIN PGP SIGNED MESSAGE- >>>>> Hash: SHA256 >>>>> >>>>> Am Tue, 24 Sep 2019 18:18:11 + (UTC) >>>>> Randall Stewart schrieb: >>>>> >>>>>> Author: rrs >>>>>> Date: Tue Sep 24 18:18:11 2019 >>>>>> New Revision: 352657 >>>>>> URL: https://svnweb.freebsd.org/changeset/base/352657 >>>> >>>> ... >>>> >>>>> This break kernel builds: >>>>> >>>>> [...] >>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: >>>>> error: implicit >>>>> declaration of function 'tcp_chg_pacing_rate' is invalid in C99 >>>>> [-Werror,-Wimplicit-function-declaration] nrte = >>>>> tcp_chg_pacing_rate(bbr->r_ctl.crte, ^ >>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: >>>>> error: this function >>>>> declaration is not a prototype [-Werror,-Wstrict-prototypes] >>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:7: >>>>> error: incompatible >>>>> integer to pointer conversion assigning to 'const struct >>>>> tcp_hwrate_limit_table *' from 'int' >>>>> [-Werror,-Wint-conversion] nrte = tcp_chg_pacing_rate(bbr->r_ctl.crte, ^ >>>>> --- all_subdir_toecore --- Building >>>>> /usr/obj/usr/src/amd64.amd64/sys/THOR/modules/usr/src/sys/modules/toecore/toecore.ko >>>>> --- >>>>> all_subdir_tcp --- >>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: >>>>> error: implicit declaration of function 'tcp_rel_pacing_rate' is invalid >>>>> in C99 >>>>> [-Werror,-Wimplicit-function-declaration] >>>>> tcp_rel_pacing_rate(bbr->r_ctl.crte, bbr->rc_tp); ^ >>>>> - --- all_subdir_tpm --- >>>>> ===> tpm (all) >>>>> - --- all_subdir_tcp --- >>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: >>>>> error: this function >>>>> declaration is not a prototype [-Werror,-Wstrict-prototypes] --- >>>>> all_subdir_trm --- >>>>> ===> trm (all) >>>>> - --- all_subdir_tcp --- >>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:14307:21: >>>>> error: implicit >>>>> 
declaration of function 'tcp_set_pacing_rate' is invalid in C99 >>>>> [-Werror,-Wimplicit-function-declaration] bbr->r_ctl.crte = >>>>> tcp_set_pacing_rate(bbr->rc_tp, >>>> >>>> CI completed a clean build on 12.0-R: >>>> https://ci.freebsd.org/job/FreeBSD-head-amd64-build/14672/ >>>> >>>> What's your build environment and platform? >>>> >>>> Best, >>>> Li-Wen >>> >>> -- >>> Randall Stewart >>> r...@netflix.com >>> >>> >>> > > -- > Randall Stewart > r...@netflix.com > > > -- Randall Stewart r...@netflix.com ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r352660 - head/sys/netinet/tcp_stacks
Author: rrs Date: Tue Sep 24 20:11:55 2019 New Revision: 352660 URL: https://svnweb.freebsd.org/changeset/base/352660 Log: Don't call the ratelimit detach function (in_pcbdetach_txrtlmt) when RATELIMIT is not compiled into the kernel. Modified: head/sys/netinet/tcp_stacks/bbr.c Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Tue Sep 24 20:04:31 2019 (r352659) +++ head/sys/netinet/tcp_stacks/bbr.c Tue Sep 24 20:11:55 2019 (r352660) @@ -14784,10 +14784,12 @@ bbr_set_sockopt(struct socket *so, struct sockopt *sop bbr->bbr_attempt_hdwr_pace = 0; } else { bbr->bbr_hdw_pace_ena = 0; +#ifdef RATELIMIT if (bbr->bbr_hdrw_pacing) { bbr->bbr_hdrw_pacing = 0; in_pcbdetach_txrtlmt(bbr->rc_inp); } +#endif } break; ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys
Right Thats because GENERIC does not add the optional TCP stacks. Ok the problem is fixed with r352659 The tcp_ratelimit.h had a mixed up ifdef i.e. #ifdef RATELIMIT #ifdef _KERNEL definitions #else macro definitions that return error #endif #endif Which should have been the opposite #ifdef _KERNEL #ifdef RATELIMIT definitions #else macros def’s returning errors #endif #endif Reversing that will fix the issue if you add the extra stacks but fail to add RATELIMIT R > On Sep 24, 2019, at 1:01 PM, Li-Wen Hsu wrote: > > I mean the head (r352657) world and GENERIC kernel can be built > successfully on 12.0-R, which is we guaranteed. > Also the LINT kernel build is fine on CI: > https://ci.freebsd.org/job/FreeBSD-head-amd64-LINT/13781/ > > So I was curious about the build environment of that build failure. > > Best, > Li-Wen > > On Tue, Sep 24, 2019 at 9:55 PM Randall Stewart wrote: >> >> 12.0R would not have BBR .. its only in head… hmm it could be a issue with >> TCP_RATELIMIT not defined >> though I did compile GENERIC without the extra stacks (and without rate >> limit and hpts) and that >> compiled ok.. >> >> R >> >>> On Sep 24, 2019, at 12:49 PM, Li-Wen Hsu wrote: >>> >>> On Tue, Sep 24, 2019 at 9:29 PM O. Hartmann wrote: >>>> >>>> -BEGIN PGP SIGNED MESSAGE- >>>> Hash: SHA256 >>>> >>>> Am Tue, 24 Sep 2019 18:18:11 + (UTC) >>>> Randall Stewart schrieb: >>>> >>>>> Author: rrs >>>>> Date: Tue Sep 24 18:18:11 2019 >>>>> New Revision: 352657 >>>>> URL: https://svnweb.freebsd.org/changeset/base/352657 >>> >>> ... >>> >>>> This break kernel builds: >>>> >>>> [...] 
>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: >>>> error: implicit >>>> declaration of function 'tcp_chg_pacing_rate' is invalid in C99 >>>> [-Werror,-Wimplicit-function-declaration] nrte = >>>> tcp_chg_pacing_rate(bbr->r_ctl.crte, ^ >>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: >>>> error: this function >>>> declaration is not a prototype [-Werror,-Wstrict-prototypes] >>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:7: >>>> error: incompatible >>>> integer to pointer conversion assigning to 'const struct >>>> tcp_hwrate_limit_table *' from 'int' >>>> [-Werror,-Wint-conversion] nrte = tcp_chg_pacing_rate(bbr->r_ctl.crte, ^ >>>> --- all_subdir_toecore --- Building >>>> /usr/obj/usr/src/amd64.amd64/sys/THOR/modules/usr/src/sys/modules/toecore/toecore.ko >>>> --- >>>> all_subdir_tcp --- >>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: >>>> error: implicit declaration of function 'tcp_rel_pacing_rate' is invalid >>>> in C99 >>>> [-Werror,-Wimplicit-function-declaration] >>>> tcp_rel_pacing_rate(bbr->r_ctl.crte, bbr->rc_tp); ^ >>>> - --- all_subdir_tpm --- >>>> ===> tpm (all) >>>> - --- all_subdir_tcp --- >>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: >>>> error: this function >>>> declaration is not a prototype [-Werror,-Wstrict-prototypes] --- >>>> all_subdir_trm --- >>>> ===> trm (all) >>>> - --- all_subdir_tcp --- >>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:14307:21: >>>> error: implicit >>>> declaration of function 'tcp_set_pacing_rate' is invalid in C99 >>>> [-Werror,-Wimplicit-function-declaration] bbr->r_ctl.crte = >>>> tcp_set_pacing_rate(bbr->rc_tp, >>> >>> CI completed a clean build on 12.0-R: >>> https://ci.freebsd.org/job/FreeBSD-head-amd64-build/14672/ >>> >>> What's your build environment and platform? 
>>> >>> Best, >>> Li-Wen >> >> -- >> Randall Stewart >> r...@netflix.com >> >> >> -- Randall Stewart r...@netflix.com ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r352659 - head/sys/netinet
Author: rrs Date: Tue Sep 24 20:04:31 2019 New Revision: 352659 URL: https://svnweb.freebsd.org/changeset/base/352659 Log: Fix the ifdefs in tcp_ratelimit.h. They were nested in the reverse order: the functions should only be visible inside _KERNEL, with the absence of RATELIMIT giving us NULL/error-returning interfaces, but instead we ended up with non-kernel code getting the error path. Oops. Modified: head/sys/netinet/tcp_ratelimit.h Modified: head/sys/netinet/tcp_ratelimit.h == --- head/sys/netinet/tcp_ratelimit.hTue Sep 24 20:01:20 2019 (r352658) +++ head/sys/netinet/tcp_ratelimit.hTue Sep 24 20:04:31 2019 (r352659) @@ -87,8 +87,8 @@ CK_LIST_HEAD(head_tcp_rate_set, tcp_rate_set); #define RS_PACING_LT 0x0008 /* Less than requested rate */ #define RS_PACING_SUB_OK 0x0010 /* If a rate can't be found get the * next best rate (highest or lowest). */ -#ifdef RATELIMIT #ifdef _KERNEL +#ifdef RATELIMIT #define DETAILED_RATELIMIT_SYSCTL 1/* * Undefine this if you don't want * detailed rates to appear in ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys
Ok I have found it Its a reversal in an ifdef in tcp_ratelimit.h .. it supposed to be that if its not define (RATELIMIT) the main interfaces return errors.. and the ifdef kernel/ratelimit is reversed of what it should be. Let me fix that :) R > On Sep 24, 2019, at 12:55 PM, Randall Stewart wrote: > > 12.0R would not have BBR .. its only in head… hmm it could be a issue with > TCP_RATELIMIT not defined > though I did compile GENERIC without the extra stacks (and without rate limit > and hpts) and that > compiled ok.. > > R > >> On Sep 24, 2019, at 12:49 PM, Li-Wen Hsu wrote: >> >> On Tue, Sep 24, 2019 at 9:29 PM O. Hartmann wrote: >>> >>> -BEGIN PGP SIGNED MESSAGE- >>> Hash: SHA256 >>> >>> Am Tue, 24 Sep 2019 18:18:11 + (UTC) >>> Randall Stewart schrieb: >>> >>>> Author: rrs >>>> Date: Tue Sep 24 18:18:11 2019 >>>> New Revision: 352657 >>>> URL: https://svnweb.freebsd.org/changeset/base/352657 >> >> ... >> >>> This break kernel builds: >>> >>> [...] >>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: >>> error: implicit >>> declaration of function 'tcp_chg_pacing_rate' is invalid in C99 >>> [-Werror,-Wimplicit-function-declaration] nrte = >>> tcp_chg_pacing_rate(bbr->r_ctl.crte, ^ >>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: >>> error: this function >>> declaration is not a prototype [-Werror,-Wstrict-prototypes] >>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:7: >>> error: incompatible >>> integer to pointer conversion assigning to 'const struct >>> tcp_hwrate_limit_table *' from 'int' >>> [-Werror,-Wint-conversion] nrte = tcp_chg_pacing_rate(bbr->r_ctl.crte, ^ >>> --- all_subdir_toecore --- Building >>> /usr/obj/usr/src/amd64.amd64/sys/THOR/modules/usr/src/sys/modules/toecore/toecore.ko >>> --- >>> all_subdir_tcp --- >>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: >>> error: implicit declaration of function 'tcp_rel_pacing_rate' is invalid in >>> C99 >>> 
[-Werror,-Wimplicit-function-declaration] >>> tcp_rel_pacing_rate(bbr->r_ctl.crte, bbr->rc_tp); ^ >>> - --- all_subdir_tpm --- >>> ===> tpm (all) >>> - --- all_subdir_tcp --- >>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: >>> error: this function >>> declaration is not a prototype [-Werror,-Wstrict-prototypes] --- >>> all_subdir_trm --- >>> ===> trm (all) >>> - --- all_subdir_tcp --- >>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:14307:21: >>> error: implicit >>> declaration of function 'tcp_set_pacing_rate' is invalid in C99 >>> [-Werror,-Wimplicit-function-declaration] bbr->r_ctl.crte = >>> tcp_set_pacing_rate(bbr->rc_tp, >> >> CI completed a clean build on 12.0-R: >> https://ci.freebsd.org/job/FreeBSD-head-amd64-build/14672/ >> >> What's your build environment and platform? >> >> Best, >> Li-Wen > > -- > Randall Stewart > r...@netflix.com > > > -- Randall Stewart r...@netflix.com ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys
12.0R would not have BBR .. it's only in head… hmm, it could be an issue with TCP_RATELIMIT not defined though I did compile GENERIC without the extra stacks (and without rate limit and hpts) and that compiled ok.. R > On Sep 24, 2019, at 12:49 PM, Li-Wen Hsu wrote: > > On Tue, Sep 24, 2019 at 9:29 PM O. Hartmann wrote: >> >> -BEGIN PGP SIGNED MESSAGE- >> Hash: SHA256 >> >> Am Tue, 24 Sep 2019 18:18:11 + (UTC) >> Randall Stewart schrieb: >> >>> Author: rrs >>> Date: Tue Sep 24 18:18:11 2019 >>> New Revision: 352657 >>> URL: https://svnweb.freebsd.org/changeset/base/352657 > > ... > >> This break kernel builds: >> >> [...] >> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: >> error: implicit >> declaration of function 'tcp_chg_pacing_rate' is invalid in C99 >> [-Werror,-Wimplicit-function-declaration] nrte = >> tcp_chg_pacing_rate(bbr->r_ctl.crte, ^ >> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: >> error: this function >> declaration is not a prototype [-Werror,-Wstrict-prototypes] >> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:7: >> error: incompatible >> integer to pointer conversion assigning to 'const struct >> tcp_hwrate_limit_table *' from 'int' >> [-Werror,-Wint-conversion] nrte = tcp_chg_pacing_rate(bbr->r_ctl.crte, ^ >> --- all_subdir_toecore --- Building >> /usr/obj/usr/src/amd64.amd64/sys/THOR/modules/usr/src/sys/modules/toecore/toecore.ko >> --- >> all_subdir_tcp --- >> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: >> error: implicit declaration of function 'tcp_rel_pacing_rate' is invalid in >> C99 >> [-Werror,-Wimplicit-function-declaration] >> tcp_rel_pacing_rate(bbr->r_ctl.crte, bbr->rc_tp); ^ >> - --- all_subdir_tpm --- >> ===> tpm (all) >> - --- all_subdir_tcp --- >> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: >> error: this function >> declaration is not a prototype [-Werror,-Wstrict-prototypes] --- >> all_subdir_trm --- >> 
===> trm (all) >> - --- all_subdir_tcp --- >> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:14307:21: >> error: implicit >> declaration of function 'tcp_set_pacing_rate' is invalid in C99 >> [-Werror,-Wimplicit-function-declaration] bbr->r_ctl.crte = >> tcp_set_pacing_rate(bbr->rc_tp, > > CI completed a clean build on 12.0-R: > https://ci.freebsd.org/job/FreeBSD-head-amd64-build/14672/ > > What's your build environment and platform? > > Best, > Li-Wen -- Randall Stewart r...@netflix.com ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys
This is strange I built this and have it running on my machine with the standard make buildkern KERNCONF=myconf and make installkern KERNCONF=myconf Why can I build and it blow up.. last time I saw this I was building in amd64/compile and was getting a warning that somehow is an error.. but this time it *should* have built fine :( R > On Sep 24, 2019, at 12:28 PM, O. Hartmann wrote: > > -BEGIN PGP SIGNED MESSAGE- > Hash: SHA256 > > Am Tue, 24 Sep 2019 18:18:11 +0000 (UTC) > Randall Stewart schrieb: > >> Author: rrs >> Date: Tue Sep 24 18:18:11 2019 >> New Revision: 352657 >> URL: https://svnweb.freebsd.org/changeset/base/352657 >> >> Log: >> This commit adds BBR (Bottleneck Bandwidth and RTT) congestion control. This >> is a completely separate TCP stack (tcp_bbr.ko) that will be built only if >> you add the make options WITH_EXTRA_TCP_STACKS=1 and also include the option >> TCPHPTS. You can also include the RATELIMIT option if you have a NIC >> interface that >> supports hardware pacing, BBR understands how to use such a feature. >> >> Note that this commit also adds in a general purpose time-filter which >> allows you to have a min-filter or max-filter. A filter allows you to >> have a low (or high) value for some period of time and degrade slowly >> to another value has time passes. You can find out the details of >> BBR by looking at the original paper at: >> >> https://queue.acm.org/detail.cfm?id=3022184 >> >> or consult many other web resources you can find on the web >> referenced by "BBR congestion control". It should be noted that >> BBRv1 (which this is) does tend to unfairness in cases of small >> buffered paths, and it will usually get less bandwidth in the case >> of large BDP paths(when competing with new-reno or cubic flows). BBR >> is still an active research area and we do plan on implementing V2 >> of BBR to see if it is an improvement over V1. >> >> Sponsored by: Netflix Inc. 
>> Differential Revision: https://reviews.freebsd.org/D21582 >> >> Added: >> head/sys/kern/subr_filter.c (contents, props changed) >> head/sys/modules/tcp/bbr/ >> head/sys/modules/tcp/bbr/Makefile (contents, props changed) >> head/sys/netinet/tcp_stacks/bbr.c (contents, props changed) >> head/sys/netinet/tcp_stacks/tcp_bbr.h (contents, props changed) >> head/sys/sys/tim_filter.h (contents, props changed) >> Modified: >> head/sys/conf/files >> head/sys/modules/tcp/Makefile >> head/sys/netinet/ip_output.c >> head/sys/netinet/ip_var.h >> head/sys/netinet/tcp.h >> head/sys/netinet/tcp_stacks/rack.c >> head/sys/netinet/tcp_stacks/rack_bbr_common.c >> head/sys/netinet/tcp_stacks/rack_bbr_common.h >> head/sys/netinet/tcp_stacks/sack_filter.c >> head/sys/netinet/tcp_stacks/sack_filter.h >> head/sys/netinet/tcp_stacks/tcp_rack.h >> head/sys/sys/mbuf.h >> >> Modified: head/sys/conf/files >> == >> --- head/sys/conf/files Tue Sep 24 17:06:32 2019(r352656) >> +++ head/sys/conf/files Tue Sep 24 18:18:11 2019(r352657) >> @@ -3808,6 +3808,7 @@ kern/subr_epoch.c standard >> kern/subr_eventhandler.c standard >> kern/subr_fattime.c standard >> kern/subr_firmware.c optional firmware >> +kern/subr_filter.c standard >> kern/subr_gtaskqueue.c standard >> kern/subr_hash.c standard >> kern/subr_hints.cstandard >> >> Added: head/sys/kern/subr_filter.c >> == >> --- /dev/null00:00:00 1970 (empty, because file is newly added) >> +++ head/sys/kern/subr_filter.c Tue Sep 24 18:18:11 2019 >> (r352657) >> @@ -0,0 +1,482 @@ >> +/*- >> + * Copyright (c) 2016-2019 Netflix, Inc. >> + * All rights reserved. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions >> + * are met: >> + * 1. Redistributions of source code must retain the above copyright >> + *notice, this list of conditions and the following disclaimer. >> + * 2. 
Redistributions in binary form must reproduce the above copyright >> + *notice, this list of conditions and the following disclaimer in the >> + *documentation and/or other materials provided with the distribution. &g
svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys
Author: rrs Date: Tue Sep 24 18:18:11 2019 New Revision: 352657 URL: https://svnweb.freebsd.org/changeset/base/352657 Log: This commit adds BBR (Bottleneck Bandwidth and RTT) congestion control. This is a completely separate TCP stack (tcp_bbr.ko) that will be built only if you add the make options WITH_EXTRA_TCP_STACKS=1 and also include the option TCPHPTS. You can also include the RATELIMIT option if you have a NIC interface that supports hardware pacing, BBR understands how to use such a feature. Note that this commit also adds in a general purpose time-filter which allows you to have a min-filter or max-filter. A filter allows you to have a low (or high) value for some period of time and degrade slowly to another value has time passes. You can find out the details of BBR by looking at the original paper at: https://queue.acm.org/detail.cfm?id=3022184 or consult many other web resources you can find on the web referenced by "BBR congestion control". It should be noted that BBRv1 (which this is) does tend to unfairness in cases of small buffered paths, and it will usually get less bandwidth in the case of large BDP paths(when competing with new-reno or cubic flows). BBR is still an active research area and we do plan on implementing V2 of BBR to see if it is an improvement over V1. Sponsored by: Netflix Inc. 
Differential Revision:https://reviews.freebsd.org/D21582 Added: head/sys/kern/subr_filter.c (contents, props changed) head/sys/modules/tcp/bbr/ head/sys/modules/tcp/bbr/Makefile (contents, props changed) head/sys/netinet/tcp_stacks/bbr.c (contents, props changed) head/sys/netinet/tcp_stacks/tcp_bbr.h (contents, props changed) head/sys/sys/tim_filter.h (contents, props changed) Modified: head/sys/conf/files head/sys/modules/tcp/Makefile head/sys/netinet/ip_output.c head/sys/netinet/ip_var.h head/sys/netinet/tcp.h head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_stacks/rack_bbr_common.c head/sys/netinet/tcp_stacks/rack_bbr_common.h head/sys/netinet/tcp_stacks/sack_filter.c head/sys/netinet/tcp_stacks/sack_filter.h head/sys/netinet/tcp_stacks/tcp_rack.h head/sys/sys/mbuf.h Modified: head/sys/conf/files == --- head/sys/conf/files Tue Sep 24 17:06:32 2019(r352656) +++ head/sys/conf/files Tue Sep 24 18:18:11 2019(r352657) @@ -3808,6 +3808,7 @@ kern/subr_epoch.c standard kern/subr_eventhandler.c standard kern/subr_fattime.cstandard kern/subr_firmware.c optional firmware +kern/subr_filter.c standard kern/subr_gtaskqueue.c standard kern/subr_hash.c standard kern/subr_hints.c standard Added: head/sys/kern/subr_filter.c == --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/kern/subr_filter.c Tue Sep 24 18:18:11 2019(r352657) @@ -0,0 +1,482 @@ +/*- + * Copyright (c) 2016-2019 Netflix, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Author: Randall Stewart + */ +#include +__FBSDID("$FreeBSD$"); +#include +#include +#include +#include + +void +reset_time(struct time_filter *tf, uint32_t time_len) +{ + tf->cur_time_limit = time_len; +} + +void +reset_time_small(struct time_filter_small *tf, uint32_t time_len) +{ + tf->cur_time_limit = time_len; +} + +/* + * A time filter can be a filter for MIN or MAX. + * You call setup_time_filter() with the pointer to + * the filter structure, the
svn commit: r352215 - head/sys/netinet
Author: rrs Date: Wed Sep 11 15:41:36 2019 New Revision: 352215 URL: https://svnweb.freebsd.org/changeset/base/352215 Log: With the recent commit of ktls, we no longer have a sb_tls_flags, it's just the sb_flags. Also the ratelimit code, now that the definition is in sockbuf.h, does not need the ktls.h file (or its predecessor). Sponsored by: Netflix Inc Modified: head/sys/netinet/tcp_ratelimit.c Modified: head/sys/netinet/tcp_ratelimit.c == --- head/sys/netinet/tcp_ratelimit.cWed Sep 11 15:39:28 2019 (r352214) +++ head/sys/netinet/tcp_ratelimit.cWed Sep 11 15:41:36 2019 (r352215) @@ -45,9 +45,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#ifdef KERN_TLS -#include -#endif #include #include #include @@ -1069,7 +1066,7 @@ tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *if return (NULL); } #ifdef KERN_TLS - if (tp->t_inpcb->inp_socket->so_snd.sb_tls_flags & SB_TLS_IFNET) { + if (tp->t_inpcb->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) { /* * We currently can't do both TLS and hardware * pacing ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r351951 - head/sys/netinet
Author: rrs Date: Fri Sep 6 18:29:48 2019 New Revision: 351951 URL: https://svnweb.freebsd.org/changeset/base/351951 Log: This adds in the missing counter initialization which I had forgotten to bring over. Oops. Differential Revision: https://reviews.freebsd.org/D21127 Modified: head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_subr.c == --- head/sys/netinet/tcp_subr.c Fri Sep 6 18:25:42 2019(r351950) +++ head/sys/netinet/tcp_subr.c Fri Sep 6 18:29:48 2019(r351951) @@ -1125,6 +1125,13 @@ tcp_init(void) SHUTDOWN_PRI_DEFAULT); EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL, EVENTHANDLER_PRI_ANY); + + tcp_inp_lro_direct_queue = counter_u64_alloc(M_WAITOK); + tcp_inp_lro_wokeup_queue = counter_u64_alloc(M_WAITOK); + tcp_inp_lro_compressed = counter_u64_alloc(M_WAITOK); + tcp_inp_lro_single_push = counter_u64_alloc(M_WAITOK); + tcp_inp_lro_locks_taken = counter_u64_alloc(M_WAITOK); + tcp_inp_lro_sack_wake = counter_u64_alloc(M_WAITOK); #ifdef TCPPCAP tcp_pcap_init(); #endif Modified: head/sys/netinet/tcp_var.h == --- head/sys/netinet/tcp_var.h Fri Sep 6 18:25:42 2019(r351950) +++ head/sys/netinet/tcp_var.h Fri Sep 6 18:29:48 2019(r351951) @@ -887,6 +887,13 @@ struct tcp_function_block * find_and_ref_tcp_fb(struct tcp_function_block *fs); int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp); +extern counter_u64_t tcp_inp_lro_direct_queue; +extern counter_u64_t tcp_inp_lro_wokeup_queue; +extern counter_u64_t tcp_inp_lro_compressed; +extern counter_u64_t tcp_inp_lro_single_push; +extern counter_u64_t tcp_inp_lro_locks_taken; +extern counter_u64_t tcp_inp_lro_sack_wake; + uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *); uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *); u_int tcp_maxseg(const struct tcpcb *); ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to 
"svn-src-head-unsubscr...@freebsd.org"
svn commit: r351934 - in head/sys: netinet netinet/tcp_stacks sys
Author: rrs Date: Fri Sep 6 14:25:41 2019 New Revision: 351934 URL: https://svnweb.freebsd.org/changeset/base/351934 Log: This adds the final tweaks to LRO that will now allow me to add BBR. These changes make it so you can get an array of timestamps instead of a compressed ack/data segment. BBR uses this to aid with its delivery estimates. We also now (via Drew's suggestions) will not go to the expense of the tcb lookup if no stack registers to want this feature. If HPTS is not present the feature is not present either and you just get the compressed behavior. Sponsored by: Netflix Inc Differential Revision: https://reviews.freebsd.org/D21127 Modified: head/sys/netinet/tcp_lro.c head/sys/netinet/tcp_lro.h head/sys/netinet/tcp_stacks/rack_bbr_common.c head/sys/sys/mbuf.h Modified: head/sys/netinet/tcp_lro.c == --- head/sys/netinet/tcp_lro.c Fri Sep 6 12:29:51 2019(r351933) +++ head/sys/netinet/tcp_lro.c Fri Sep 6 14:25:41 2019(r351934) @@ -44,6 +44,8 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include #include @@ -56,11 +58,14 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include #include #include #include - +#include +#include #include #include @@ -79,11 +84,47 @@ static int tcp_lro_rx2(struct lro_ctrl *lc, struct mbu SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "TCP LRO"); +static long tcplro_stacks_wanting_mbufq = 0; +counter_u64_t tcp_inp_lro_direct_queue; +counter_u64_t tcp_inp_lro_wokeup_queue; +counter_u64_t tcp_inp_lro_compressed; +counter_u64_t tcp_inp_lro_single_push; +counter_u64_t tcp_inp_lro_locks_taken; +counter_u64_t tcp_inp_lro_sack_wake; + static unsignedtcp_lro_entries = TCP_LRO_ENTRIES; +static int32_t hold_lock_over_compress = 0; +SYSCTL_INT(_net_inet_tcp_lro, OID_AUTO, hold_lock, CTLFLAG_RW, +_lock_over_compress, 0, +"Do we hold the lock over the compress of mbufs?"); SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, entries, CTLFLAG_RDTUN | CTLFLAG_MPSAFE, 
_lro_entries, 0, "default number of LRO entries"); +SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, fullqueue, CTLFLAG_RD, +_inp_lro_direct_queue, "Number of lro's fully queued to transport"); +SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, wokeup, CTLFLAG_RD, +_inp_lro_wokeup_queue, "Number of lro's where we woke up transport via hpts"); +SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, compressed, CTLFLAG_RD, +_inp_lro_compressed, "Number of lro's compressed and sent to transport"); +SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, single, CTLFLAG_RD, +_inp_lro_single_push, "Number of lro's sent with single segment"); +SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, lockcnt, CTLFLAG_RD, +_inp_lro_locks_taken, "Number of lro's inp_wlocks taken"); +SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, sackwakeups, CTLFLAG_RD, +_inp_lro_sack_wake, "Number of wakeups caused by sack/fin"); +void +tcp_lro_reg_mbufq(void) +{ + atomic_fetchadd_long(_stacks_wanting_mbufq, 1); +} + +void +tcp_lro_dereg_mbufq(void) +{ + atomic_fetchadd_long(_stacks_wanting_mbufq, -1); +} + static __inline void tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_head *bucket, struct lro_entry *le) @@ -162,6 +203,36 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *i return (0); } +static struct tcphdr * +tcp_lro_get_th(struct lro_entry *le, struct mbuf *m) +{ + struct ether_header *eh; + struct tcphdr *th = NULL; +#ifdef INET6 + struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */ +#endif +#ifdef INET + struct ip *ip4 = NULL; /* Keep compiler happy. 
*/ +#endif + + eh = mtod(m, struct ether_header *); + switch (le->eh_type) { +#ifdef INET6 + case ETHERTYPE_IPV6: + ip6 = (struct ip6_hdr *)(eh + 1); + th = (struct tcphdr *)(ip6 + 1); + break; +#endif +#ifdef INET + case ETHERTYPE_IP: + ip4 = (struct ip *)(eh + 1); + th = (struct tcphdr *)(ip4 + 1); + break; +#endif + } + return (th); +} + void tcp_lro_free(struct lro_ctrl *lc) { @@ -192,7 +263,6 @@ tcp_lro_free(struct lro_ctrl *lc) lc->lro_mbuf_data = NULL; } -#ifdef TCP_LRO_UPDATE_CSUM static uint16_t tcp_lro_csum_th(struct tcphdr *th) { @@ -275,7 +345,6 @@ tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hd return (c & 0x); } -#endif static void tcp_lro_rx_done(struct lro_ctrl *lc) @@ -297,7 +366,7 @@ tcp_lro_flush_inactive(struct lro_ctrl *lc, const stru if (LIST_EMPTY(>lro_active)) return; - getmicrotime(); + getmicrouptime(); timevalsub(, timeout); LIST_FOREACH_SAFE(le, >lro_active, next, le_tmp) { if
svn commit: r346094 - head/sys/netinet
Author: rrs Date: Wed Apr 10 18:58:11 2019 New Revision: 346094 URL: https://svnweb.freebsd.org/changeset/base/346094 Log: Fix a small bug in the tcp_log_id where the bucket was unlocked and yet the bucket-unlock flag was not changed to false. This can cause a panic if INVARIANTS is on and we go through the right path (though rare). This fixes the correct bug :) Reported by: syzbot+179a1ad49f3c4c215...@syzkaller.appspotmail.com Reviewed by: tuexen@ Modified: head/sys/netinet/tcp_log_buf.c Modified: head/sys/netinet/tcp_log_buf.c == --- head/sys/netinet/tcp_log_buf.c Wed Apr 10 18:17:27 2019 (r346093) +++ head/sys/netinet/tcp_log_buf.c Wed Apr 10 18:58:11 2019 (r346094) @@ -752,6 +752,7 @@ refind: RECHECK_INP(); if (tp->t_lib != NULL) { TCPID_BUCKET_UNLOCK(tlb); + bucket_locked = false; tlb = NULL; goto restart; } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r345851 - head/sys/netinet
Author: rrs Date: Wed Apr 3 19:35:07 2019 New Revision: 345851 URL: https://svnweb.freebsd.org/changeset/base/345851 Log: Undo my previous erroneous commit changing the tcp_output kassert. Hmm now the question is where did the tcp_log_id change go :o Modified: head/sys/netinet/tcp_output.c Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Wed Apr 3 18:35:13 2019 (r345850) +++ head/sys/netinet/tcp_output.c Wed Apr 3 19:35:07 2019 (r345851) @@ -138,8 +138,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat * non-ACK. */ #define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags) \ - KASSERT(((len) == 0 && ((th_flags) &\ - (TH_SYN | TH_FIN | TH_RST)) != 0) ||\ + KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\ tcp_timer_active((tp), TT_REXMT) || \ tcp_timer_active((tp), TT_PERSIST), \ ("neither rexmt nor persist timer is set")) ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r351328 - head/sys/netinet/tcp_stacks
Author: rrs Date: Wed Aug 21 10:45:28 2019 New Revision: 351328 URL: https://svnweb.freebsd.org/changeset/base/351328 Log: Fix an issue when TSO and Rack play together. Basically a retransmission of the initial SYN (with data) would cause us to strip the SYN and decrement/increase offset/len which then caused us a -1 offset and a panic. Reported by: Larry Rosenman (Michael Tuexen helped me debug this at the IETF) Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Wed Aug 21 10:42:31 2019 (r351327) +++ head/sys/netinet/tcp_stacks/rack.c Wed Aug 21 10:45:28 2019 (r351328) @@ -7405,9 +7405,6 @@ again: (tp->t_state == TCPS_SYN_RECEIVED)) flags &= ~TH_SYN; #endif - sb_offset--, len++; - if (sbavail(sb) == 0) - len = 0; } /* * Be careful not to send data and/or FIN on SYN segments. This ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r350973 - head/sys/netinet/tcp_stacks
Author: rrs Date: Tue Aug 13 12:41:15 2019 New Revision: 350973 URL: https://svnweb.freebsd.org/changeset/base/350973 Log: Place back in the dependency on HPTS via module depends versus a fatal error in compiling. This was taken out by mistake when I mis-merged from the 18q22p2 sources of rack in NF. Oops. Reported by: sbruno Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Tue Aug 13 04:54:02 2019 (r350972) +++ head/sys/netinet/tcp_stacks/rack.c Tue Aug 13 12:41:15 2019 (r350973) @@ -128,10 +128,6 @@ uma_zone_t rack_pcb_zone; struct sysctl_ctx_list rack_sysctl_ctx; struct sysctl_oid *rack_sysctl_root; -#ifndef TCPHPTS -#error "fatal error missing option TCPHSTS in the build" -#endif - #define CUM_ACKED 1 #define SACKED 2 @@ -9212,3 +9208,4 @@ static moduledata_t tcp_rack = { MODULE_VERSION(MODNAME, 1); DECLARE_MODULE(MODNAME, tcp_rack, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY); +MODULE_DEPEND(MODNAME, tcphpts, 1, 1, 1); ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r350537 - head/sys/netinet
Author: rrs Date: Fri Aug 2 11:17:07 2019 New Revision: 350537 URL: https://svnweb.freebsd.org/changeset/base/350537 Log: Fix one more atomic for i86 Obtained from:mtue...@freebsd.org Modified: head/sys/netinet/tcp_ratelimit.c Modified: head/sys/netinet/tcp_ratelimit.c == --- head/sys/netinet/tcp_ratelimit.cFri Aug 2 11:05:00 2019 (r350536) +++ head/sys/netinet/tcp_ratelimit.cFri Aug 2 11:17:07 2019 (r350537) @@ -945,7 +945,7 @@ use_real_interface: * We use an atomic here for accounting so we don't have to * use locks when freeing. */ - atomic_add_long(>rs_flows_using, 1); + atomic_add_64(>rs_flows_using, 1); } epoch_exit_preempt(net_epoch_preempt, ); return (rte); ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r350521 - head/sys/netinet
Author: rrs Date: Thu Aug 1 20:26:27 2019 New Revision: 350521 URL: https://svnweb.freebsd.org/changeset/base/350521 Log: Oops, use fetchadd_u64, not long, to keep old 32-bit platforms happy. Modified: head/sys/netinet/tcp_ratelimit.c Modified: head/sys/netinet/tcp_ratelimit.c == --- head/sys/netinet/tcp_ratelimit.cThu Aug 1 19:45:34 2019 (r350520) +++ head/sys/netinet/tcp_ratelimit.cThu Aug 1 20:26:27 2019 (r350521) @@ -1186,7 +1186,7 @@ tcp_rel_pacing_rate(const struct tcp_hwrate_limit_tabl * in order to release our refcount. */ rs = __DECONST(struct tcp_rate_set *, crs); - pre = atomic_fetchadd_long(>rs_flows_using, -1); + pre = atomic_fetchadd_64(>rs_flows_using, -1); if (pre == 1) { mtx_lock(_mtx); /* ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r350501 - in head/sys: conf dev/cxgbe dev/mlx5/mlx5_en net netinet
*/ + if (!INP_TRY_UPGRADE(inp)) + return; + did_upgrade = 1; + } else { + did_upgrade = 0; + } + + /* +* NOTE: The so_max_pacing_rate value is read unlocked, +* because atomic updates are not required since the variable +* is checked at every mbuf we send. It is assumed that the +* variable read itself will be atomic. +*/ + max_pacing_rate = socket->so_max_pacing_rate; + + error = in_pcboutput_txrtlmt_locked(inp, ifp, mb, max_pacing_rate); + if (did_upgrade) INP_DOWNGRADE(inp); } @@ -3424,4 +3478,14 @@ in_pcboutput_eagain(struct inpcb *inp) if (did_upgrade) INP_DOWNGRADE(inp); } + +static void +rl_init(void *st) +{ + rate_limit_active = counter_u64_alloc(M_WAITOK); + rate_limit_alloc_fail = counter_u64_alloc(M_WAITOK); + rate_limit_set_ok = counter_u64_alloc(M_WAITOK); +} + +SYSINIT(rl, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, rl_init, NULL); #endif /* RATELIMIT */ Modified: head/sys/netinet/in_pcb.h == --- head/sys/netinet/in_pcb.h Thu Aug 1 14:13:04 2019(r350500) +++ head/sys/netinet/in_pcb.h Thu Aug 1 14:17:31 2019(r350501) @@ -883,8 +883,13 @@ struct sockaddr * in_sockaddr(in_port_t port, struct in_addr *addr); void in_pcbsosetlabel(struct socket *so); #ifdef RATELIMIT -intin_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, uint32_t, uint32_t); +int +in_pcboutput_txrtlmt_locked(struct inpcb *, struct ifnet *, + struct mbuf *, uint32_t); +intin_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, uint32_t, + uint32_t, struct m_snd_tag **); void in_pcbdetach_txrtlmt(struct inpcb *); +voidin_pcbdetach_tag(struct ifnet *ifp, struct m_snd_tag *mst); intin_pcbmodify_txrtlmt(struct inpcb *, uint32_t); intin_pcbquery_txrtlmt(struct inpcb *, uint32_t *); intin_pcbquery_txrlevel(struct inpcb *, uint32_t *); Added: head/sys/netinet/tcp_ratelimit.c == --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/netinet/tcp_ratelimit.cThu Aug 1 14:17:31 2019 (r350501) @@ -0,0 +1,1234 @@ +/*- + * + * SPDX-License-Identifier: BSD-3-Clause + * 
+ * Copyright (c) 2018-2019 + * Netflix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ +/** + * Author: Randall Stewart + */ + +#include +__FBSDID("$FreeBSD$"); +#include "opt_inet.h" +#include "opt_inet6.h" +#include "opt_ipsec.h" +#include "opt_tcpdebug.h" +#include "opt_ratelimit.h" +#include +#include +#include +#include +#include +#include +#ifdef KERN_TLS +#include +#endif +#include +#include +#include +#include +#define TCPSTATES /* for logging */ +#include +#include +#include +#ifdef INET6 +#include +#endif +#include +#ifndef USECS_IN_SECOND +#define USECS_IN_SECOND 100 +#endif +/* + * For the purposes of each send, what is the size + * of an ethernet frame. 
+ */ +#ifndef ETHERNET_SEGMENT_SIZE +#define ETHERNET_SEGMENT_SIZE 1500 +#endif +MALLOC_DEFINE(M_TCPPACE, "tcp_hwpace", "TCP Hardware pacing memory"); +#ifdef RATELIMIT + +#define COMMON_RATE 180500 +uint64_t desired_rates[] = { + 62500, /* 500Kbps */ + 180500, /* 1.44Mpbs */ + 375000, /* 3Mbps */ + 50, /* 4Mbps */
svn commit: r349987 - in head/sys/netinet: . tcp_stacks
Author: rrs Date: Sun Jul 14 16:05:47 2019 New Revision: 349987 URL: https://svnweb.freebsd.org/changeset/base/349987 Log: This is the second in a number of patches needed to get BBRv1 into the tree. This fixes the DSACK bug but is also needed by BBR. We have yet to go two more one will be for the pacing code (tcp_ratelimit.c) and the second will be for the new updated LRO code that allows a transport to know the arrival times of packets and (tcp_lro.c). After that we should finally be able to get BBRv1 into head. Sponsored by: Netflix Inc Differential Revision:https://reviews.freebsd.org/D20908 Modified: head/sys/netinet/tcp_output.c head/sys/netinet/tcp_sack.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Sun Jul 14 12:04:39 2019 (r349986) +++ head/sys/netinet/tcp_output.c Sun Jul 14 16:05:47 2019 (r349987) @@ -1508,7 +1508,13 @@ timer: if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max)) tp->snd_max = tp->snd_nxt + xlen; } - + if ((error == 0) && + (TCPS_HAVEESTABLISHED(tp->t_state) && +(tp->t_flags & TF_SACK_PERMIT) && +tp->rcv_numsacks > 0)) { + /* Clean up any DSACK's sent */ + tcp_clean_dsack_blocks(tp); + } if (error) { /* Record the error. */ TCP_LOG_EVENT(tp, NULL, >so_rcv, >so_snd, TCP_LOG_OUT, Modified: head/sys/netinet/tcp_sack.c == --- head/sys/netinet/tcp_sack.c Sun Jul 14 12:04:39 2019(r349986) +++ head/sys/netinet/tcp_sack.c Sun Jul 14 16:05:47 2019(r349987) @@ -279,6 +279,45 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta tp->rcv_numsacks = num_head + num_saved; } +void +tcp_clean_dsack_blocks(struct tcpcb *tp) +{ + struct sackblk saved_blks[MAX_SACK_BLKS]; + int num_saved, i; + + INP_WLOCK_ASSERT(tp->t_inpcb); + /* +* Clean up any DSACK blocks that +* are in our queue of sack blocks. 
+* +*/ + num_saved = 0; + for (i = 0; i < tp->rcv_numsacks; i++) { + tcp_seq start = tp->sackblks[i].start; + tcp_seq end = tp->sackblks[i].end; + if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) { + /* +* Discard this D-SACK block. +*/ + continue; + } + /* +* Save this SACK block. +*/ + saved_blks[num_saved].start = start; + saved_blks[num_saved].end = end; + num_saved++; + } + if (num_saved > 0) { + /* +* Copy the saved SACK blocks back. +*/ + bcopy(saved_blks, >sackblks[0], + sizeof(struct sackblk) * num_saved); + } + tp->rcv_numsacks = num_saved; +} + /* * Delete all receiver-side SACK information. */ Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Sun Jul 14 12:04:39 2019 (r349986) +++ head/sys/netinet/tcp_stacks/rack.c Sun Jul 14 16:05:47 2019 (r349987) @@ -5087,9 +5087,8 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th, /* Clean receiver SACK report if present */ -/* if (tp->rcv_numsacks) + if (tp->rcv_numsacks) tcp_clean_sackreport(tp); -*/ TCPSTAT_INC(tcps_preddat); tp->rcv_nxt += tlen; /* @@ -8537,10 +8536,10 @@ out: * retransmit. In persist state, just set snd_max. */ if (error == 0) { -/* if (TCPS_HAVEESTABLISHED(tp->t_state) && + if (TCPS_HAVEESTABLISHED(tp->t_state) && (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) - tcp_clean_dsack_blocks(tp);*/ + tcp_clean_dsack_blocks(tp); if (len == 0) counter_u64_add(rack_out_size[TCP_MSS_ACCT_SNDACK], 1); else if (len == 1) { Modified: head/sys/netinet/tcp_var.h == --- head/sys/netinet/tcp_var.h Sun Jul 14 12:04:39 2019(r349986) +++ head/sys/netinet/tcp_var.h Sun Jul 14 16:05:47 2019(r349987) @@ -939,6 +939,7 @@ tcp_seq tcp_new_isn(struct in_conninfo *); int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq); voidtcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq rcv_lastend); +void
svn commit: r349942 - head/sys/netinet/tcp_stacks
Author: rrs Date: Fri Jul 12 11:45:42 2019 New Revision: 349942 URL: https://svnweb.freebsd.org/changeset/base/349942 Log: add back the comment around the pending DSACK fixes. Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Fri Jul 12 09:59:21 2019 (r349941) +++ head/sys/netinet/tcp_stacks/rack.c Fri Jul 12 11:45:42 2019 (r349942) @@ -8537,10 +8537,10 @@ out: * retransmit. In persist state, just set snd_max. */ if (error == 0) { - if (TCPS_HAVEESTABLISHED(tp->t_state) && +/* if (TCPS_HAVEESTABLISHED(tp->t_state) && (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) - tcp_clean_dsack_blocks(tp); + tcp_clean_dsack_blocks(tp);*/ if (len == 0) counter_u64_add(rack_out_size[TCP_MSS_ACCT_SNDACK], 1); else if (len == 1) { ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r349907 - head/sys/netinet/tcp_stacks
Oops.. that was an error on my part; I will fix it :) > On Jul 11, 2019, at 4:37 AM, Enji Cooper wrote: > > >> On Jul 10, 2019, at 9:38 PM, Randall Stewart wrote: >> >> Author: rrs >> Date: Thu Jul 11 04:38:33 2019 >> New Revision: 349907 >> URL: https://svnweb.freebsd.org/changeset/base/349907 >> >> Log: >> Update copyright per JBH's suggestions.. thanks. >> >> Modified: >> head/sys/netinet/tcp_stacks/rack.c >> >> Modified: head/sys/netinet/tcp_stacks/rack.c >> == >> --- head/sys/netinet/tcp_stacks/rack.c Thu Jul 11 03:29:25 2019 >> (r349906) >> +++ head/sys/netinet/tcp_stacks/rack.c Thu Jul 11 04:38:33 2019 >> (r349907) >> @@ -1,5 +1,5 @@ >> /*- >> - * Copyright (c) 2016 >> + * Copyright (c) 2016-2019 >> * Netflix Inc. All rights reserved. >> * >> * Redistribution and use in source and binary forms, with or without >> @@ -8537,10 +8537,10 @@ out: >> * retransmit. In persist state, just set snd_max. >> */ >> if (error == 0) { >> -/* if (TCPS_HAVEESTABLISHED(tp->t_state) && >> +if (TCPS_HAVEESTABLISHED(tp->t_state) && >> (tp->t_flags & TF_SACK_PERMIT) && >> tp->rcv_numsacks > 0) >> - tcp_clean_dsack_blocks(tp);*/ >> +tcp_clean_dsack_blocks(tp); > > Removing this commented out code unfortunately broke the build: > https://ci.freebsd.org/job/FreeBSD-head-amd64-LINT/12934/console . > Thanks, > -Enji > -- Randall Stewart r...@netflix.com ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r349893 - in head/sys: modules/tcp/rack netinet netinet/tcp_stacks sys
John: Thanks for the suggestions.. I have committed changes to the two nits. As to M_PROTO1, I see that in the NF world we have removed M_PROTO12 and moved the M_PROTO’s up 1 i.e. M_PROTO1 == 0x2000 So for now it is safe, since the M_TSTMP_LRO is not yet used.. but in my up and coming commits I will have to address this i.e. either do the same thing or just make it use M_PROTO12. There are a couple of places M_PROTO1 is used on the receive path so that would not work there :o After I get the DSACK fixes in my next change to get BBR in will be the LRO work… So maybe I should just settle on using M_PROTO12 for that what do you think? R > On Jul 10, 2019, at 7:28 PM, John Baldwin wrote: > > On 7/10/19 1:40 PM, Randall Stewart wrote: >> Author: rrs >> Date: Wed Jul 10 20:40:39 2019 >> New Revision: 349893 >> URL: https://svnweb.freebsd.org/changeset/base/349893 >> >> Log: >> This commit updates rack to what is basically being used at NF as >> well as sets in some of the groundwork for committing BBR. The >> hpts system is updated as well as some other needed utilities >> for the entrance of BBR. This is actually part 1 of 3 more >> needed commits which will finally complete with BBRv1 being >> added as a new tcp stack. >> >> Sponsored by: Netflix Inc. >> Differential Revision: https://reviews.freebsd.org/D20834 > > Is it safe for M_TSTMP_LRO to conflict with M_PROTO1? > > Also, it seems you changed the copyright range on rack.c from > 2016-2019 to just 2016 which I suspect is an accident. > > I would suggest using #error here: > > #ifndef TCPHPTS > fatal error missing option TCPHSTS in the build; > #endif > > -- > John Baldwin -- Randall Stewart r...@netflix.com ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r349908 - head/sys/netinet/tcp_stacks
Author: rrs Date: Thu Jul 11 04:40:58 2019 New Revision: 349908 URL: https://svnweb.freebsd.org/changeset/base/349908 Log: Update to jhb's other suggestion, use #error when we are missing HPTS. Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Thu Jul 11 04:38:33 2019 (r349907) +++ head/sys/netinet/tcp_stacks/rack.c Thu Jul 11 04:40:58 2019 (r349908) @@ -129,7 +129,7 @@ struct sysctl_ctx_list rack_sysctl_ctx; struct sysctl_oid *rack_sysctl_root; #ifndef TCPHPTS -fatal error missing option TCPHSTS in the build; +#error "fatal error missing option TCPHSTS in the build" #endif #define CUM_ACKED 1 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r349907 - head/sys/netinet/tcp_stacks
Author: rrs Date: Thu Jul 11 04:38:33 2019 New Revision: 349907 URL: https://svnweb.freebsd.org/changeset/base/349907 Log: Update copyright per JBH's suggestions.. thanks. Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Thu Jul 11 03:29:25 2019 (r349906) +++ head/sys/netinet/tcp_stacks/rack.c Thu Jul 11 04:38:33 2019 (r349907) @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2016 + * Copyright (c) 2016-2019 * Netflix Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -8537,10 +8537,10 @@ out: * retransmit. In persist state, just set snd_max. */ if (error == 0) { -/* if (TCPS_HAVEESTABLISHED(tp->t_state) && + if (TCPS_HAVEESTABLISHED(tp->t_state) && (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) - tcp_clean_dsack_blocks(tp);*/ + tcp_clean_dsack_blocks(tp); if (len == 0) counter_u64_add(rack_out_size[TCP_MSS_ACCT_SNDACK], 1); else if (len == 1) { ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r349893 - in head/sys: modules/tcp/rack netinet netinet/tcp_stacks sys
Author: rrs Date: Wed Jul 10 20:40:39 2019 New Revision: 349893 URL: https://svnweb.freebsd.org/changeset/base/349893 Log: This commit updates rack to what is basically being used at NF as well as sets in some of the groundwork for committing BBR. The hpts system is updated as well as some other needed utilities for the entrance of BBR. This is actually part 1 of 3 more needed commits which will finally complete with BBRv1 being added as a new tcp stack. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D20834 Added: head/sys/netinet/tcp_stacks/rack_bbr_common.c (contents, props changed) Modified: head/sys/modules/tcp/rack/Makefile head/sys/netinet/in_pcb.h head/sys/netinet/tcp.h head/sys/netinet/tcp_hpts.c head/sys/netinet/tcp_hpts.h head/sys/netinet/tcp_log_buf.h head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_stacks/rack_bbr_common.h head/sys/netinet/tcp_var.h head/sys/sys/mbuf.h Modified: head/sys/modules/tcp/rack/Makefile == --- head/sys/modules/tcp/rack/Makefile Wed Jul 10 19:57:48 2019 (r349892) +++ head/sys/modules/tcp/rack/Makefile Wed Jul 10 20:40:39 2019 (r349893) @@ -6,7 +6,7 @@ STACKNAME= rack KMOD= tcp_${STACKNAME} -SRCS= rack.c sack_filter.c +SRCS= rack.c sack_filter.c rack_bbr_common.c SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h SRCS+= opt_tcpdebug.h Modified: head/sys/netinet/in_pcb.h == --- head/sys/netinet/in_pcb.h Wed Jul 10 19:57:48 2019(r349892) +++ head/sys/netinet/in_pcb.h Wed Jul 10 20:40:39 2019(r349893) @@ -759,7 +759,9 @@ int inp_so_options(const struct inpcb *inp); #defineINP_ORIGDSTADDR 0x0800 /* receive IP dst address/port */ #define INP_CANNOT_DO_ECN 0x1000 /* The stack does not do ECN */ #defineINP_REUSEPORT_LB0x2000 /* SO_REUSEPORT_LB option is set */ - +#define INP_SUPPORTS_MBUFQ 0x4000 /* Supports the mbuf queue method of LRO */ +#define INP_MBUF_QUEUE_READY 0x8000 /* The transport is pacing, inputs can be queued */ +#define INP_DONT_SACK_QUEUE0x0001 /* If a sack arrives do not wake me */ /* * 
Flags passed to in_pcblookup*() functions. */ Modified: head/sys/netinet/tcp.h == --- head/sys/netinet/tcp.h Wed Jul 10 19:57:48 2019(r349892) +++ head/sys/netinet/tcp.h Wed Jul 10 20:40:39 2019(r349893) @@ -201,9 +201,8 @@ struct tcphdr { #define TCP_RACK_TLP_THRESH 1063 /* RACK TLP theshold i.e. srtt+(srtt/N) */ #define TCP_RACK_PKT_DELAY1064 /* RACK added ms i.e. rack-rtt + reord + N */ #define TCP_RACK_TLP_INC_VAR 1065 /* Does TLP include rtt variance in t-o */ -#define TCP_RACK_SESS_CWV 1066 /* Enable RFC7611 cwnd validation on sess */ #define TCP_BBR_IWINTSO 1067 /* Initial TSO window for BBRs first sends */ -#define TCP_BBR_RECFORCE 1068 /* Enter recovery force out a segment disregard pacer */ +#define TCP_BBR_RECFORCE 1068 /* Enter recovery force out a segment disregard pacer no longer valid */ #define TCP_BBR_STARTUP_PG1069 /* Startup pacing gain */ #define TCP_BBR_DRAIN_PG 1070 /* Drain pacing gain */ #define TCP_BBR_RWND_IS_APP 1071 /* Rwnd limited is considered app limited */ @@ -211,14 +210,18 @@ struct tcphdr { #define TCP_BBR_ONE_RETRAN1073 /* Is only one segment allowed out during retran */ #define TCP_BBR_STARTUP_LOSS_EXIT 1074 /* Do we exit a loss during startup if not 20% incr */ #define TCP_BBR_USE_LOWGAIN 1075 /* lower the gain in PROBE_BW enable */ -#define TCP_BBR_LOWGAIN_THRESH 1076 /* How many cycles do we stay in lowgain */ -#define TCP_BBR_LOWGAIN_HALF 1077 /* Do we halfstep lowgain down */ -#define TCP_BBR_LOWGAIN_FD1078 /* Do we force a drain when lowgain in place */ +#define TCP_BBR_LOWGAIN_THRESH 1076 /* Unused after 2.3 morphs to TSLIMITS >= 2.3 */ +#define TCP_BBR_TSLIMITS 1076 /* Do we use experimental Timestamp limiting for our algo */ +#define TCP_BBR_LOWGAIN_HALF 1077 /* Unused after 2.3 */ +#define TCP_BBR_PACE_OH1077 /* Reused in 4.2 for pacing overhead setting */ +#define TCP_BBR_LOWGAIN_FD1078 /* Unused after 2.3 */ +#define TCP_BBR_HOLD_TARGET 1078 /* For 4.3 on */ #define TCP_BBR_USEDEL_RATE 1079 /* Enable use of delivery 
rate for loss recovery */ #define TCP_BBR_MIN_RTO 1080 /* Min RTO in milliseconds */ #define TCP_BBR_MAX_RTO 1081 /* Max RTO in milliseconds */ #define TCP_BBR_REC_OVER_HPTS 1082 /* Recovery override htps settings 0/1/3 */ -#define TCP_BBR_UNLIMITED 1083 /* Does BBR, in non-recovery not use cwnd */ +#define TCP_BBR_UNLIMITED 1083 /* Not used before 2.3 and morphs to algorithm >= 2.3 */ +#define TCP_BBR_ALGORITHM
svn commit: r346094 - head/sys/netinet
Author: rrs Date: Wed Apr 10 18:58:11 2019 New Revision: 346094 URL: https://svnweb.freebsd.org/changeset/base/346094 Log: Fix a small bug in the tcp_log_id where the bucket was unlocked and yet the bucket-unlock flag was not changed to false. This can cause a panic if INVARIANTS is on and we go through the right path (though rare). This fixes the correct bug :) Reported by: syzbot+179a1ad49f3c4c215...@syzkaller.appspotmail.com Reviewed by: tuexen@ Modified: head/sys/netinet/tcp_log_buf.c Modified: head/sys/netinet/tcp_log_buf.c == --- head/sys/netinet/tcp_log_buf.c Wed Apr 10 18:17:27 2019 (r346093) +++ head/sys/netinet/tcp_log_buf.c Wed Apr 10 18:58:11 2019 (r346094) @@ -752,6 +752,7 @@ refind: RECHECK_INP(); if (tp->t_lib != NULL) { TCPID_BUCKET_UNLOCK(tlb); + bucket_locked = false; tlb = NULL; goto restart; } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r345851 - head/sys/netinet
Author: rrs Date: Wed Apr 3 19:35:07 2019 New Revision: 345851 URL: https://svnweb.freebsd.org/changeset/base/345851 Log: Undo my previous erroneous commit changing the tcp_output kassert. Hmm now the question is where did the tcp_log_id change go :o Modified: head/sys/netinet/tcp_output.c Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Wed Apr 3 18:35:13 2019 (r345850) +++ head/sys/netinet/tcp_output.c Wed Apr 3 19:35:07 2019 (r345851) @@ -138,8 +138,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat * non-ACK. */ #define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags) \ - KASSERT(((len) == 0 && ((th_flags) &\ - (TH_SYN | TH_FIN | TH_RST)) != 0) ||\ + KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\ tcp_timer_active((tp), TT_REXMT) || \ tcp_timer_active((tp), TT_PERSIST), \ ("neither rexmt nor persist timer is set")) ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r345527 - head/sys/netinet
Author: rrs Date: Tue Mar 26 10:41:27 2019 New Revision: 345527 URL: https://svnweb.freebsd.org/changeset/base/345527 Log: Fix a small bug in the tcp_log_id where the bucket was unlocked and yet the bucket-unlock flag was not changed to false. This can cause a panic if INVARIANTS is on and we go through the right path (though rare). Reported by: syzbot+179a1ad49f3c4c215...@syzkaller.appspotmail.com Reviewed by: tuexen@ MFC after:1 week Modified: head/sys/netinet/tcp_output.c Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Tue Mar 26 09:46:17 2019 (r345526) +++ head/sys/netinet/tcp_output.c Tue Mar 26 10:41:27 2019 (r345527) @@ -138,7 +138,8 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat * non-ACK. */ #define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags) \ - KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\ + KASSERT(((len) == 0 && ((th_flags) &\ + (TH_SYN | TH_FIN | TH_RST)) != 0) ||\ tcp_timer_active((tp), TT_REXMT) || \ tcp_timer_active((tp), TT_PERSIST), \ ("neither rexmt nor persist timer is set")) ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r344099 - head/sys/net
> On Feb 13, 2019, at 1:10 PM, John Baldwin wrote: > > On 2/13/19 10:03 AM, Randall Stewart wrote: >> oh and one other thing.. >> >> It was *not* a random IFP.. it was the IFP to the lagg. >> >> I.e. an alloc() was done to the lagg.. and the free was >> done back to the same IFP (that provided the allocate). > > Yes, that's wrong. Suppose the route changes so that my traffic is now over > em0 instead of lagg0 (where em0 isn't a member of the lagg), how do you > expect if_lagg_free to invoke em0's free routine? In your case it does, > but only by accident. It doesn't work in the other case I described which > is if you have non-lagg interfaces and a route moves from cc0 to em0. In > that case your existing code that is using the wrong ifp will just panic. > > These aren't real alloc routines as the lagg and vlan ones don't allocate > anything, they pass along the request to the child and the child allocates > the tag. Only ifnet's that actually allocate tags should need to free them, > and you should be using tag->ifp to as the ifp whose if_snd_tag_free works. But thats what the lagg’s routine does, use the tag sent in to find the real ifp (where the tag was allocated) and call the if_snd_tag_free() on that. Its not an accident it works, it calls the free of the actual interface where the allocation came from. I don’t see how it would panic. R > >> R >> >>> On Feb 13, 2019, at 1:02 PM, Randall Stewart wrote: >>> >>> I disagree. If you define an alloc it is only >>> reciprocal that you should define a free. >>> >>> The code in question that hit this was changed (its in a version >>> of rack that has the rate-limit and TLS code).. but I think these >>> things *should* be balanced.. 
if you provide an Allocate, you >>> should also provide a Free… >>> >>> R >>> >>> >>>> On Feb 13, 2019, at 12:09 PM, John Baldwin wrote: >>>> >>>> On 2/13/19 6:57 AM, Randall Stewart wrote: >>>>> Author: rrs >>>>> Date: Wed Feb 13 14:57:59 2019 >>>>> New Revision: 344099 >>>>> URL: https://svnweb.freebsd.org/changeset/base/344099 >>>>> >>>>> Log: >>>>> This commit adds the missing release mechanism for the >>>>> ratelimiting code. The two modules (lagg and vlan) did have >>>>> allocation routines, and even though they are indirect (and >>>>> vector down to the underlying interfaces) they both need to >>>>> have a free routine (that also vectors down to the actual interface). >>>>> >>>>> Sponsored by: Netflix Inc. >>>>> Differential Revision:https://reviews.freebsd.org/D19032 >>>> >>>> Hmm, I don't understand why you'd ever invoke if_snd_tag_free from anything >>>> but 'tag->ifp' rather than some other ifp. What if the route for a >>>> connection >>>> moves so that a tag allocated on cc0 is now on a route that goes over em0? >>>> You can't expect em0 to have an if_snd_tag_free routine that will know to >>>> go invoke cxgbe's snd_tag_free. I think you should always be using >>>> 'tag->ifp->if_snd_tag_free' to free tags and never using any other ifp. >>>> >>>> That is, I think this should be reverted and that instead you need to fix >>>> the code invoking if_snd_tag_free to invoke it on the tag's ifp instead of >>>> some random other ifp. >>>> >>>> -- >>>> John Baldwin >>>> >>>> >>> >>> -- >>> Randall Stewart >>> r...@netflix.com >>> >>> >>> >> >> -- >> Randall Stewart >> r...@netflix.com >> >> >> > > > -- > John Baldwin -- Randall Stewart r...@netflix.com ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r344099 - head/sys/net
oh and one other thing.. It was *not* a random IFP.. it was the IFP to the lagg. I.e. an alloc() was done to the lagg.. and the free was done back to the same IFP (that provided the allocate). R > On Feb 13, 2019, at 1:02 PM, Randall Stewart wrote: > > I disagree. If you define an alloc it is only > reciprocal that you should define a free. > > The code in question that hit this was changed (its in a version > of rack that has the rate-limit and TLS code).. but I think these > things *should* be balanced.. if you provide an Allocate, you > should also provide a Free… > > R > > >> On Feb 13, 2019, at 12:09 PM, John Baldwin wrote: >> >> On 2/13/19 6:57 AM, Randall Stewart wrote: >>> Author: rrs >>> Date: Wed Feb 13 14:57:59 2019 >>> New Revision: 344099 >>> URL: https://svnweb.freebsd.org/changeset/base/344099 >>> >>> Log: >>> This commit adds the missing release mechanism for the >>> ratelimiting code. The two modules (lagg and vlan) did have >>> allocation routines, and even though they are indirect (and >>> vector down to the underlying interfaces) they both need to >>> have a free routine (that also vectors down to the actual interface). >>> >>> Sponsored by: Netflix Inc. >>> Differential Revision: https://reviews.freebsd.org/D19032 >> >> Hmm, I don't understand why you'd ever invoke if_snd_tag_free from anything >> but 'tag->ifp' rather than some other ifp. What if the route for a >> connection >> moves so that a tag allocated on cc0 is now on a route that goes over em0? >> You can't expect em0 to have an if_snd_tag_free routine that will know to >> go invoke cxgbe's snd_tag_free. I think you should always be using >> 'tag->ifp->if_snd_tag_free' to free tags and never using any other ifp. >> >> That is, I think this should be reverted and that instead you need to fix >> the code invoking if_snd_tag_free to invoke it on the tag's ifp instead of >> some random other ifp. 
>> >> -- >> John Baldwin >> >> > > -- > Randall Stewart > r...@netflix.com > > > -- Randall Stewart r...@netflix.com ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r344099 - head/sys/net
I disagree. If you define an alloc it is only reciprocal that you should define a free. The code in question that hit this was changed (its in a version of rack that has the rate-limit and TLS code).. but I think these things *should* be balanced.. if you provide an Allocate, you should also provide a Free… R > On Feb 13, 2019, at 12:09 PM, John Baldwin wrote: > > On 2/13/19 6:57 AM, Randall Stewart wrote: >> Author: rrs >> Date: Wed Feb 13 14:57:59 2019 >> New Revision: 344099 >> URL: https://svnweb.freebsd.org/changeset/base/344099 >> >> Log: >> This commit adds the missing release mechanism for the >> ratelimiting code. The two modules (lagg and vlan) did have >> allocation routines, and even though they are indirect (and >> vector down to the underlying interfaces) they both need to >> have a free routine (that also vectors down to the actual interface). >> >> Sponsored by: Netflix Inc. >> Differential Revision: https://reviews.freebsd.org/D19032 > > Hmm, I don't understand why you'd ever invoke if_snd_tag_free from anything > but 'tag->ifp' rather than some other ifp. What if the route for a connection > moves so that a tag allocated on cc0 is now on a route that goes over em0? > You can't expect em0 to have an if_snd_tag_free routine that will know to > go invoke cxgbe's snd_tag_free. I think you should always be using > 'tag->ifp->if_snd_tag_free' to free tags and never using any other ifp. > > That is, I think this should be reverted and that instead you need to fix > the code invoking if_snd_tag_free to invoke it on the tag's ifp instead of > some random other ifp. > > -- > John Baldwin > > -- Randall Stewart r...@netflix.com ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r344099 - head/sys/net
Author: rrs Date: Wed Feb 13 14:57:59 2019 New Revision: 344099 URL: https://svnweb.freebsd.org/changeset/base/344099 Log: This commit adds the missing release mechanism for the ratelimiting code. The two modules (lagg and vlan) did have allocation routines, and even though they are indirect (and vector down to the underlying interfaces) they both need to have a free routine (that also vectors down to the actual interface). Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D19032 Modified: head/sys/net/if_lagg.c head/sys/net/if_vlan.c Modified: head/sys/net/if_lagg.c == --- head/sys/net/if_lagg.c Wed Feb 13 14:39:16 2019(r344098) +++ head/sys/net/if_lagg.c Wed Feb 13 14:57:59 2019(r344099) @@ -133,6 +133,7 @@ static int lagg_ioctl(struct ifnet *, u_long, caddr_t) static int lagg_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); +static voidlagg_snd_tag_free(struct m_snd_tag *); #endif static int lagg_setmulti(struct lagg_port *); static int lagg_clrmulti(struct lagg_port *); @@ -514,6 +515,7 @@ lagg_clone_create(struct if_clone *ifc, int unit, cadd ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST; #ifdef RATELIMIT ifp->if_snd_tag_alloc = lagg_snd_tag_alloc; + ifp->if_snd_tag_free = lagg_snd_tag_free; #endif ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS; @@ -1568,6 +1570,13 @@ lagg_snd_tag_alloc(struct ifnet *ifp, /* forward allocation request */ return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); } + +static void +lagg_snd_tag_free(struct m_snd_tag *tag) +{ + tag->ifp->if_snd_tag_free(tag); +} + #endif static int Modified: head/sys/net/if_vlan.c == --- head/sys/net/if_vlan.c Wed Feb 13 14:39:16 2019(r344098) +++ head/sys/net/if_vlan.c Wed Feb 13 14:57:59 2019(r344099) @@ -267,6 +267,7 @@ static int vlan_ioctl(struct ifnet *ifp, u_long cmd, c #ifdef RATELIMIT static int vlan_snd_tag_alloc(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); +static void 
vlan_snd_tag_free(struct m_snd_tag *); #endif static void vlan_qflush(struct ifnet *ifp); static int vlan_setflag(struct ifnet *ifp, int flag, int status, @@ -1047,6 +1048,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, si ifp->if_ioctl = vlan_ioctl; #ifdef RATELIMIT ifp->if_snd_tag_alloc = vlan_snd_tag_alloc; + ifp->if_snd_tag_free = vlan_snd_tag_free; #endif ifp->if_flags = VLAN_IFFLAGS; ether_ifattach(ifp, eaddr); @@ -1933,5 +1935,11 @@ vlan_snd_tag_alloc(struct ifnet *ifp, return (EOPNOTSUPP); /* forward allocation request */ return (ifp->if_snd_tag_alloc(ifp, params, ppmt)); +} + +static void +vlan_snd_tag_free(struct m_snd_tag *tag) +{ + tag->ifp->if_snd_tag_free(tag); } #endif ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r338102 - in head/sys/netinet: . tcp_stacks
Author: rrs Date: Mon Aug 20 12:43:18 2018 New Revision: 338102 URL: https://svnweb.freebsd.org/changeset/base/338102 Log: This change represents a substantial restructure of the way we reassemble inbound TCP segments. The old algorithm just blindly dropped in segments without coalescing. This meant that every segment could take up greater and greater room on the linked list of segments. This of course is now subject to a tighter limit (100) of segments which in a high BDP situation will cause us to be a lot more inefficient as we drop segments beyond 100 entries that we receive. What this restructure does is cause the reassembly buffer to coalesce segments putting an emphasis on the two common cases (which avoid walking the list of segments) i.e. where we add to the back of the queue of segments and where we add to the front. We also have the reassembly buffer supporting a couple of debug options (black box logging as well as counters for code coverage). These are compiled out by default but can be added by uncommenting the defines. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D16626 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_log_buf.h head/sys/netinet/tcp_reass.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_usrreq.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.c Mon Aug 20 12:31:39 2018 (r338101) +++ head/sys/netinet/tcp_input.c Mon Aug 20 12:43:18 2018 (r338102) @@ -1734,7 +1734,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru tp->snd_nxt == tp->snd_max && tiwin && tiwin == tp->snd_wnd && ((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) && - LIST_EMPTY(&tp->t_segq) && + SEGQ_EMPTY(tp) && ((to.to_flags & TOF_TS) == 0 || TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) { @@ -2440,7 +2440,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru * later; if not, do so now to pass queued data to user. 
*/ if (tlen == 0 && (thflags & TH_FIN) == 0) - (void) tcp_reass(tp, (struct tcphdr *)0, 0, + (void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0, (struct mbuf *)0); tp->snd_wl1 = th->th_seq - 1; /* FALLTHROUGH */ @@ -3017,7 +3017,7 @@ dodata: /* XXX */ * fast retransmit can work). */ if (th->th_seq == tp->rcv_nxt && - LIST_EMPTY(>t_segq) && + SEGQ_EMPTY(tp) && (TCPS_HAVEESTABLISHED(tp->t_state) || tfo_syn)) { if (DELAY_ACK(tp, tlen) || tfo_syn) @@ -3042,7 +3042,7 @@ dodata: /* XXX */ * m_adj() doesn't actually frees any mbufs * when trimming from the head. */ - thflags = tcp_reass(tp, th, , m); + thflags = tcp_reass(tp, th, _start, , m); tp->t_flags |= TF_ACKNOW; } if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT)) Modified: head/sys/netinet/tcp_log_buf.h == --- head/sys/netinet/tcp_log_buf.h Mon Aug 20 12:31:39 2018 (r338101) +++ head/sys/netinet/tcp_log_buf.h Mon Aug 20 12:43:18 2018 (r338102) @@ -217,7 +217,9 @@ enum tcp_log_events { BBR_LOG_REDUCE, /* old bbr log reduce for 4.1 and earlier 46*/ TCP_LOG_RTT,/* A rtt (in useconds) is being sampled and applied to the srtt algo 47 */ BBR_LOG_SETTINGS_CHG, /* Settings changed for loss response 48 */ - TCP_LOG_END /* End (keep at end)49 */ + BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining 49 */ + TCP_LOG_REASS, /* Reassembly buffer logging 50 */ + TCP_LOG_END /* End (keep at end)51 */ }; enum tcp_log_states { Modified: head/sys/netinet/tcp_reass.c == --- head/sys/netinet/tcp_reass.cMon Aug 20 12:31:39 2018 (r338101) +++ head/sys/netinet/tcp_reass.cMon Aug 20 12:43:18 2018 (r338102) @@ -72,15 +72,37 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include #include #ifdef TCPDEBUG #include #endif /* TCPDEBUG */ +#define TCP_R_LOG_ADD 1 +#define TCP_R_LOG_LIMIT_REACHED 2 +#define TCP_R_LOG_APPEND 3 +#define
svn commit: r337455 - head/sys/netinet/tcp_stacks
Author: rrs Date: Wed Aug 8 13:36:49 2018 New Revision: 337455 URL: https://svnweb.freebsd.org/changeset/base/337455 Log: Fix a small bug in rack where it will end up sending the FIN twice. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D16604 Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Wed Aug 8 12:08:46 2018 (r337454) +++ head/sys/netinet/tcp_stacks/rack.c Wed Aug 8 13:36:49 2018 (r337455) @@ -7603,13 +7603,10 @@ dontupdate: * If our state indicates that FIN should be sent and we have not * yet done so, then we need to send. */ - if (flags & TH_FIN) { - if ((tp->t_flags & TF_SENTFIN) || - (((tp->t_flags & TF_SENTFIN) == 0) && -(tp->snd_nxt == tp->snd_una))) { - pass = 11; - goto send; - } + if ((flags & TH_FIN) && + (tp->snd_nxt == tp->snd_una)) { + pass = 11; + goto send; } /* * No reason to send a segment, just return. ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r337375 - head/sys/netinet/tcp_stacks
Author: rrs Date: Mon Aug 6 09:22:07 2018 New Revision: 337375 URL: https://svnweb.freebsd.org/changeset/base/337375 Log: This fixes a bug in Rack where we were not properly using the correct value for Delayed Ack. Sponsored by: Netflix Inc. Differential Revision: https://reviews.freebsd.org/D16579 Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Mon Aug 6 08:40:02 2018 (r337374) +++ head/sys/netinet/tcp_stacks/rack.c Mon Aug 6 09:22:07 2018 (r337375) @@ -2275,7 +2275,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tc } hpts_timeout = rack_timer_start(tp, rack, cts); if (tp->t_flags & TF_DELACK) { - delayed_ack = tcp_delacktime; + delayed_ack = TICKS_2_MSEC(tcp_delacktime); rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK; } if (delayed_ack && ((hpts_timeout == 0) || ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r336893 - head/sys/netinet/tcp_stacks
Author: rrs Date: Mon Jul 30 10:23:29 2018 New Revision: 336893 URL: https://svnweb.freebsd.org/changeset/base/336893 Log: This fixes a hole where rack could end up sending an invalid segment into the reassembly queue. This would happen if you enabled the data after close option. Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D16453 Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Mon Jul 30 09:50:26 2018 (r336892) +++ head/sys/netinet/tcp_stacks/rack.c Mon Jul 30 10:23:29 2018 (r336893) @@ -4657,7 +4657,6 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s rack = (struct tcp_rack *)tp->t_fb_ptr; INP_WLOCK_ASSERT(tp->t_inpcb); - nsegs = max(1, m->m_pkthdr.lro_nsegs); if ((thflags & TH_ACK) && (SEQ_LT(tp->snd_wl1, th->th_seq) || @@ -4686,6 +4685,10 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s tp->snd_nxt = tp->snd_max; /* Make sure we output to start the timer */ rack->r_wanted_output++; + } + if (tp->t_flags2 & TF2_DROP_AF_DATA) { + m_freem(m); + return (0); } /* * Process segments with URG. ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r336672 - in head/sys: modules/tcp modules/tcp/fastpath netinet/tcp_stacks
Author: rrs Date: Tue Jul 24 14:55:47 2018 New Revision: 336672 URL: https://svnweb.freebsd.org/changeset/base/336672 Log: Delete the example tcp stack "fastpath" which was only put in as an example. Sponsored by: Netflix inc. Differential Revision:https://reviews.freebsd.org/D16420 Deleted: head/sys/modules/tcp/fastpath/ head/sys/netinet/tcp_stacks/fastpath.c Modified: head/sys/modules/tcp/Makefile Modified: head/sys/modules/tcp/Makefile == --- head/sys/modules/tcp/Makefile Tue Jul 24 13:31:50 2018 (r336671) +++ head/sys/modules/tcp/Makefile Tue Jul 24 14:55:47 2018 (r336672) @@ -6,12 +6,10 @@ SYSDIR?=${SRCTOP}/sys .include "${SYSDIR}/conf/kern.opts.mk" SUBDIR=\ - ${_tcp_fastpath} \ ${_tcp_rack} \ ${_tcpmd5} \ .if ${MK_EXTRA_TCP_STACKS} != "no" || defined(ALL_MODULES) -_tcp_fastpath= fastpath _tcp_rack= rack .endif ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r336465 - in head/sys/netinet: . tcp_stacks
Author: rrs Date: Wed Jul 18 22:49:53 2018 New Revision: 336465 URL: https://svnweb.freebsd.org/changeset/base/336465 Log: Bump the ICMP echo limits to match the RFC Reviewed by: tuexen Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D16333 Modified: head/sys/netinet/ip_icmp.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/ip_icmp.c == --- head/sys/netinet/ip_icmp.c Wed Jul 18 22:45:45 2018(r336464) +++ head/sys/netinet/ip_icmp.c Wed Jul 18 22:49:53 2018(r336465) @@ -139,8 +139,8 @@ static VNET_DEFINE(int, icmp_rfi) = 0; SYSCTL_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_VNET | CTLFLAG_RW, _NAME(icmp_rfi), 0, "ICMP reply from incoming interface for non-local packets"); - -static VNET_DEFINE(int, icmp_quotelen) = 8; +/* Router requirements RFC 1812 section 4.3.2.3 requires 576 - 28. */ +static VNET_DEFINE(int, icmp_quotelen) = 548; #defineV_icmp_quotelen VNET(icmp_quotelen) SYSCTL_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_VNET | CTLFLAG_RW, _NAME(icmp_quotelen), 0, Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Wed Jul 18 22:45:45 2018 (r336464) +++ head/sys/netinet/tcp_stacks/rack.c Wed Jul 18 22:49:53 2018 (r336465) @@ -1627,7 +1627,6 @@ rack_process_rst(struct mbuf *m, struct tcphdr *th, st static void rack_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, int32_t * ret_val) { - INP_INFO_RLOCK_ASSERT(_tcbinfo); TCPSTAT_INC(tcps_badsyn); @@ -6103,7 +6102,6 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, str return (ret_val); } if (ourfinisacked) { - INP_INFO_RLOCK_ASSERT(_tcbinfo); tp = tcp_close(tp); rack_do_drop(m, tp); ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r335502 - head/sys/netinet
Author: rrs Date: Thu Jun 21 21:03:58 2018 New Revision: 335502 URL: https://svnweb.freebsd.org/changeset/base/335502 Log: This adds in an optimization so that we only walk one time through the mbuf chain during copy and TSO limiting. It is used by both Rack and now the FreeBSD stack. Sponsored by: Netflix Inc Differential Revision: https://reviews.freebsd.org/D15937 Modified: head/sys/netinet/tcp_output.c Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Thu Jun 21 20:18:23 2018 (r335501) +++ head/sys/netinet/tcp_output.c Thu Jun 21 21:03:58 2018 (r335502) @@ -209,6 +209,8 @@ tcp_output(struct tcpcb *tp) int32_t len; uint32_t recwin, sendwin; int off, flags, error = 0; /* Keep compiler happy */ + u_int if_hw_tsomaxsegcount = 0; + u_int if_hw_tsomaxsegsize; struct mbuf *m; struct ip *ip = NULL; #ifdef TCPDEBUG @@ -879,9 +881,6 @@ send: if (tso) { u_int if_hw_tsomax; - u_int if_hw_tsomaxsegcount; - u_int if_hw_tsomaxsegsize; - struct mbuf *mb; u_int moff; int max_len; @@ -913,66 +912,7 @@ send: len = max_len; } } - /* -* Check if we should limit by maximum segment -* size and count: -*/ - if (if_hw_tsomaxsegcount != 0 && - if_hw_tsomaxsegsize != 0) { - /* -* Subtract one segment for the LINK -* and TCP/IP headers mbuf that will -* be prepended to this mbuf chain -* after the code in this section -* limits the number of mbufs in the -* chain to if_hw_tsomaxsegcount. 
-*/ - if_hw_tsomaxsegcount -= 1; - max_len = 0; - mb = sbsndmbuf(>so_snd, off, ); - - while (mb != NULL && max_len < len) { - u_int mlen; - u_int frags; - - /* -* Get length of mbuf fragment -* and how many hardware frags, -* rounded up, it would use: -*/ - mlen = (mb->m_len - moff); - frags = howmany(mlen, - if_hw_tsomaxsegsize); - - /* Handle special case: Zero Length Mbuf */ - if (frags == 0) - frags = 1; - - /* -* Check if the fragment limit -* will be reached or exceeded: -*/ - if (frags >= if_hw_tsomaxsegcount) { - max_len += min(mlen, - if_hw_tsomaxsegcount * - if_hw_tsomaxsegsize); - break; - } - max_len += mlen; - if_hw_tsomaxsegcount -= frags; - moff = 0; - mb = mb->m_next; - } - if (max_len <= 0) { - len = 0; - } else if (len > max_len) { - sendalot = 1; - len = max_len; - } - } - - /* * Prevent the last segment from being * fractional unless the send sockbuf can be * emptied: @@ -1006,7 +946,6 @@ send: */ if (tp->t_flags & TF_NEEDFIN) sendalot = 1; - } else { len = tp->t_maxseg - optlen - ipoptlen; sendalot = 1; @@
svn commit: r335364 - head/sys/netinet/tcp_stacks
Author: rrs Date: Tue Jun 19 11:20:28 2018 New Revision: 335364 URL: https://svnweb.freebsd.org/changeset/base/335364 Log: Make sure that the t_peakrate_thr is not compiled in by default until NF can upstream it. Reviewed by: and suggested lstewart Sponsored by: Netflix Inc. Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Tue Jun 19 11:06:36 2018 (r335363) +++ head/sys/netinet/tcp_stacks/rack.c Tue Jun 19 11:20:28 2018 (r335364) @@ -1206,7 +1206,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r tp->t_stats_gput_prev); tp->t_flags &= ~TF_GPUTINPROG; tp->t_stats_gput_prev = gput; - +#ifdef NETFLIX_CWV if (tp->t_maxpeakrate) { /* * We update t_peakrate_thr. This gives us roughly @@ -1214,6 +1214,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r */ tcp_update_peakrate_thr(tp); } +#endif } #endif if (tp->snd_cwnd > tp->snd_ssthresh) { @@ -1267,11 +1268,11 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r tcp_newcwv_update_pipeack(tp, data); } } -#endif /* we enforce max peak rate if it is set. */ if (tp->t_peakrate_thr && tp->snd_cwnd > tp->t_peakrate_thr) { tp->snd_cwnd = tp->t_peakrate_thr; } +#endif } static void ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r335361 - head/sys/netinet
Author: rrs Date: Tue Jun 19 05:28:14 2018 New Revision: 335361 URL: https://svnweb.freebsd.org/changeset/base/335361 Log: Move the tp set back to where it was before we started playing with the VNET sets. This way we have verified the INP settings before we go to the trouble of de-referencing it. Reviewed by: and suggested by lstewart Sponsored by: Netflix Inc. Modified: head/sys/netinet/tcp_hpts.c Modified: head/sys/netinet/tcp_hpts.c == --- head/sys/netinet/tcp_hpts.c Tue Jun 19 05:01:07 2018(r335360) +++ head/sys/netinet/tcp_hpts.c Tue Jun 19 05:28:14 2018(r335361) @@ -1158,7 +1158,6 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct tim hpts->p_inp = inp; drop_reason = inp->inp_hpts_drop_reas; inp->inp_in_input = 0; - tp = intotcpcb(inp); mtx_unlock(>p_mtx); CURVNET_SET(inp->inp_vnet); if (drop_reason) { @@ -1183,6 +1182,7 @@ out: mtx_lock(>p_mtx); continue; } + tp = intotcpcb(inp); if ((tp == NULL) || (tp->t_inpcb == NULL)) { goto out; } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r335317 - head/sys/netinet
Author: rrs Date: Mon Jun 18 14:10:12 2018 New Revision: 335317 URL: https://svnweb.freebsd.org/changeset/base/335317 Log: Move to using the inp->vnet pointer as suggested by lstewart. This is far better since the hpts system is using the inp as its basis anyway. Unfortunately his comments came late. Sponsored by: Netflix Inc. Modified: head/sys/netinet/tcp_hpts.c Modified: head/sys/netinet/tcp_hpts.c == --- head/sys/netinet/tcp_hpts.c Mon Jun 18 13:49:44 2018(r335316) +++ head/sys/netinet/tcp_hpts.c Mon Jun 18 14:10:12 2018(r335317) @@ -1216,7 +1216,7 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct tim inp->inp_in_input = 0; tp = intotcpcb(inp); mtx_unlock(>p_mtx); - CURVNET_SET(tp->t_vnet); + CURVNET_SET(inp->inp_vnet); if (drop_reason) { INP_INFO_RLOCK(_tcbinfo); ti_locked = TI_RLOCKED; @@ -1589,7 +1589,7 @@ out_now: getmicrouptime(); cts = tcp_tv_to_usectick(); } - CURVNET_SET(tp->t_vnet); + CURVNET_SET(inp->inp_vnet); /* * There is a hole here, we get the refcnt on the * inp so it will still be preserved but to make ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r335106 - head/sys/netinet/tcp_stacks
Author: rrs Date: Thu Jun 14 03:27:42 2018 New Revision: 335106 URL: https://svnweb.freebsd.org/changeset/base/335106 Log: This fixes several bugs that Larry Rosenman helped me find in Rack with respect to its handling of TCP Fast Open. Several fixes all related to TFO are included in this commit: 1) Handling of non-TFO retransmissions 2) Building the proper send-map when we are doing TFO 3) Dealing with the ack that comes back that includes the SYN and data. It appears that with this commit TFO now works :-) Thanks Larry for all your help!! Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D15758 Modified: head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Thu Jun 14 02:30:43 2018 (r335105) +++ head/sys/netinet/tcp_stacks/rack.c Thu Jun 14 03:27:42 2018 (r335106) @@ -2083,6 +2083,8 @@ rack_timer_start(struct tcpcb *tp, struct tcp_rack *ra /* We can't start any timer in persists */ return (rack_get_persists_timer_val(tp, rack)); } + if (tp->t_state < TCPS_ESTABLISHED) + goto activate_rxt; rsm = TAILQ_FIRST(>r_ctl.rc_tmap); if (rsm == NULL) { /* Nothing on the send map */ @@ -3385,8 +3387,15 @@ again: rsm->r_tim_lastsent[0] = ts; rsm->r_rtr_cnt = 1; rsm->r_rtr_bytes = 0; - rsm->r_start = seq_out; - rsm->r_end = rsm->r_start + len; + if (th_flags & TH_SYN) { + /* The data space is one beyond snd_una */ + rsm->r_start = seq_out + 1; + rsm->r_end = rsm->r_start + (len - 1); + } else { + /* Normal case */ + rsm->r_start = seq_out; + rsm->r_end = rsm->r_start + len; + } rsm->r_sndcnt = 0; TAILQ_INSERT_TAIL(>r_ctl.rc_map, rsm, r_next); TAILQ_INSERT_TAIL(>r_ctl.rc_tmap, rsm, r_tnext); @@ -4657,11 +4666,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s * send garbage on first SYN. 
*/ int32_t nsegs; -#ifdef TCP_RFC7413 int32_t tfo_syn; -#else -#definetfo_syn (FALSE) -#endif struct tcp_rack *rack; rack = (struct tcp_rack *)tp->t_fb_ptr; @@ -4767,10 +4772,8 @@ dodata: /* XXX */ * PRU_RCVD). If a FIN has already been received on this connection * then we just ignore the text. */ -#ifdef TCP_RFC7413 tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) && - (tp->t_flags & TF_FASTOPEN)); -#endif + IS_FASTOPEN(tp->t_flags)); if ((tlen || (thflags & TH_FIN) || tfo_syn) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; @@ -5237,6 +5240,8 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, st tp->irs = th->th_seq; tcp_rcvseqinit(tp); if (thflags & TH_ACK) { + int tfo_partial = 0; + TCPSTAT_INC(tcps_connects); soisconnected(so); #ifdef MAC @@ -5250,10 +5255,19 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, st tp->rcv_adv += min(tp->rcv_wnd, TCP_MAXWIN << tp->rcv_scale); /* +* If not all the data that was sent in the TFO SYN +* has been acked, resend the remainder right away. +*/ + if (IS_FASTOPEN(tp->t_flags) && + (tp->snd_una != tp->snd_max)) { + tp->snd_nxt = th->th_ack; + tfo_partial = 1; + } + /* * If there's data, delay ACK; if there's also a FIN ACKNOW * will be turned on later. */ - if (DELAY_ACK(tp, tlen) && tlen != 0) { + if (DELAY_ACK(tp, tlen) && tlen != 0 && (tfo_partial == 0)) { rack_timer_cancel(tp, (struct tcp_rack *)tp->t_fb_ptr, ((struct tcp_rack *)tp->t_fb_ptr)->r_ctl.rc_rcvtime, __LINE__); tp->t_flags |= TF_DELACK; @@ -5266,6 +5280,21 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, st tp->t_flags |= TF_ECN_PERMIT; TCPSTAT_INC(tcps_ecn_shs); } + if (SEQ_GT(th->th_ack, tp->snd_una)) { + /* +* We advance snd_una for the +* fast open case. If th_ack is +* acknowledging data beyond +* snd_una we can't just call +
svn commit: r335022 - head/sys/netinet
Author: rrs Date: Tue Jun 12 23:54:08 2018 New Revision: 335022 URL: https://svnweb.freebsd.org/changeset/base/335022 Log: This fixes missing VNET sets in the hpts system. Basically without this and running vnets with a TCP stack that uses some of the features is a recipe for panic (without this commit). Reported by: Larry Rosenman Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D15757 Modified: head/sys/netinet/tcp_hpts.c Modified: head/sys/netinet/tcp_hpts.c == --- head/sys/netinet/tcp_hpts.c Tue Jun 12 23:26:25 2018(r335021) +++ head/sys/netinet/tcp_hpts.c Tue Jun 12 23:54:08 2018(r335022) @@ -1215,7 +1215,9 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct tim hpts->p_inp = inp; drop_reason = inp->inp_hpts_drop_reas; inp->inp_in_input = 0; + tp = intotcpcb(inp); mtx_unlock(>p_mtx); + CURVNET_SET(tp->t_vnet); if (drop_reason) { INP_INFO_RLOCK(_tcbinfo); ti_locked = TI_RLOCKED; @@ -1234,10 +1236,10 @@ out: INP_WUNLOCK(inp); } ti_locked = TI_UNLOCKED; + CURVNET_RESTORE(); mtx_lock(>p_mtx); continue; } - tp = intotcpcb(inp); if ((tp == NULL) || (tp->t_inpcb == NULL)) { goto out; } @@ -1262,6 +1264,7 @@ out: } if (in_pcbrele_wlocked(inp) == 0) INP_WUNLOCK(inp); + CURVNET_RESTORE(); mtx_lock(>p_mtx); continue; } @@ -1282,7 +1285,6 @@ out: */ tcp_set_hpts(inp); } - CURVNET_SET(tp->t_vnet); m = tp->t_in_pkt; n = NULL; if (m != NULL && @@ -1366,7 +1368,6 @@ out: if (m) n = m->m_nextpkt; } - CURVNET_RESTORE(); goto out; } /* ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r334804 - in head/sys: kern modules/tcp modules/tcp/rack netinet netinet/tcp_stacks sys
> On Jun 7, 2018, at 6:01 PM, hiren panchasara > wrote: > > On 06/07/18 at 06:18P, Randall Stewart wrote: >> Author: rrs >> Date: Thu Jun 7 18:18:13 2018 >> New Revision: 334804 >> URL: https://svnweb.freebsd.org/changeset/base/334804 >> >> Log: >> This commit brings in a new refactored TCP stack called Rack. >> Rack includes the following features: >> - A different SACK processing scheme (the old sack structures are not >> used). >> - RACK (Recent acknowledgment) where counting dup-acks is no longer done >> instead time is used to knwo when to retransmit. (see the I-D) >> - TLP (Tail Loss Probe) where we will probe for tail-losses to attempt >> to try not to take a retransmit time-out. (see the I-D) >> - Burst mitigation using TCPHTPS >> - PRR (partial rate reduction) see the RFC. >> >> Once built into your kernel, you can select this stack by either >> socket option with the name of the stack is "rack" or by setting >> the global sysctl so the default is rack. >> >> Note that any connection that does not support SACK will be kicked >> back to the "default" base FreeBSD stack (currently known as "default"). >> >> To build this into your kernel you will need to enable in your >> kernel: >> makeoptions WITH_EXTRA_TCP_STACKS=1 >> options TCPHPTS >> >> Sponsored by: Netflix Inc. 
>> Differential Revision: https://reviews.freebsd.org/D15525 >> >> Added: >> head/sys/modules/tcp/rack/ >> head/sys/modules/tcp/rack/Makefile (contents, props changed) >> head/sys/netinet/tcp_stacks/rack.c (contents, props changed) >> head/sys/netinet/tcp_stacks/rack_bbr_common.h (contents, props changed) >> head/sys/netinet/tcp_stacks/sack_filter.c (contents, props changed) >> head/sys/netinet/tcp_stacks/sack_filter.h (contents, props changed) >> head/sys/netinet/tcp_stacks/tcp_rack.h (contents, props changed) >> Modified: >> head/sys/kern/uipc_sockbuf.c >> head/sys/modules/tcp/Makefile >> head/sys/netinet/tcp.h >> head/sys/netinet/tcp_log_buf.h >> head/sys/netinet/tcp_output.c >> head/sys/netinet/tcp_stacks/fastpath.c >> head/sys/netinet/tcp_timer.c >> head/sys/netinet/tcp_timer.h >> head/sys/netinet/tcp_var.h >> head/sys/sys/mbuf.h >> head/sys/sys/queue.h >> head/sys/sys/sockbuf.h >> head/sys/sys/time.h > > I thought we'd have more time to review/test this. Looks like BSDCan > commit-spree in effect. :-) The Phabricator review has been up since May 22nd. Thats over 2.5 weeks, this was also discussed on the Thursday conference calls. > > A few questions: > 1) Does RACK work reliably without HPTS? If yes, has that config been > tested? > No it requires the pacer. > 2) It looks like PRR is tied to RACK. Why did we go that route? > Shouldn't it be easily used with the 'default' stack also? > It is what I developed.. and I had no desire to work with the default stack. That is a fifth rail that no one wants touched. > 3) Can new SACK be used with the traditional stack? Well if you want to rework the base stack you might be able to do that :) It would be quite some effort.. I think Robert wants eventually the old stack to be de-composed and then slowly work at getting more common code between them until eventually you can have a diff and somehow figure out how to integrate the two. > > 4) Where should manpage like info for RACK go? a new man-page or > extending tcp(4)? 
Info like how to enable system-wide or per socket > should go here. > The enable/disable or per-socket I think is in with the pluggable stack stuff. We might want a Rack man page.. have to think about it. > 5) Any perf numbers to go along with this commit? Synthetic or > production numbers showing improvements in transfer speed or any other > impact on CPU usage (specially with HPTS) that you can share? > CPU will be more but we see close to a drop in rebuffers by about 12% I am told. > 6) In your testing, have you found cases where RACK does poorly compared > to the 'default' stack? Any recommendations on when should RACK be > enabled? (Something like this could go in the manpage.) Nope. R > > Glad to finally see this in -head! > > Cheers, > Hiren Randall Stewart r...@netflix.com 803-317-4952 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r334815 - head/sys/modules/tcp/rack
Author: rrs Date: Thu Jun 7 20:57:12 2018 New Revision: 334815 URL: https://svnweb.freebsd.org/changeset/base/334815 Log: Take out the stack alias inadvertently added by my commit. Reported by: Peter Lei Modified: head/sys/modules/tcp/rack/Makefile Modified: head/sys/modules/tcp/rack/Makefile == --- head/sys/modules/tcp/rack/Makefile Thu Jun 7 20:49:01 2018 (r334814) +++ head/sys/modules/tcp/rack/Makefile Thu Jun 7 20:57:12 2018 (r334815) @@ -19,6 +19,5 @@ SRCS+=opt_kern_tls.h CFLAGS+= -DMODNAME=${KMOD} CFLAGS+= -DSTACKNAME=${STACKNAME} -CFLAGS+= -DSTACKALIAS=rack_18q21 .include ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r334813 - head/sys/sys
Author: rrs Date: Thu Jun 7 19:57:55 2018 New Revision: 334813 URL: https://svnweb.freebsd.org/changeset/base/334813 Log: Fix build issue with const and volatile and the myriad ways that the various compilers treat this. The only safe prefetch appears to be for AMD. The other compilers either are not volatile or are not const :( Reported by: Michael Tuexen Modified: head/sys/sys/kern_prefetch.h Modified: head/sys/sys/kern_prefetch.h == --- head/sys/sys/kern_prefetch.hThu Jun 7 19:48:49 2018 (r334812) +++ head/sys/sys/kern_prefetch.hThu Jun 7 19:57:55 2018 (r334813) @@ -34,7 +34,7 @@ kern_prefetch(const volatile void *addr, void* before) #if defined(__amd64__) __asm __volatile("prefetcht1 (%1)":"=rm"(*((int32_t *)before)):"r"(addr):); #else - __builtin_prefetch(addr); +/* __builtin_prefetch(addr);*/ #endif } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r334804 - in head/sys: kern modules/tcp modules/tcp/rack netinet netinet/tcp_stacks sys
Author: rrs Date: Thu Jun 7 18:18:13 2018 New Revision: 334804 URL: https://svnweb.freebsd.org/changeset/base/334804 Log: This commit brings in a new refactored TCP stack called Rack. Rack includes the following features: - A different SACK processing scheme (the old sack structures are not used). - RACK (Recent acknowledgment) where counting dup-acks is no longer done; instead time is used to know when to retransmit. (see the I-D) - TLP (Tail Loss Probe) where we will probe for tail-losses to attempt to try not to take a retransmit time-out. (see the I-D) - Burst mitigation using TCPHPTS - PRR (proportional rate reduction) see the RFC. Once built into your kernel, you can select this stack by either socket option with the name of the stack is "rack" or by setting the global sysctl so the default is rack. Note that any connection that does not support SACK will be kicked back to the "default" base FreeBSD stack (currently known as "default"). To build this into your kernel you will need to enable in your kernel: makeoptions WITH_EXTRA_TCP_STACKS=1 options TCPHPTS Sponsored by: Netflix Inc. 
Differential Revision:https://reviews.freebsd.org/D15525 Added: head/sys/modules/tcp/rack/ head/sys/modules/tcp/rack/Makefile (contents, props changed) head/sys/netinet/tcp_stacks/rack.c (contents, props changed) head/sys/netinet/tcp_stacks/rack_bbr_common.h (contents, props changed) head/sys/netinet/tcp_stacks/sack_filter.c (contents, props changed) head/sys/netinet/tcp_stacks/sack_filter.h (contents, props changed) head/sys/netinet/tcp_stacks/tcp_rack.h (contents, props changed) Modified: head/sys/kern/uipc_sockbuf.c head/sys/modules/tcp/Makefile head/sys/netinet/tcp.h head/sys/netinet/tcp_log_buf.h head/sys/netinet/tcp_output.c head/sys/netinet/tcp_stacks/fastpath.c head/sys/netinet/tcp_timer.c head/sys/netinet/tcp_timer.h head/sys/netinet/tcp_var.h head/sys/sys/mbuf.h head/sys/sys/queue.h head/sys/sys/sockbuf.h head/sys/sys/time.h Modified: head/sys/kern/uipc_sockbuf.c == --- head/sys/kern/uipc_sockbuf.cThu Jun 7 18:06:01 2018 (r334803) +++ head/sys/kern/uipc_sockbuf.cThu Jun 7 18:18:13 2018 (r334804) @@ -1283,6 +1283,55 @@ sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_i return (ret); } +struct mbuf * +sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff) +{ + struct mbuf *m; + + KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); + if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { + *moff = off; + if (sb->sb_sndptr == NULL) { + sb->sb_sndptr = sb->sb_mb; + sb->sb_sndptroff = 0; + } + return (sb->sb_mb); + } else { + m = sb->sb_sndptr; + off -= sb->sb_sndptroff; + } + *moff = off; + return (m); +} + +void +sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len) +{ + /* +* A small copy was done, advance forward the sb_sbsndptr to cover +* it. 
+*/ + struct mbuf *m; + + if (mb != sb->sb_sndptr) { + /* Did not copyout at the same mbuf */ + return; + } + m = mb; + while (m && (len > 0)) { + if (len >= m->m_len) { + len -= m->m_len; + if (m->m_next) { + sb->sb_sndptroff += m->m_len; + sb->sb_sndptr = m->m_next; + } + m = m->m_next; + } else { + len = 0; + } + } +} + /* * Return the first mbuf and the mbuf data offset for the provided * send offset without changing the "sb_sndptroff" field. Modified: head/sys/modules/tcp/Makefile == --- head/sys/modules/tcp/Makefile Thu Jun 7 18:06:01 2018 (r334803) +++ head/sys/modules/tcp/Makefile Thu Jun 7 18:18:13 2018 (r334804) @@ -7,10 +7,12 @@ SYSDIR?=${SRCTOP}/sys SUBDIR=\ ${_tcp_fastpath} \ +${_tcp_rack} \ ${_tcpmd5} \ .if ${MK_EXTRA_TCP_STACKS} != "no" || defined(ALL_MODULES) _tcp_fastpath= fastpath +_tcp_rack= rack .endif .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \ Added: head/sys/modules/tcp/rack/Makefile == --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/modules/tcp/rack/Makefile Thu Jun 7 18:18:13 2018 (r334804) @@ -0,0 +1,24 @@ +# +# $FreeBSD$ +# + +.PATH: ${.CURDIR}/../../../netinet/tcp_stacks + +STACKNAME=
svn commit: r333041 - head/sys/netinet
Author: rrs Date: Thu Apr 26 21:41:16 2018 New Revision: 333041 URL: https://svnweb.freebsd.org/changeset/base/333041 Log: This change re-arranges the fields within the tcp-pcb so that they are more in order of cache line use as one passes through the tcp_input/output paths (non-errors most likely path). This helps speed up cache line optimization so that the tcp stack runs a bit more efficently. Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D15136 Modified: head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_var.h == --- head/sys/netinet/tcp_var.h Thu Apr 26 21:40:05 2018(r333040) +++ head/sys/netinet/tcp_var.h Thu Apr 26 21:41:16 2018(r333041) @@ -83,125 +83,123 @@ STAILQ_HEAD(tcp_log_stailq, tcp_log_mem); /* * Tcp control block, one per tcp; fields: - * Organized for 16 byte cacheline efficiency. + * Organized for 64 byte cacheline efficiency based + * on common tcp_input/tcp_output processing. */ struct tcpcb { - struct tsegqe_head t_segq; /* segment reassembly queue */ - int t_segqlen; /* segment reassembly queue length */ - int t_dupacks; /* consecutive dup acks recd */ - - struct mbuf *t_in_pkt; /* head of the input packet queue for the tcp_hpts system */ - struct mbuf *t_tail_pkt; /* tail of the input packet queue for the tcp_hpts system */ - struct tcp_timer *t_timers; /* All the TCP timers in one struct */ - + /* Cache line 1 */ struct inpcb *t_inpcb; /* back pointer to internet pcb */ - int t_state;/* state of this connection */ + struct tcp_function_block *t_fb;/* TCP function call block */ + void*t_fb_ptr; /* Pointer to t_fb specific data */ + uint32_t t_maxseg:24, /* maximum segment size */ + t_logstate:8; /* State of "black box" logging */ + uint32_t t_state:4, /* state of this connection */ + bits_spare : 24; u_int t_flags; - - struct vnet *t_vnet; /* back pointer to parent vnet */ - tcp_seq snd_una;/* sent but unacknowledged */ tcp_seq snd_max;/* highest sequence number sent; * used to recognize retransmits */ 
tcp_seq snd_nxt;/* send next */ tcp_seq snd_up; /* send urgent pointer */ - - tcp_seq snd_wl1;/* window update seg seq number */ - tcp_seq snd_wl2;/* window update seg ack number */ - tcp_seq iss;/* initial send sequence number */ - tcp_seq irs;/* initial receive sequence number */ - + uint32_t snd_wnd; /* send window */ + uint32_t snd_cwnd; /* congestion-controlled window */ + uint32_t cl1_spare; /* Spare to round out CL 1 */ + /* Cache line 2 */ + u_int32_t ts_offset; /* our timestamp offset */ + u_int32_t rfbuf_ts; /* recv buffer autoscaling timestamp */ + int rcv_numsacks; /* # distinct sack blks present */ + u_int t_tsomax; /* TSO total burst length limit in bytes */ + u_int t_tsomaxsegcount; /* TSO maximum segment count */ + u_int t_tsomaxsegsize;/* TSO maximum segment size in bytes */ tcp_seq rcv_nxt;/* receive next */ tcp_seq rcv_adv;/* advertised window */ uint32_t rcv_wnd; /* receive window */ + u_int t_flags2; /* More tcpcb flags storage */ + int t_srtt; /* smoothed round-trip time */ + int t_rttvar; /* variance in round-trip time */ + u_int32_t ts_recent; /* timestamp echo data */ + u_char snd_scale; /* window scaling for send window */ + u_char rcv_scale; /* window scaling for recv window */ + u_char snd_limited;/* segments limited transmitted */ + u_char request_r_scale;/* pending window scaling */ + tcp_seq last_ack_sent; + u_int t_rcvtime; /* inactivity time */ + /* Cache line 3 */ tcp_seq rcv_up; /* receive urgent pointer */ - - uint32_t snd_wnd; /* send window */ - uint32_t snd_cwnd; /* congestion-controlled window */ + int t_segqlen; /* segment reassembly queue length */ + struct tsegqe_head t_segq; /* segment reassembly queue */ + struct mbuf *t_in_pkt; + struct mbuf *t_tail_pkt; + struct tcp_timer *t_timers;
svn commit: r332774 - head/sys/netinet
Author: rrs Date: Thu Apr 19 15:03:48 2018 New Revision: 332774 URL: https://svnweb.freebsd.org/changeset/base/332774 Log: These two modules need the tcp_hpts.h file for when the option is enabled (not sure how LINT/build-universe missed this) oops. Sponsored by: Netflix Inc Modified: head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_usrreq.c Modified: head/sys/netinet/tcp_subr.c == --- head/sys/netinet/tcp_subr.c Thu Apr 19 15:02:53 2018(r332773) +++ head/sys/netinet/tcp_subr.c Thu Apr 19 15:03:48 2018(r332774) @@ -99,6 +99,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #ifdef INET6 #include Modified: head/sys/netinet/tcp_usrreq.c == --- head/sys/netinet/tcp_usrreq.c Thu Apr 19 15:02:53 2018 (r332773) +++ head/sys/netinet/tcp_usrreq.c Thu Apr 19 15:03:48 2018 (r332774) @@ -94,6 +94,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #ifdef TCPPCAP #include #endif ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r332770 - in head/sys: conf netinet netinet/tcp_stacks sys
Author: rrs Date: Thu Apr 19 13:37:59 2018 New Revision: 332770 URL: https://svnweb.freebsd.org/changeset/base/332770 Log: This commit brings in the TCP high precision timer system (tcp_hpts). It is the forerunner/foundational work of bringing in both Rack and BBR which use hpts for pacing out packets. The feature is optional and requires the TCPHPTS option to be enabled before the feature will be active. TCP modules that use it must assure that the base component is compile in the kernel in which they are loaded. MFC after:Never Sponsored by: Netflix Inc. Differential Revision:https://reviews.freebsd.org/D15020 Added: head/sys/netinet/tcp_hpts.c (contents, props changed) head/sys/netinet/tcp_hpts.h (contents, props changed) head/sys/sys/kern_prefetch.h (contents, props changed) Modified: head/sys/conf/files head/sys/conf/options head/sys/netinet/in_pcb.c head/sys/netinet/in_pcb.h head/sys/netinet/tcp_stacks/fastpath.c head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_syncache.c head/sys/netinet/tcp_usrreq.c head/sys/netinet/tcp_var.h head/sys/sys/mbuf.h Modified: head/sys/conf/files == --- head/sys/conf/files Thu Apr 19 12:50:49 2018(r332769) +++ head/sys/conf/files Thu Apr 19 13:37:59 2018(r332770) @@ -4355,6 +4355,7 @@ netinet/tcp_log_buf.c optional tcp_blackbox inet | tc netinet/tcp_lro.c optional inet | inet6 netinet/tcp_output.c optional inet | inet6 netinet/tcp_offload.c optional tcp_offload inet | tcp_offload inet6 +netinet/tcp_hpts.c optional tcphpts inet | tcphpts inet6 netinet/tcp_pcap.c optional inet tcppcap | inet6 tcppcap netinet/tcp_reass.coptional inet | inet6 netinet/tcp_sack.c optional inet | inet6 Modified: head/sys/conf/options == --- head/sys/conf/options Thu Apr 19 12:50:49 2018(r332769) +++ head/sys/conf/options Thu Apr 19 13:37:59 2018(r332770) @@ -218,6 +218,7 @@ SYSVMSG opt_sysvipc.h SYSVSEMopt_sysvipc.h SYSVSHMopt_sysvipc.h SW_WATCHDOGopt_watchdog.h +TCPHPTS opt_inet.h TURNSTILE_PROFILING UMTX_PROFILING UMTX_CHAINSopt_global.h Modified: 
head/sys/netinet/in_pcb.c == --- head/sys/netinet/in_pcb.c Thu Apr 19 12:50:49 2018(r332769) +++ head/sys/netinet/in_pcb.c Thu Apr 19 13:37:59 2018(r332770) @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -87,6 +88,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifdef TCPHPTS +#include +#endif #include #include #endif @@ -1224,9 +1228,28 @@ in_pcbrele_rlocked(struct inpcb *inp) } return (0); } - + KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); - +#ifdef TCPHPTS + if (inp->inp_in_hpts || inp->inp_in_input) { + struct tcp_hpts_entry *hpts; + /* +* We should not be on the hpts at +* this point in any form. we must +* get the lock to be sure. +*/ + hpts = tcp_hpts_lock(inp); + if (inp->inp_in_hpts) + panic("Hpts:%p inp:%p at free still on hpts", + hpts, inp); + mtx_unlock(>p_mtx); + hpts = tcp_input_lock(inp); + if (inp->inp_in_input) + panic("Hpts:%p inp:%p at free still on input hpts", + hpts, inp); + mtx_unlock(>p_mtx); + } +#endif INP_RUNLOCK(inp); pcbinfo = inp->inp_pcbinfo; uma_zfree(pcbinfo->ipi_zone, inp); @@ -1255,7 +1278,26 @@ in_pcbrele_wlocked(struct inpcb *inp) } KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); - +#ifdef TCPHPTS + if (inp->inp_in_hpts || inp->inp_in_input) { + struct tcp_hpts_entry *hpts; + /* +* We should not be on the hpts at +* this point in any form. we must +* get the lock to be sure. +*/ + hpts = tcp_hpts_lock(inp); + if (inp->inp_in_hpts) + panic("Hpts:%p inp:%p at free still on hpts", + hpts, inp); + mtx_unlock(>p_mtx); + hpts = tcp_input_lock(inp); + if (inp->inp_in_input) + panic("Hpts:%p inp:%p at free still on input hpts", + hpts, inp); + mtx_unlock(>p_mtx); + } +#endif
svn commit: r304224 - head/sys/netinet
Author: rrs Date: Tue Aug 16 15:17:36 2016 New Revision: 304224 URL: https://svnweb.freebsd.org/changeset/base/304224 Log: A few more wording tweaks as suggested (with some modifications as well) by Ravi Pokala. Thanks for the comments :-) Sponsored by: Netflix Inc. Modified: head/sys/netinet/tcp_timer.c Modified: head/sys/netinet/tcp_timer.c == --- head/sys/netinet/tcp_timer.cTue Aug 16 15:11:46 2016 (r304223) +++ head/sys/netinet/tcp_timer.cTue Aug 16 15:17:36 2016 (r304224) @@ -307,15 +307,15 @@ tcp_timer_delack(void *xtp) * should only have grabbed the INP_WLOCK() when * it entered. To safely switch to holding both the * INP_INFO_RLOCK() and the INP_WLOCK() we must first - * grab a reference on the inp, this will hold the inp - * so that it can't be removed. We then unlock and grab - * the info-read lock. Once we have the INP_INFO_RLOCK() we - * proceed again to get the INP_WLOCK() but after that - * we must check if someone else deleted the pcb i.e. - * the inp_flags check.If so we return 1 otherwise - * we return 0. + * grab a reference on the inp, which will hold the inp + * so that it can't be removed. We then unlock the INP_WLOCK(), + * and grab the INP_INFO_RLOCK() lock. Once we have the INP_INFO_RLOCK() + * we proceed again to get the INP_WLOCK() (this preserves proper + * lock order). After acquiring the INP_WLOCK we must check if someone + * else deleted the pcb i.e. the inp_flags check. + * If so we return 1 otherwise we return 0. * - * No matter which the tcp_inpinfo_lock_add() function + * No matter what the tcp_inpinfo_lock_add() function * returns the caller must afterwards call tcp_inpinfo_lock_del() * to drop the locks and reference properly. */ ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r304223 - in head: share/man/man4 share/man/man9 sys/netinet
Author: rrs Date: Tue Aug 16 15:11:46 2016 New Revision: 304223 URL: https://svnweb.freebsd.org/changeset/base/304223 Log: Here we update the modular tcp to be able to switch to an alternate TCP stack in other then the closed state (pre-listen/connect). The idea is that *if* that is supported by the alternate stack, it is asked if its ok to switch. If it approves the "handoff" then we allow the switch to happen. Also the fini() function now gets a flag to tell if you are switching away *or* the tcb is destroyed. The init() call into the alternate stack is moved to the end so the tcb is more fully formed before the init transpires. Sponsored by: Netflix Inc. Differential Revision:D6790 Modified: head/share/man/man4/tcp.4 head/share/man/man9/tcp_functions.9 head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_syncache.c head/sys/netinet/tcp_usrreq.c head/sys/netinet/tcp_var.h Modified: head/share/man/man4/tcp.4 == --- head/share/man/man4/tcp.4 Tue Aug 16 14:33:25 2016(r304222) +++ head/share/man/man4/tcp.4 Tue Aug 16 15:11:46 2016(r304223) @@ -633,7 +633,8 @@ when trying to use a TCP function block .Xr mod_cc 4 , .Xr siftr 4 , .Xr syncache 4 , -.Xr setkey 8 +.Xr setkey 8 , +.Xr tcp_functions 9 .Rs .%A "V. Jacobson" .%A "R. 
Braden" Modified: head/share/man/man9/tcp_functions.9 == --- head/share/man/man9/tcp_functions.9 Tue Aug 16 14:33:25 2016 (r304222) +++ head/share/man/man9/tcp_functions.9 Tue Aug 16 15:11:46 2016 (r304223) @@ -114,14 +114,17 @@ struct tcp_function_block { struct inpcb *inp, struct tcpcb *tp); /* Optional memory allocation/free routine */ void(*tfb_tcp_fb_init)(struct tcpcb *); - void(*tfb_tcp_fb_fini)(struct tcpcb *); + void(*tfb_tcp_fb_fini)(struct tcpcb *, int); /* Optional timers, must define all if you define one */ int (*tfb_tcp_timer_stop_all)(struct tcpcb *); void(*tfb_tcp_timer_activate)(struct tcpcb *, uint32_t, u_int); int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t); void(*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t); + /* Optional functions */ void(*tfb_tcp_rexmit_tmr)(struct tcpcb *); + void(*tfb_tcp_handoff_ok)(struct tcpcb *); + /* System use */ volatile uint32_t tfb_refcnt; uint32_t tfb_flags; }; @@ -157,6 +160,16 @@ in the .Va tfb_tcp_fb_fini field. .Pp +If the +.Va tfb_tcp_fb_fini +argument is non-NULL, the function to which it points is called when the +kernel is destroying the TCP control block or when the socket is transitioning +to use a different TCP stack. +The function is called with arguments of the TCP control block and an integer +flag. +The flag will be zero if the socket is transitioning to use another TCP stack +or one if the TCP control block is being destroyed. +.Pp If the TCP stack implements additional timers, the TCP stack should set a non-NULL pointer in the .Va tfb_tcp_timer_stop_all , @@ -193,6 +206,37 @@ However, care must be taken to ensure th TCP control block in a valid state for the remainder of the retransmit timer logic. .Pp +A user may select a new TCP stack before calling +.Xr connect 2 +or +.Xr listen 2 . +Optionally, a TCP stack may also allow a user to begin using the TCP stack for +a connection that is in a later state by setting a non-NULL function pointer in +the +.Va tfb_tcp_handoff_ok +field. 
+If this field is non-NULL and a user attempts to select that TCP stack after +calling +.Xr connect 2 +or +.Xr listen 2 +for that socket, the kernel will call the function pointed to by the +.Va tfb_tcp_handoff_ok +field. +The function should return 0 if the user is allowed to switch the socket to use +the TCP stack. Otherwise, the function should return an error code, which will +be returned to the user. +If the +.Va tfb_tcp_handoff_ok +field is +.Dv NULL +and a user attempts to select the TCP stack after calling +.Xr connect 2 +or +.Xr listen 2 +for that socket, the operation will fail and the kernel will return +.Er EINVAL . +.Pp The .Va tfb_refcnt and @@ -269,8 +313,10 @@ The .Fa blk argument references a function block that is not currently registered. .Sh SEE ALSO -.Xr malloc 9 , -.Xr tcp 4 +.Xr connect 2 , +.Xr listen 2 , +.Xr tcp 4 , +.Xr malloc 9 .Sh HISTORY This framework first appeared in .Fx 11.0 . Modified: head/sys/netinet/tcp_subr.c == --- head/sys/netinet/tcp_subr.c Tue Aug 16 14:33:25 2016(r304222) +++ head/sys/netinet/tcp_subr.c Tue Aug 16 15:11:46 2016(r304223) @@ -1187,9 +1187,6 @@ tcp_newtcpcb(struct inpcb *inp) tp->t_fb = tcp_func_set_ptr;
Re: svn commit: r304218 - head/sys/netinet
In theory it *could* be MFC’d to stable-10 and 11 but I am not sure we want to do that. I am told by Drew that it does improve performance since in stable-10 you are getting the INFO_WLOCK() but I am not sure if folks want it MFC’d… One thing that this code leads us towards is we *in theory* could move the lock acquisition to the timer code itself (I think).. we would have to make sure that the callout functions did do the unlock since that’s part of the lock-dance with reference… but it’s theoretically possible :-) R > On Aug 16, 2016, at 6:18 AM, Slawa Olhovchenkov <s...@zxy.spb.ru> wrote: > > On Tue, Aug 16, 2016 at 12:40:56PM +0000, Randall Stewart wrote: > >> Author: rrs >> Date: Tue Aug 16 12:40:56 2016 >> New Revision: 304218 >> URL: https://svnweb.freebsd.org/changeset/base/304218 >> >> Log: >> This cleans up the timer code in TCP and also makes it so we do not >> take the INFO lock *unless* we are really going to delete the TCB. >> >> Differential Revision: D7136 > > Is this related to stable/10? Randall Stewart r...@netflix.com 803-317-4952 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r304218 - head/sys/netinet
Hans: Take a look at the comments, maybe they will help you understand what’s going on. The idea of it is that you *only* need the INFO_RLOCK when the timer function wants to destroy the tcb (not all timers do this).. and yes usually the timer function is going to call the drop/close path to purge the TCB. So in order to pick-up the info lock you do the refcnt/lock-dance to get both locks in the proper lock order. This means that someone could possibly come in and purge the tcb on you while you are in the process of doing the lock-dance. If that occurs (the return code is 1) all the caller has to do is call the drop-lock function (the mate to the add_lock) and then return (since the pcb is in the state the caller wants.. i.e. gone). If the return code is 0, the caller can proceed to purge the tcb.. and then call the drop_lock function. Note that in theory this could be used outside of wanting to kill the tcb.. but I am not sure why one would want to hold the INFO_RLOCK if one did not want to purge the tcb. R > On Aug 16, 2016, at 6:14 AM, Hans Petter Selasky <h...@selasky.org> wrote: > > On 08/16/16 15:01, Randall Stewart wrote: >> Sure >> >> Let me add some comments for you. The idea here is that you pick-up a >> reference >> to the PCB.. so it can’t be removed. Thus when you re-lock the INP you check >> the >> dropped flag (just in case someone did get in). > > And this code is only used before tcp_close() / tcp_drop(), so if others got > in it is safe to assume that the inp is dead? > > --HPS Randall Stewart r...@netflix.com 803-317-4952 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r304219 - head/sys/netinet
Author: rrs Date: Tue Aug 16 13:08:03 2016 New Revision: 304219 URL: https://svnweb.freebsd.org/changeset/base/304219 Log: Comments describing how to properly use the new lock_add functions and its respective companion. Sponsored by: Netflix Inc. Modified: head/sys/netinet/tcp_timer.c Modified: head/sys/netinet/tcp_timer.c == --- head/sys/netinet/tcp_timer.c Tue Aug 16 12:40:56 2016 (r304218) +++ head/sys/netinet/tcp_timer.c Tue Aug 16 13:08:03 2016 (r304219) @@ -301,6 +301,25 @@ tcp_timer_delack(void *xtp) CURVNET_RESTORE(); } +/* + * When a timer wants to remove a TCB it must + * hold the INP_INFO_RLOCK(). The timer function + * should only have grabbed the INP_WLOCK() when + * it entered. To safely switch to holding both the + * INP_INFO_RLOCK() and the INP_WLOCK() we must first + * grab a reference on the inp, this will hold the inp + * so that it can't be removed. We then unlock and grab + * the info-read lock. Once we have the INP_INFO_RLOCK() we + * proceed again to get the INP_WLOCK() but after that + * we must check if someone else deleted the pcb i.e. + * the inp_flags check.If so we return 1 otherwise + * we return 0. + * + * No matter which the tcp_inpinfo_lock_add() function + * returns the caller must afterwards call tcp_inpinfo_lock_del() + * to drop the locks and reference properly. + */ + int tcp_inpinfo_lock_add(struct inpcb *inp) { ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r304218 - head/sys/netinet
Sure Let me add some comments for you. The idea here is that you pick-up a reference to the PCB.. so it can’t be removed. Thus when you re-lock the INP you check the dropped flag (just in case someone did get in). Let me get that in comments.. (note that’s also why when using this function you have to use its companion function to drop the reference). > On Aug 16, 2016, at 5:58 AM, Hans Petter Selasky <h...@selasky.org> wrote: > > On 08/16/16 14:40, Randall Stewart wrote: >> +int >> +tcp_inpinfo_lock_add(struct inpcb *inp) >> +{ >> +in_pcbref(inp); >> +INP_WUNLOCK(inp); >> +INP_INFO_RLOCK(&V_tcbinfo); >> +INP_WLOCK(inp); >> +if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { >> +return(1); >> +} >> +return(0); >> + >> +} > > Hi, > > Could you add some comments describing how it is considered safe to drop the > INP write-lock and then pick it up again? > > My first impression is that because you are dropping the inp lock, multiple > threads can enter the code in question, leaving the window open to races? > > --HPS Randall Stewart r...@netflix.com 803-317-4952 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r304218 - head/sys/netinet
Author: rrs Date: Tue Aug 16 12:40:56 2016 New Revision: 304218 URL: https://svnweb.freebsd.org/changeset/base/304218 Log: This cleans up the timer code in TCP and also makes it so we do not take the INFO lock *unless* we are really going to delete the TCB. Differential Revision:D7136 Modified: head/sys/netinet/tcp_timer.c head/sys/netinet/tcp_timer.h Modified: head/sys/netinet/tcp_timer.c == --- head/sys/netinet/tcp_timer.cTue Aug 16 12:13:12 2016 (r304217) +++ head/sys/netinet/tcp_timer.cTue Aug 16 12:40:56 2016 (r304218) @@ -294,11 +294,6 @@ tcp_timer_delack(void *xtp) CURVNET_RESTORE(); return; } - KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, - ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); - KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0, - ("%s: tp %p delack callout should be running", __func__, tp)); - tp->t_flags |= TF_ACKNOW; TCPSTAT_INC(tcps_delack); (void) tp->t_fb->tfb_tcp_output(tp); @@ -306,6 +301,39 @@ tcp_timer_delack(void *xtp) CURVNET_RESTORE(); } +int +tcp_inpinfo_lock_add(struct inpcb *inp) +{ + in_pcbref(inp); + INP_WUNLOCK(inp); + INP_INFO_RLOCK(_tcbinfo); + INP_WLOCK(inp); + if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) { + return(1); + } + return(0); + +} + +void +tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp) +{ + INP_INFO_RUNLOCK(_tcbinfo); + if (inp && (tp == NULL)) { + /* +* If tcp_close/drop() gets called and tp +* returns NULL, then the function dropped +* the inp lock, we hold a reference keeping +* this around, so we must re-aquire the +* INP_WLOCK() in order to proceed with +* our dropping the inp reference. 
+*/ + INP_WLOCK(inp); + } + if (inp && in_pcbrele_wlocked(inp) == 0) + INP_WUNLOCK(inp); +} + void tcp_timer_2msl(void *xtp) { @@ -317,7 +345,6 @@ tcp_timer_2msl(void *xtp) ostate = tp->t_state; #endif - INP_INFO_RLOCK(_tcbinfo); inp = tp->t_inpcb; KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp)); INP_WLOCK(inp); @@ -325,21 +352,17 @@ tcp_timer_2msl(void *xtp) if (callout_pending(>t_timers->tt_2msl) || !callout_active(>t_timers->tt_2msl)) { INP_WUNLOCK(tp->t_inpcb); - INP_INFO_RUNLOCK(_tcbinfo); CURVNET_RESTORE(); return; } callout_deactivate(>t_timers->tt_2msl); if ((inp->inp_flags & INP_DROPPED) != 0) { INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(_tcbinfo); CURVNET_RESTORE(); return; } KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0, ("%s: tp %p tcpcb can't be stopped here", __func__, tp)); - KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0, - ("%s: tp %p 2msl callout should be running", __func__, tp)); /* * 2 MSL timeout in shutdown went off. If we're closed but * still waiting for peer to close and connection has been idle @@ -355,7 +378,6 @@ tcp_timer_2msl(void *xtp) */ if ((inp->inp_flags & INP_TIMEWAIT) != 0) { INP_WUNLOCK(inp); - INP_INFO_RUNLOCK(_tcbinfo); CURVNET_RESTORE(); return; } @@ -363,15 +385,26 @@ tcp_timer_2msl(void *xtp) tp->t_inpcb && tp->t_inpcb->inp_socket && (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) { TCPSTAT_INC(tcps_finwait2_drops); + if (tcp_inpinfo_lock_add(inp)) { + tcp_inpinfo_lock_del(inp, tp); + goto out; + } tp = tcp_close(tp); + tcp_inpinfo_lock_del(inp, tp); + goto out; } else { if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) { - if (!callout_reset(>t_timers->tt_2msl, - TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) { - tp->t_timers->tt_flags &= ~TT_2MSL_RST; + callout_reset(>t_timers->tt_2msl, + TP_KEEPINTVL(tp), tcp_timer_2msl, tp); + } else { + if (tcp_inpinfo_lock_add(inp)) { + tcp_inpinfo_lock_del(inp, tp); + goto out; } - } else - tp = tcp_close(tp); + tp = tcp_close(tp); + tcp_inpinfo_lock_del(inp, 
tp); +
svn commit: r303412 - head
Author: rrs Date: Wed Jul 27 20:37:32 2016 New Revision: 303412 URL: https://svnweb.freebsd.org/changeset/base/303412 Log: Remove myself from kern_timeout.c yeah! Modified: head/MAINTAINERS Modified: head/MAINTAINERS == --- head/MAINTAINERS Wed Jul 27 20:34:09 2016 (r303411) +++ head/MAINTAINERS Wed Jul 27 20:37:32 2016 (r303412) @@ -37,7 +37,6 @@ subsystem login notes - atf freebsd-testing,jmmv,ngie Pre-commit review requested. ath(4) adrian Pre-commit review requested, send to freebsd-wirel...@freebsd.org -callout_*(9) rrs Pre-commit review requested -- becareful its tricksy code :o. contrib/compiler-rt dim Pre-commit review preferred. contrib/libc++ dim Pre-commit review preferred. contrib/libcxxrt dim Pre-commit review preferred. ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r303037 - head/sys/kern
You are most welcome to backout anything you like.. as far as I am concerned you own the code.. R > On Jul 20, 2016, at 6:35 PM, Gleb Smirnoff <gleb...@freebsd.org> wrote: > > Randall, > > I have just tested and r303037 brings the TCP panic back. I > got two crashes during 2.5 hours. > > In your email [1] you are right, there is regression that (-1) > return value is lost. This problem was worked on in the PR 210884, > and we were very close to commiting the fix. > > The whole 11.0-RELEASE cycle strongly depends on this change. We > don't want to release with TCP panic, and of course we want the > regression described in 210884 to be fixed. > > Your backout mixed with extra code really made things messy. Since > I don't want to go with commit war, on behalf of RE we are asking > for explicit agreement to back out r303037. Then we will proceed with > latest patch from 210884. Is that okay? > > [1] https://lists.freebsd.org/pipermail/svn-src-head/2016-July/089313.html > > On Wed, Jul 20, 2016 at 03:33:37PM +0200, Randall Stewart wrote: > R> Gleb > R> > R> I wish you would have responded earlier.. I am more than glad to hand > R> off all kern_timeout.c to you… please take it commit what you want to > R> it and have it. I hate the code and I dislike having to touch it. > R> > R> Its yours.. I can assure you I will not touch it again. > R> > R> R > R> > On Jul 20, 2016, at 8:53 AM, Gleb Smirnoff <gleb...@freebsd.org> wrote: > R> > > R> > On Tue, Jul 19, 2016 at 06:31:19PM +, Randall Stewart wrote: > R> > R> Author: rrs > R> > R> Date: Tue Jul 19 18:31:19 2016 > R> > R> New Revision: 303037 > R> > R> URL: https://svnweb.freebsd.org/changeset/base/303037 > R> > R> > R> > R> Log: > R> > R> This reverts out Gleb's changes and adds three small > R> > R> fixes that I think closes up the races Gleb was > R> > R> looking for. This is running quite nicely in Netflix and > R> > R> now no longer causes TCP-tcb leaks. 
> R> > R> > R> > R> Differential Revision: 7135 > R> > > R> > Just to notice that I am completely pissed of by this commit > R> > war, that you started. > R> > > R> > I've been testing my changes properly, I gave people time to > R> > review my changes. You didn't. > R> > > R> > From your explanation in other emails I see that you've been > R> > testing your changes with a version of FreeBSD that is a heavily > R> > modified FreeBSD 10, not 11. > R> > > R> > The new code you mixed with revert of mine, doesn't fix the > R> > problem observed. It fixes another problem that you imagined, > R> > which might exist, but isn't observed. We already discussed that > R> > and you didn't prove it wrong. > R> > > R> > Your change doesn't even revert my change completely. > R> > > R> > -- > R> > Totus tuus, Glebius. > R> > R> > R> Randall Stewart > R> r...@netflix.com > R> 803-317-4952 > R> > R> > R> > R> > R> > R> > > -- > Totus tuus, Glebius. Randall Stewart r...@netflix.com 803-317-4952 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r303037 - head/sys/kern
Gleb I wish you would have responded earlier.. I am more than glad to hand off all kern_timeout.c to you… please take it commit what you want to it and have it. I hate the code and I dislike having to touch it. It’s yours.. I can assure you I will not touch it again. R > On Jul 20, 2016, at 8:53 AM, Gleb Smirnoff <gleb...@freebsd.org> wrote: > > On Tue, Jul 19, 2016 at 06:31:19PM +0000, Randall Stewart wrote: > R> Author: rrs > R> Date: Tue Jul 19 18:31:19 2016 > R> New Revision: 303037 > R> URL: https://svnweb.freebsd.org/changeset/base/303037 > R> > R> Log: > R> This reverts out Gleb's changes and adds three small > R> fixes that I think closes up the races Gleb was > R> looking for. This is running quite nicely in Netflix and > R> now no longer causes TCP-tcb leaks. > R> > R> Differential Revision: 7135 > > Just to notice that I am completely pissed off by this commit > war, that you started. > > I've been testing my changes properly, I gave people time to > review my changes. You didn't. > > From your explanation in other emails I see that you've been > testing your changes with a version of FreeBSD that is a heavily > modified FreeBSD 10, not 11. > > The new code you mixed with revert of mine, doesn't fix the > problem observed. It fixes another problem that you imagined, > which might exist, but isn't observed. We already discussed that > and you didn't prove it wrong. > > Your change doesn't even revert my change completely. > > -- > Totus tuus, Glebius. Randall Stewart r...@netflix.com 803-317-4952 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r303037 - head/sys/kern
Author: rrs Date: Tue Jul 19 18:31:19 2016 New Revision: 303037 URL: https://svnweb.freebsd.org/changeset/base/303037 Log: This reverts out Gleb's changes and adds three small fixes that I think closes up the races Gleb was looking for. This is running quite nicely in Netflix and now no longer causes TCP-tcb leaks. Differential Revision:7135 Modified: head/sys/kern/kern_timeout.c Modified: head/sys/kern/kern_timeout.c == --- head/sys/kern/kern_timeout.cTue Jul 19 18:15:22 2016 (r303036) +++ head/sys/kern/kern_timeout.cTue Jul 19 18:31:19 2016 (r303037) @@ -1050,7 +1050,7 @@ callout_reset_sbt_on(struct callout *c, */ if (c->c_lock != NULL && !cc_exec_cancel(cc, direct)) cancelled = cc_exec_cancel(cc, direct) = true; - if (cc_exec_waiting(cc, direct)) { + if (cc_exec_waiting(cc, direct) || cc_exec_drain(cc, direct)) { /* * Someone has called callout_drain to kill this * callout. Don't reschedule. @@ -1166,7 +1166,7 @@ _callout_stop_safe(struct callout *c, in struct callout_cpu *cc, *old_cc; struct lock_class *class; int direct, sq_locked, use_lock; - int cancelled, not_on_a_list; + int not_on_a_list; if ((flags & CS_DRAIN) != 0) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock, @@ -1234,17 +1234,47 @@ again: panic("migration should not happen"); #endif } - + if ((drain != NULL) && (c->c_iflags & CALLOUT_PENDING) && + (cc_exec_curr(cc, direct) != c)) { + /* +* This callout is executing and we are draining. +* The only way this can happen is if its also +* been rescheduled to run on one thread *and* asked to drain +* on this thread (at the same time it is waiting to execute). +*/ + if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { + if (cc_exec_next(cc) == c) + cc_exec_next(cc) = LIST_NEXT(c, c_links.le); + LIST_REMOVE(c, c_links.le); + } else { + TAILQ_REMOVE(>cc_expireq, c, c_links.tqe); + } + c->c_iflags &= ~CALLOUT_PENDING; + c->c_flags &= ~CALLOUT_ACTIVE; + } /* -* If the callout is running, try to stop it or drain it. 
+* If the callout isn't pending, it's not on the queue, so +* don't attempt to remove it from the queue. We can try to +* stop it by other means however. */ - if (cc_exec_curr(cc, direct) == c) { + if (!(c->c_iflags & CALLOUT_PENDING)) { /* -* Succeed we to stop it or not, we must clear the -* active flag - this is what API users expect. +* If it wasn't on the queue and it isn't the current +* callout, then we can't stop it, so just bail. +* It probably has already been run (if locking +* is properly done). You could get here if the caller +* calls stop twice in a row for example. The second +* call would fall here without CALLOUT_ACTIVE set. */ c->c_flags &= ~CALLOUT_ACTIVE; - + if (cc_exec_curr(cc, direct) != c) { + CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", + c, c->c_func, c->c_arg); + CC_UNLOCK(cc); + if (sq_locked) + sleepq_release(_exec_waiting(cc, direct)); + return (-1); + } if ((flags & CS_DRAIN) != 0) { /* * The current callout is running (or just @@ -1278,7 +1308,6 @@ again: old_cc = cc; goto again; } - /* * Migration could be cancelled here, but * as long as it is still not sure when it @@ -1362,6 +1391,8 @@ again: cc_exec_drain(cc, direct) = drain; } CC_UNLOCK(cc); + if (drain) + return(0); return ((flags & CS_EXECUTING) != 0); } CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", @@ -1369,20 +1400,12 @@ again: if (drain) { cc_exec_drain(cc, direct) = drain; } - KASSERT(!sq_locked,
Re: svn commit: r302998 - head/sys/kern
Well The code itself I had up on machines for probably about 2 months. But then I switched over to Gleb’s changes here just recently .. which caused me all kinds of fun :) I had to go back into Mercurial to pull back my changes.. I have had the resurrected changes running on my netflix machines for about 20 or so hours generating about anywhere from 14Gbps to 32Gbps depending on the machine type. I plan on waiting until tomorrow to sync it down into the NF code base. Note that if you do decide instead to roll back to the 10.x kern_timeout.c you will need to roll back a bunch of tcp changes as well that use the new async_drain() interface. I am game either way for you to proceed.. I will commit this current code to head as long as I hear no objections (from Gleb or others)…. R > On Jul 19, 2016, at 3:56 PM, Glen Barber <g...@freebsd.org> wrote: > > On Tue, Jul 19, 2016 at 03:46:54PM +0200, Randall Stewart wrote: >> Glen: >> >> My changes work.. I have them running in NF in at least 1/2 dozen machines. >> > > For how long? What are the uptimes on these machines? > > This is the blocker for 11.0-BETA2, and I don't want to see more > regressions being introduced at this point of the cycle. > > Glen > >> I am more than willing to commit them.. they actually are not much different >> than >> whats in stable 10.. though I don’t know if the async-drain was MFC’d >> there.. it >> needs to be in for TCP.. or else you will have yet another mess in that >> respect (TCP depends on ASYNC-drain). >> >> I can commit what I have.. if you like.. or not.. 
I really don’t care (I >> hate kern_timeout.c :-o) >> >> R >>> On Jul 19, 2016, at 2:25 PM, Glen Barber <g...@freebsd.org> wrote: >>> >>> On Tue, Jul 19, 2016 at 01:43:16PM +0200, Randall Stewart wrote: >>>> Gleb >>>> >>>> Ok >>>> >>>> I have now updated >>>> >>>> https://reviews.freebsd.org/D7135 >>>> >>>> You can take this or not… I really don’t care either way… (you are welcome >>>> to >>>> own the kern_timeout.c code I hate it) :-) >>>> >>>> Basically when you went off and re-factored kern_timeout.c I had worked in >>>> parallel on fixing >>>> the bugs you were seeing.. There were three distinct problems that I >>>> fixed… but then >>>> you had refactored the stop() routine.. and I thought ok.. thats fine. I >>>> had actually thought about >>>> doing something similar to what you did and was too chicken to poke that >>>> much at it.. it has >>>> always had a nasty habit of biting back when you make a lot of changes :-D >>>> >>>> I know my version has worked for quite some time in my testing so I >>>> brought it back. >>>> Complete with its 3 return codes (I only recently switched to your version >>>> and thus >>>> started having difficulties with leaks and crashes)…. >>>> >>>> You are welcome not to use this.. I know it works (it ran >>>> on a number of machines at NF last night.. and we will of course continue >>>> testing >>>> it as we finish our dev testing for the upcoming OCA software release).. >>>> For now >>>> this is what will be going out into the OCA’s at least :-) >>>> >>> >>> I'm honestly done with this topic, and at the point now where I'm >>> considering backing out all changes to callout(9) and related changes to >>> the state they were at in stable/10. >>> >>> This changes the KBI, and if it needs to be done, it needs to happen >>> now. We cannot wait for RC1 phase for this, and the amount of churn to >>> get things into a working state with the current implementation far >>> outweighs the benefit of the dangers. 
>>> >>> Glen >>> >> >> >> Randall Stewart >> r...@netflix.com >> 803-317-4952 >> >> >> >> >> Randall Stewart r...@netflix.com 803-317-4952 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r302998 - head/sys/kern
Glen: My changes work.. I have them running in NF in at least 1/2 dozen machines. I am more than willing to commit them.. they actually are not much different than whats in stable 10.. though I don’t know if the async-drain was MFC’d there.. it needs to be in for TCP.. or else you will have yet another mess in that respect (TCP depends on ASYNC-drain). I can commit what I have.. if you like.. or not.. I really don’t care (I hate kern_timeout.c :-o) R > On Jul 19, 2016, at 2:25 PM, Glen Barber <g...@freebsd.org> wrote: > > On Tue, Jul 19, 2016 at 01:43:16PM +0200, Randall Stewart wrote: >> Gleb >> >> Ok >> >> I have now updated >> >> https://reviews.freebsd.org/D7135 >> >> You can take this or not… I really don’t care either way… (you are welcome to >> own the kern_timeout.c code I hate it) :-) >> >> Basically when you went off and re-factored kern_timeout.c I had worked in >> parallel on fixing >> the bugs you were seeing.. There were three distinct problems that I fixed… >> but then >> you had refactored the stop() routine.. and I thought ok.. thats fine. I had >> actually thought about >> doing something similar to what you did and was too chicken to poke that >> much at it.. it has >> always had a nasty habit of biting back when you make a lot of changes :-D >> >> I know my version has worked for quite some time in my testing so I brought >> it back. >> Complete with its 3 return codes (I only recently switched to your version >> and thus >> started having difficulties with leaks and crashes)…. >> >> You are welcome not to use this.. I know it works (it ran >> on a number of machines at NF last night.. and we will of course continue >> testing >> it as we finish our dev testing for the upcoming OCA software release).. 
For >> now >> this is what will be going out into the OCA’s at least :-) >> > > I'm honestly done with this topic, and at the point now where I'm > considering backing out all changes to callout(9) and related changes to > the state they were at in stable/10. > > This changes the KBI, and if it needs to be done, it needs to happen > now. We cannot wait for RC1 phase for this, and the amount of churn to > get things into a working state with the current implementation far > outweighs the benefit of the dangers. > > Glen > Randall Stewart r...@netflix.com 803-317-4952 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r302998 - head/sys/kern
Gleb Ok I have now updated https://reviews.freebsd.org/D7135 You can take this or not… I really don’t care either way… (you are welcome to own the kern_timeout.c code I hate it) :-) Basically when you went off and re-factored kern_timeout.c I had worked in parallel on fixing the bugs you were seeing.. There were three distinct problems that I fixed… but then you had refactored the stop() routine.. and I thought ok.. thats fine. I had actually thought about doing something similar to what you did and was too chicken to poke that much at it.. it has always had a nasty habit of biting back when you make a lot of changes :-D I know my version has worked for quite some time in my testing so I brought it back. Complete with its 3 return codes (I only recently switched to your version and thus started having difficulties with leaks and crashes)…. You are welcome not to use this.. I know it works (it ran on a number of machines at NF last night.. and we will of course continue testing it as we finish our dev testing for the upcoming OCA software release).. For now this is what will be going out into the OCA’s at least :-) R > On Jul 18, 2016, at 6:19 PM, Randall Stewart <r...@netflix.com> wrote: > > I have worked out a fix of this in Netflix code base (I have the same code > running there). I > will get that tested tonight I will get the fixes in to restore the behavior. > > I will setup a phabricator shortly.. most likely I will update the one I > already > have on the one problem your earlier patch did not fix. > > R >> On Jul 18, 2016, at 5:44 PM, Randall Stewart <r...@netflix.com> wrote: >> >> Gleb: >> >> This now leaks TCP-PCB’s since you have broken the return codes with all your >> fixes that used to be in here. >> >> It was >> >> return 1 — You stopped the callout >> return 0 — The callout could not be stopped >> return -1 — The callout was not running. >> >> The LLRef code that was crashing in in.c depended on this to know to free >> the memory.. i.e. 
if was > 0 then they needed to free the memory. >> >> TCP depends on a return 0 to indicate the async-drain function will be >> called back and >> thus increments a refcnt and waits for the callback. >> >> You now return 0 when no timer was active.. which makes the stack then wait >> for the not forth coming async-drain call. >> >> R >>> On Jul 18, 2016, at 11:29 AM, Gleb Smirnoff <gleb...@freebsd.org> wrote: >>> >>> Author: glebius >>> Date: Mon Jul 18 09:29:08 2016 >>> New Revision: 302998 >>> URL: https://svnweb.freebsd.org/changeset/base/302998 >>> >>> Log: >>> Revert the last commit. It must get more review and testing first. >>> >>> Modified: >>> head/sys/kern/kern_timeout.c >>> >>> Modified: head/sys/kern/kern_timeout.c >>> == >>> --- head/sys/kern/kern_timeout.cMon Jul 18 09:26:06 2016 >>> (r302997) >>> +++ head/sys/kern/kern_timeout.cMon Jul 18 09:29:08 2016 >>> (r302998) >>> @@ -1381,7 +1381,7 @@ again: >>> CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", >>> c, c->c_func, c->c_arg); >>> CC_UNLOCK(cc); >>> - return (-1); >>> + return (0); >>> } >>> >>> c->c_iflags &= ~CALLOUT_PENDING; >>> >> >> >> Randall Stewart >> r...@netflix.com >> 803-317-4952 >> >> >> >> >> > > > Randall Stewart > r...@netflix.com > 803-317-4952 > > > > > Randall Stewart r...@netflix.com 803-317-4952 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r302998 - head/sys/kern
I have worked out a fix for this in the Netflix code base (I have the same code running there). I will get that tested tonight. I will get the fixes in to restore the behavior. I will set up a Phabricator review shortly.. most likely I will update the one I already have on the one problem your earlier patch did not fix. R > On Jul 18, 2016, at 5:44 PM, Randall Stewart <r...@netflix.com> wrote: > > Gleb: > > This now leaks TCP-PCB’s since you have broken the return codes with all your > fixes that used to be in here. > > It was > > return 1 — You stopped the callout > return 0 — The callout could not be stopped > return -1 — The callout was not running. > > The LLRef code that was crashing in in.c depended on this to know to free > the memory.. i.e. if was > 0 then they needed to free the memory. > > TCP depends on a return 0 to indicate the async-drain function will be called > back and > thus increments a refcnt and waits for the callback. > > You now return 0 when no timer was active.. which makes the stack then wait > for the not forth coming async-drain call. > > R >> On Jul 18, 2016, at 11:29 AM, Gleb Smirnoff <gleb...@freebsd.org> wrote: >> >> Author: glebius >> Date: Mon Jul 18 09:29:08 2016 >> New Revision: 302998 >> URL: https://svnweb.freebsd.org/changeset/base/302998 >> >> Log: >> Revert the last commit. It must get more review and testing first.
>> >> Modified: >> head/sys/kern/kern_timeout.c >> >> Modified: head/sys/kern/kern_timeout.c >> == >> --- head/sys/kern/kern_timeout.c Mon Jul 18 09:26:06 2016 >> (r302997) >> +++ head/sys/kern/kern_timeout.c Mon Jul 18 09:29:08 2016 >> (r302998) >> @@ -1381,7 +1381,7 @@ again: >> CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", >> c, c->c_func, c->c_arg); >> CC_UNLOCK(cc); >> -return (-1); >> +return (0); >> } >> >> c->c_iflags &= ~CALLOUT_PENDING; >> > > > Randall Stewart > r...@netflix.com > 803-317-4952 > > > > > Randall Stewart r...@netflix.com 803-317-4952 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r302998 - head/sys/kern
Gleb: This now leaks TCP-PCBs since you have broken the return codes with all your fixes that used to be in here. It was return 1 — You stopped the callout return 0 — The callout could not be stopped return -1 — The callout was not running. The LLRef code that was crashing in in.c depended on this to know to free the memory.. i.e. if it was > 0 then they needed to free the memory. TCP depends on a return 0 to indicate the async-drain function will be called back and thus increments a refcnt and waits for the callback. You now return 0 when no timer was active.. which makes the stack then wait for the not forthcoming async-drain call. R > On Jul 18, 2016, at 11:29 AM, Gleb Smirnoff <gleb...@freebsd.org> wrote: > > Author: glebius > Date: Mon Jul 18 09:29:08 2016 > New Revision: 302998 > URL: https://svnweb.freebsd.org/changeset/base/302998 > > Log: > Revert the last commit. It must get more review and testing first. > > Modified: > head/sys/kern/kern_timeout.c > > Modified: head/sys/kern/kern_timeout.c > == > --- head/sys/kern/kern_timeout.c Mon Jul 18 09:26:06 2016 > (r302997) > +++ head/sys/kern/kern_timeout.c Mon Jul 18 09:29:08 2016 > (r302998) > @@ -1381,7 +1381,7 @@ again: > CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", > c, c->c_func, c->c_arg); > CC_UNLOCK(cc); > - return (-1); > + return (0); > } > > c->c_iflags &= ~CALLOUT_PENDING; > Randall Stewart r...@netflix.com 803-317-4952 ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r300042 - in head/sys/netinet: . tcp_stacks
Author: rrs Date: Tue May 17 09:53:22 2016 New Revision: 300042 URL: https://svnweb.freebsd.org/changeset/base/300042 Log: This small change adopts the excellent suggestion for using named structures in the add of a new tcp-stack that came in late to me via email after the last commit. It also makes it so that a new stack may optionally get a callback during a retransmit timeout. This allows the new stack to clear specific state (think sack scoreboards or other such structures). Sponsored by: Netflix Inc. Differential Revision:http://reviews.freebsd.org/D6303 Modified: head/sys/netinet/tcp_stacks/fastpath.c head/sys/netinet/tcp_timer.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_stacks/fastpath.c == --- head/sys/netinet/tcp_stacks/fastpath.c Tue May 17 09:24:54 2016 (r300041) +++ head/sys/netinet/tcp_stacks/fastpath.c Tue May 17 09:53:22 2016 (r300042) @@ -2375,34 +2375,17 @@ tcp_do_segment_fastack(struct mbuf *m, s } struct tcp_function_block __tcp_fastslow = { - "fastslow", - tcp_output, - tcp_do_segment_fastslow, - tcp_default_ctloutput, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - 0, - 0 - + .tfb_tcp_block_name = "fastslow", + .tfb_tcp_output = tcp_output, + .tfb_tcp_do_segment = tcp_do_segment_fastslow, + .tfb_tcp_ctloutput = tcp_default_ctloutput, }; struct tcp_function_block __tcp_fastack = { - "fastack", - tcp_output, - tcp_do_segment_fastack, - tcp_default_ctloutput, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - 0, - 0 + .tfb_tcp_block_name = "fastack", + .tfb_tcp_output = tcp_output, + .tfb_tcp_do_segment = tcp_do_segment_fastack, + .tfb_tcp_ctloutput = tcp_default_ctloutput }; static int Modified: head/sys/netinet/tcp_timer.c == --- head/sys/netinet/tcp_timer.cTue May 17 09:24:54 2016 (r300041) +++ head/sys/netinet/tcp_timer.cTue May 17 09:53:22 2016 (r300042) @@ -604,6 +604,10 @@ tcp_timer_rexmt(void * xtp) KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0, ("%s: tp %p rexmt callout should be running", __func__, tp)); 
tcp_free_sackholes(tp); + if (tp->t_fb->tfb_tcp_rexmit_tmr) { + /* The stack has a timer action too. */ + (*tp->t_fb->tfb_tcp_rexmit_tmr)(tp); + } /* * Retransmission timer went off. Message has not * been acked within retransmit interval. Back off Modified: head/sys/netinet/tcp_var.h == --- head/sys/netinet/tcp_var.h Tue May 17 09:24:54 2016(r300041) +++ head/sys/netinet/tcp_var.h Tue May 17 09:53:22 2016(r300042) @@ -135,6 +135,7 @@ struct tcp_function_block { uint32_t, u_int); int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t); void(*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t); + void(*tfb_tcp_rexmit_tmr)(struct tcpcb *); volatile uint32_t tfb_refcnt; uint32_t tfb_flags; }; ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r298747 - in head/sys: net netinet netinet6
Author: rrs Date: Thu Apr 28 15:53:10 2016 New Revision: 298747 URL: https://svnweb.freebsd.org/changeset/base/298747 Log: Complete the UDP tunneling of ICMP msgs to those protocols interested in having tunneled UDP and finding out about the ICMP (tested by Michael Tuexen with SCTP.. soon to be using this feature). Differential Revision:http://reviews.freebsd.org/D5875 Modified: head/sys/net/if_vxlan.c head/sys/netinet/sctputil.c head/sys/netinet/udp_usrreq.c head/sys/netinet/udp_var.h head/sys/netinet6/udp6_usrreq.c Modified: head/sys/net/if_vxlan.c == --- head/sys/net/if_vxlan.c Thu Apr 28 15:20:08 2016(r298746) +++ head/sys/net/if_vxlan.c Thu Apr 28 15:53:10 2016(r298747) @@ -930,7 +930,7 @@ vxlan_socket_init(struct vxlan_socket *v } error = udp_set_kernel_tunneling(vso->vxlso_sock, - vxlan_rcv_udp_packet, vso); + vxlan_rcv_udp_packet, NULL, vso); if (error) { if_printf(ifp, "cannot set tunneling function: %d\n", error); return (error); Modified: head/sys/netinet/sctputil.c == --- head/sys/netinet/sctputil.c Thu Apr 28 15:20:08 2016(r298746) +++ head/sys/netinet/sctputil.c Thu Apr 28 15:53:10 2016(r298747) @@ -6945,7 +6945,7 @@ sctp_over_udp_start(void) } /* Call the special UDP hook. */ if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp4_tun_socket), - sctp_recv_udp_tunneled_packet, NULL))) { + sctp_recv_udp_tunneled_packet, NULL, NULL))) { sctp_over_udp_stop(); return (ret); } @@ -6969,7 +6969,7 @@ sctp_over_udp_start(void) } /* Call the special UDP hook. 
*/ if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp6_tun_socket), - sctp_recv_udp_tunneled_packet, NULL))) { + sctp_recv_udp_tunneled_packet, NULL, NULL))) { sctp_over_udp_stop(); return (ret); } Modified: head/sys/netinet/udp_usrreq.c == --- head/sys/netinet/udp_usrreq.c Thu Apr 28 15:20:08 2016 (r298746) +++ head/sys/netinet/udp_usrreq.c Thu Apr 28 15:53:10 2016 (r298747) @@ -792,6 +792,21 @@ udp_common_ctlinput(int cmd, struct sock udp_notify(inp, inetctlerrmap[cmd]); } INP_RUNLOCK(inp); + } else { + inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport, + ip->ip_src, uh->uh_sport, + INPLOOKUP_WILDCARD | INPLOOKUP_RLOCKPCB, NULL); + if (inp != NULL) { + struct udpcb *up; + + up = intoudpcb(inp); + if (up->u_icmp_func != NULL) { + INP_RUNLOCK(inp); + (*up->u_icmp_func)(cmd, sa, vip, up->u_tun_ctx); + } else { + INP_RUNLOCK(inp); + } + } } } else in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd], @@ -1748,7 +1763,7 @@ udp_attach(struct socket *so, int proto, #endif /* INET */ int -udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, void *ctx) +udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, udp_tun_icmp_t i, void *ctx) { struct inpcb *inp; struct udpcb *up; @@ -1759,11 +1774,13 @@ udp_set_kernel_tunneling(struct socket * KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL")); INP_WLOCK(inp); up = intoudpcb(inp); - if (up->u_tun_func != NULL) { + if ((up->u_tun_func != NULL) || + (up->u_icmp_func != NULL)) { INP_WUNLOCK(inp); return (EBUSY); } up->u_tun_func = f; + up->u_icmp_func = i; up->u_tun_ctx = ctx; INP_WUNLOCK(inp); return (0); Modified: head/sys/netinet/udp_var.h == --- head/sys/netinet/udp_var.h Thu Apr 28 15:20:08 2016(r298746) +++ head/sys/netinet/udp_var.h Thu Apr 28 15:53:10 2016(r298747) @@ -55,14 +55,16 @@ struct udpiphdr { struct inpcb; struct mbuf; -typedef void(*udp_tun_func_t)(struct mbuf *, int off, struct inpcb *, +typedef void(*udp_tun_func_t)(struct mbuf *, int, struct inpcb *, const struct sockaddr *, void 
*); - +typedef void(*udp_tun_icmp_t)(int, struct sockaddr *, void *, void *); +
svn commit: r298743 - in head/sys/netinet: . tcp_stacks
Author: rrs Date: Thu Apr 28 13:27:12 2016 New Revision: 298743 URL: https://svnweb.freebsd.org/changeset/base/298743 Log: This cleans up the timers code in TCP to start using the new async_drain functionality. This has been tested in NF as well as by Verisign. Still to do in here is to remove all the old flags. They are currently left being maintained but probably are no longer needed. Sponsored by: Netflix Inc. Differential Revision:http://reviews.freebsd.org/D5924 Modified: head/sys/netinet/tcp_stacks/fastpath.c head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_timer.c head/sys/netinet/tcp_timer.h head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_stacks/fastpath.c == --- head/sys/netinet/tcp_stacks/fastpath.c Thu Apr 28 13:00:40 2016 (r298742) +++ head/sys/netinet/tcp_stacks/fastpath.c Thu Apr 28 13:27:12 2016 (r298743) @@ -2386,7 +2386,6 @@ struct tcp_function_block __tcp_fastslow NULL, NULL, NULL, - NULL, 0, 0 @@ -2403,7 +2402,6 @@ struct tcp_function_block __tcp_fastack NULL, NULL, NULL, - NULL, 0, 0 }; Modified: head/sys/netinet/tcp_subr.c == --- head/sys/netinet/tcp_subr.c Thu Apr 28 13:00:40 2016(r298742) +++ head/sys/netinet/tcp_subr.c Thu Apr 28 13:27:12 2016(r298743) @@ -244,7 +244,6 @@ static struct inpcb *tcp_mtudisc_notify( static void tcp_mtudisc(struct inpcb *, int); static char * tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th, void *ip4hdr, const void *ip6hdr); -static void tcp_timer_discard(struct tcpcb *, uint32_t); static struct tcp_function_block tcp_def_funcblk = { @@ -258,7 +257,6 @@ static struct tcp_function_block tcp_def NULL, NULL, NULL, - NULL, 0, 0 }; @@ -528,7 +526,6 @@ register_tcp_functions(struct tcp_functi return (EINVAL); } if (blk->tfb_tcp_timer_stop_all || - blk->tfb_tcp_timers_left || blk->tfb_tcp_timer_activate || blk->tfb_tcp_timer_active || blk->tfb_tcp_timer_stop) { @@ -537,7 +534,6 @@ register_tcp_functions(struct tcp_functi * must have them all.
*/ if ((blk->tfb_tcp_timer_stop_all == NULL) || - (blk->tfb_tcp_timers_left == NULL) || (blk->tfb_tcp_timer_activate == NULL) || (blk->tfb_tcp_timer_active == NULL) || (blk->tfb_tcp_timer_stop == NULL)) { @@ -1343,13 +1339,21 @@ tcp_discardcb(struct tcpcb *tp) * callout, and the last discard function called will take care of * deleting the tcpcb. */ + tp->t_timers->tt_draincnt = 0; tcp_timer_stop(tp, TT_REXMT); tcp_timer_stop(tp, TT_PERSIST); tcp_timer_stop(tp, TT_KEEP); tcp_timer_stop(tp, TT_2MSL); tcp_timer_stop(tp, TT_DELACK); if (tp->t_fb->tfb_tcp_timer_stop_all) { - /* Call the stop-all function of the methods */ + /* +* Call the stop-all function of the methods, +* this function should call the tcp_timer_stop() +* method with each of the function specific timeouts. +* That stop will be called via the tfb_tcp_timer_stop() +* which should use the async drain function of the +* callout system (see tcp_var.h). +*/ tp->t_fb->tfb_tcp_timer_stop_all(tp); } @@ -1434,13 +1438,8 @@ tcp_discardcb(struct tcpcb *tp) CC_ALGO(tp) = NULL; inp->inp_ppcb = NULL; - if ((tp->t_timers->tt_flags & TT_MASK) == 0) { + if (tp->t_timers->tt_draincnt == 0) { /* We own the last reference on tcpcb, let's free it. */ - if ((tp->t_fb->tfb_tcp_timers_left) && - (tp->t_fb->tfb_tcp_timers_left(tp))) { - /* Some fb timers left running! */ - return; - } if (tp->t_fb->tfb_tcp_fb_fini) (*tp->t_fb->tfb_tcp_fb_fini)(tp); refcount_release(&tp->t_fb->tfb_refcnt); @@ -1453,45 +1452,12 @@ tcp_discardcb(struct tcpcb *tp) } void -tcp_timer_2msl_discard(void *xtp) -{ - - tcp_timer_discard((struct tcpcb *)xtp, TT_2MSL); -} - -void -tcp_timer_keep_discard(void *xtp) -{ - - tcp_timer_discard((struct tcpcb *)xtp, TT_KEEP); -} - -void -tcp_timer_persist_discard(void *xtp) -{ - - tcp_timer_discard((struct tcpcb *)xtp, TT_PERSIST); -} - -void -tcp_timer_rexmt_discard(void *xtp) -{ - - tcp_timer_discard((struct tcpcb *)xtp, TT_REXMT); -} - -void -tcp_timer_delack_discard(void *xtp) -{ -
svn commit: r297663 - head/sys/netinet
Author: rrs Date: Thu Apr 7 09:34:41 2016 New Revision: 297663 URL: https://svnweb.freebsd.org/changeset/base/297663 Log: A couple of minor changes that I missed that Michael had done, most noted in these is the change to non-strict ordering for incoming data (this will make pkt-drill test 14 fail but its expected). Modified: head/sys/netinet/sctp_indata.h head/sys/netinet/sctp_os_bsd.h head/sys/netinet/sctp_output.c head/sys/netinet/sctp_sysctl.h head/sys/netinet/sctp_var.h head/sys/netinet/sctputil.c Modified: head/sys/netinet/sctp_indata.h == --- head/sys/netinet/sctp_indata.h Thu Apr 7 09:10:34 2016 (r297662) +++ head/sys/netinet/sctp_indata.h Thu Apr 7 09:34:41 2016 (r297663) @@ -53,7 +53,7 @@ sctp_build_readq_entry(struct sctp_tcb * memset(_ctl, 0, sizeof(struct sctp_queued_to_read)); \ (_ctl)->sinfo_stream = stream_no; \ (_ctl)->sinfo_ssn = stream_seq; \ - TAILQ_INIT(&_ctl->reasm); \ + TAILQ_INIT(&_ctl->reasm); \ (_ctl)->top_fsn = tfsn; \ (_ctl)->msg_id = msgid; \ (_ctl)->sinfo_flags = (flags << 8); \ Modified: head/sys/netinet/sctp_os_bsd.h == --- head/sys/netinet/sctp_os_bsd.h Thu Apr 7 09:10:34 2016 (r297662) +++ head/sys/netinet/sctp_os_bsd.h Thu Apr 7 09:34:41 2016 (r297663) @@ -480,9 +480,9 @@ sctp_get_mbuf_for_msg(unsigned int space #define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \ { \ int32_t oldval; \ - oldval = atomic_fetchadd_int(addr, -val); \ + oldval = atomic_fetchadd_int(addr, -val); \ if (oldval < val) { \ - panic("Counter goes negative addr:%p val:%d oldval:%d", addr, val, oldval); \ + panic("Counter goes negative"); \ } \ } #else Modified: head/sys/netinet/sctp_output.c == --- head/sys/netinet/sctp_output.c Thu Apr 7 09:10:34 2016 (r297662) +++ head/sys/netinet/sctp_output.c Thu Apr 7 09:34:41 2016 (r297663) @@ -10499,6 +10499,7 @@ sctp_fill_in_rest: strseq++; } else { strseq_m->stream = ntohs(at->rec.data.stream_number); + strseq_m->reserved = ntohs(0); strseq_m->msg_id = ntohl(at->rec.data.stream_seq); strseq_m++; } Modified: 
head/sys/netinet/sctp_sysctl.h == --- head/sys/netinet/sctp_sysctl.h Thu Apr 7 09:10:34 2016 (r297662) +++ head/sys/netinet/sctp_sysctl.h Thu Apr 7 09:34:41 2016 (r297663) @@ -432,7 +432,7 @@ struct sctp_sysctl { #define SCTPCTL_STRICT_DATA_ORDER_DESC "Enforce strict data ordering, abort if control inside data" #define SCTPCTL_STRICT_DATA_ORDER_MIN 0 #define SCTPCTL_STRICT_DATA_ORDER_MAX 1 -#define SCTPCTL_STRICT_DATA_ORDER_DEFAULT 1 +#define SCTPCTL_STRICT_DATA_ORDER_DEFAULT 0 /* min_residual: min residual in a data fragment leftover */ #define SCTPCTL_MIN_RESIDUAL_DESC "Minimum residual data chunk in second part of split" Modified: head/sys/netinet/sctp_var.h == --- head/sys/netinet/sctp_var.h Thu Apr 7 09:10:34 2016(r297662) +++ head/sys/netinet/sctp_var.h Thu Apr 7 09:34:41 2016(r297663) @@ -99,8 +99,8 @@ extern struct pr_usrreqs sctp_usrreqs; */ #ifdef INVARIANTS #define sctp_free_a_readq(_stcb, _readq) { \ - if ((_readq)->on_strm_q)\ - panic("On strm q stcb:%p readq:%p", (_stcb), (_readq)); \ + if ((_readq)->on_strm_q) \ + panic("On strm q stcb:%p readq:%p", (_stcb), (_readq)); \ SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), (_readq)); \ SCTP_DECR_READQ_COUNT(); \ } @@ -204,7 +204,7 @@ extern struct pr_usrreqs sctp_usrreqs; } #define sctp_sbfree(ctl, stcb, sb, m) { \ - SCTP_SAVE_ATOMIC_DECREMENT(&(sb)->sb_cc, SCTP_BUF_LEN((m)));\ + SCTP_SAVE_ATOMIC_DECREMENT(&(sb)->sb_cc, SCTP_BUF_LEN((m))); \ SCTP_SAVE_ATOMIC_DECREMENT(&(sb)->sb_mbcnt, MSIZE); \ if (((ctl)->do_not_ref_stcb == 0) && stcb) {\ SCTP_SAVE_ATOMIC_DECREMENT(&(stcb)->asoc.sb_cc, SCTP_BUF_LEN((m))); \ Modified: head/sys/netinet/sctputil.c == --- head/sys/netinet/sctputil.c Thu Apr 7 09:10:34 2016(r297662) +++ head/sys/netinet/sctputil.c Thu Apr 7 09:34:41 2016(r297663) @@ -6100,7 +6100,7 @@