Author: sbruno
Date: Wed Oct 29 22:17:45 2014
New Revision: 273838
URL: https://svnweb.freebsd.org/changeset/base/273838

Log:
  MFC r272720, 273061, 273062, 273063, 273064
  
  Implement PLPMTUD blackhole detection (RFC 4821), inspired by code
  from xnu sources.  If we encounter a network where ICMP is blocked
  the Needs Frag indicator may not propagate back to us.  Attempt to
  downshift the mss once to a preconfigured value.
  
  Note, this is turned off by default.

Modified:
  stable/10/share/man/man4/tcp.4
  stable/10/sys/netinet/tcp_output.c
  stable/10/sys/netinet/tcp_timer.c
  stable/10/sys/netinet/tcp_var.h
Directory Properties:
  stable/10/   (props changed)

Modified: stable/10/share/man/man4/tcp.4
==============================================================================
--- stable/10/share/man/man4/tcp.4      Wed Oct 29 20:18:37 2014        (r273837)
+++ stable/10/share/man/man4/tcp.4      Wed Oct 29 22:17:45 2014        (r273838)
@@ -38,7 +38,7 @@
 .\"     From: @(#)tcp.4        8.1 (Berkeley) 6/5/93
 .\" $FreeBSD$
 .\"
-.Dd November 8, 2013
+.Dd October 13, 2014
 .Dt TCP 4
 .Os
 .Sh NAME
@@ -522,6 +522,21 @@ avoid packet drops.
 Number of retries (SYN or SYN/ACK retransmits) before disabling ECN on a
 specific connection. This is needed to help with connection establishment
 when a broken firewall is in the network path.
+.It Va pmtud_blackhole_detection
+Turn on automatic path MTU blackhole detection. In case of retransmits we will
+lower the MSS to check if it is an MTU problem. If the current MSS is greater
+than the configured value to try, it is set to that value; otherwise, the MSS is
+set to the default values (net.inet.tcp.mssdflt and net.inet.tcp.v6mssdflt).
+.It Va pmtud_blackhole_mss
+MSS to try for IPv4 if PMTU blackhole detection is turned on.
+.It Va v6pmtud_blackhole_mss
+MSS to try for IPv6 if PMTU blackhole detection is turned on.
+.It Va pmtud_blackhole_activated
+Number of times the MSS was downshifted to the configured blackhole MSS.
+.It Va pmtud_blackhole_activated_min_mss
+Number of times the MSS was downshifted to the default MSS instead.
+.It Va pmtud_blackhole_failed
+Number of times retransmits kept failing after a downshift and the MSS was restored.
 .El
 .Sh ERRORS
 A socket operation may fail with one of the following errors returned:

Modified: stable/10/sys/netinet/tcp_output.c
==============================================================================
--- stable/10/sys/netinet/tcp_output.c  Wed Oct 29 20:18:37 2014        (r273837)
+++ stable/10/sys/netinet/tcp_output.c  Wed Oct 29 22:17:45 2014        (r273838)
@@ -676,6 +676,12 @@ just_return:
 
 send:
        SOCKBUF_LOCK_ASSERT(&so->so_snd);
+       if (len > 0) {
+               if (len >= tp->t_maxseg)
+                       tp->t_flags2 |= TF2_PLPMTU_MAXSEGSNT;
+               else
+                       tp->t_flags2 &= ~TF2_PLPMTU_MAXSEGSNT;
+       }
        /*
         * Before ESTABLISHED, force sending of initial options
         * unless TCP set not to do any options.
@@ -1184,6 +1190,11 @@ send:
                 */
                ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(*ip6));
 
+               if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
+                       tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+               else
+                       tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+
                if (tp->t_state == TCPS_SYN_SENT)
                        TCP_PROBE5(connect__request, NULL, tp, ip6, tp, th);
 
@@ -1220,8 +1231,12 @@ send:
         *
         * NB: Don't set DF on small MTU/MSS to have a safe fallback.
         */
-       if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss)
+       if (V_path_mtu_discovery && tp->t_maxopd > V_tcp_minmss) {
                ip->ip_off |= htons(IP_DF);
+               tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+       } else {
+               tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+       }
 
        if (tp->t_state == TCPS_SYN_SENT)
                TCP_PROBE5(connect__request, NULL, tp, ip, tp, th);

Modified: stable/10/sys/netinet/tcp_timer.c
==============================================================================
--- stable/10/sys/netinet/tcp_timer.c   Wed Oct 29 20:18:37 2014        (r273837)
+++ stable/10/sys/netinet/tcp_timer.c   Wed Oct 29 22:17:45 2014        (r273838)
@@ -63,6 +63,9 @@ __FBSDID("$FreeBSD$");
 #include <netinet/tcp_fsm.h>
 #include <netinet/tcp_timer.h>
 #include <netinet/tcp_var.h>
+#ifdef INET6
+#include <netinet6/tcp6_var.h>
+#endif
 #include <netinet/tcpip.h>
 #ifdef TCPDEBUG
 #include <netinet/tcp_debug.h>
@@ -124,6 +127,54 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexm
     &tcp_rexmit_drop_options, 0,
     "Drop TCP options from 3rd and later retransmitted SYN");
 
+static VNET_DEFINE(int, tcp_pmtud_blackhole_detect);
+#define        V_tcp_pmtud_blackhole_detect    VNET(tcp_pmtud_blackhole_detect)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_detection,
+    CTLFLAG_RW,
+    &VNET_NAME(tcp_pmtud_blackhole_detect), 0,
+    "Path MTU Discovery Black Hole Detection Enabled");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_activated);
+#define        V_tcp_pmtud_blackhole_activated \
+    VNET(tcp_pmtud_blackhole_activated)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated,
+    CTLFLAG_RD,
+    &VNET_NAME(tcp_pmtud_blackhole_activated), 0,
+    "Path MTU Discovery Black Hole Detection, Activation Count");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_activated_min_mss);
+#define        V_tcp_pmtud_blackhole_activated_min_mss \
+    VNET(tcp_pmtud_blackhole_activated_min_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_activated_min_mss,
+    CTLFLAG_RD,
+    &VNET_NAME(tcp_pmtud_blackhole_activated_min_mss), 0,
+    "Path MTU Discovery Black Hole Detection, Activation Count at min MSS");
+
+static VNET_DEFINE(int, tcp_pmtud_blackhole_failed);
+#define        V_tcp_pmtud_blackhole_failed    VNET(tcp_pmtud_blackhole_failed)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_failed,
+    CTLFLAG_RD,
+    &VNET_NAME(tcp_pmtud_blackhole_failed), 0,
+    "Path MTU Discovery Black Hole Detection, Failure Count");
+
+#ifdef INET
+static VNET_DEFINE(int, tcp_pmtud_blackhole_mss) = 1200;
+#define        V_tcp_pmtud_blackhole_mss       VNET(tcp_pmtud_blackhole_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, pmtud_blackhole_mss,
+    CTLFLAG_RW,
+    &VNET_NAME(tcp_pmtud_blackhole_mss), 0,
+    "Path MTU Discovery Black Hole Detection lowered MSS");
+#endif
+
+#ifdef INET6
+static VNET_DEFINE(int, tcp_v6pmtud_blackhole_mss) = 1220;
+#define        V_tcp_v6pmtud_blackhole_mss     VNET(tcp_v6pmtud_blackhole_mss)
+SYSCTL_VNET_INT(_net_inet_tcp, OID_AUTO, v6pmtud_blackhole_mss,
+    CTLFLAG_RW,
+    &VNET_NAME(tcp_v6pmtud_blackhole_mss), 0,
+    "Path MTU Discovery IPv6 Black Hole Detection lowered MSS");
+#endif
+
 static int     per_cpu_timers = 0;
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
     &per_cpu_timers , 0, "run tcp timers on all cpus");
@@ -492,6 +543,7 @@ tcp_timer_rexmt(void * xtp)
 
        ostate = tp->t_state;
 #endif
+
        INP_INFO_RLOCK(&V_tcbinfo);
        inp = tp->t_inpcb;
        /*
@@ -593,6 +645,110 @@ tcp_timer_rexmt(void * xtp)
                rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
        TCPT_RANGESET(tp->t_rxtcur, rexmt,
                      tp->t_rttmin, TCPTV_REXMTMAX);
+
+       /*
+        * We enter the PLPMTUD path if the connection is in the ESTABLISHED
+        * or FIN_WAIT_1 state. FIN_WAIT_1 is included because, when the
+        * amount of data to send is very small, we could send it in a couple
+        * of packets and proceed straight to FIN; in that case we would never
+        * observe the ESTABLISHED state here.
+        */
+       if (V_tcp_pmtud_blackhole_detect && (((tp->t_state == TCPS_ESTABLISHED))
+           || (tp->t_state == TCPS_FIN_WAIT_1))) {
+               int optlen;
+#ifdef INET6
+               int isipv6;
+#endif
+
+               if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) ==
+                   (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) &&
+                   (tp->t_rxtshift <= 2)) {
+                       /*
+                        * Enter Path MTU Black-hole Detection mechanism:
+                        * - Disable Path MTU Discovery (IP "DF" bit).
+                        * - Reduce MTU to lower value than what we
+                        *   negotiated with peer.
+                        */
+                       /* Record that we may have found a black hole. */
+                       tp->t_flags2 |= TF2_PLPMTU_BLACKHOLE;
+
+                       /* Keep track of previous MSS. */
+                       optlen = tp->t_maxopd - tp->t_maxseg;
+                       tp->t_pmtud_saved_maxopd = tp->t_maxopd;
+
+                       /* 
+                        * Reduce the MSS to blackhole value or to the default
+                        * in an attempt to retransmit.
+                        */
+#ifdef INET6
+                       isipv6 = (tp->t_inpcb->inp_vflag & INP_IPV6) ? 1 : 0;
+                       if (isipv6 &&
+                           tp->t_maxopd > V_tcp_v6pmtud_blackhole_mss) {
+                               /* Use the sysctl tuneable blackhole MSS. */
+                               tp->t_maxopd = V_tcp_v6pmtud_blackhole_mss;
+                               V_tcp_pmtud_blackhole_activated++;
+                       } else if (isipv6) {
+                               /* Use the default MSS. */
+                               tp->t_maxopd = V_tcp_v6mssdflt;
+                               /*
+                                * Disable Path MTU Discovery when we switch to
+                                * minmss.
+                                */
+                               tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+                               V_tcp_pmtud_blackhole_activated_min_mss++;
+                       }
+#endif
+#if defined(INET6) && defined(INET)
+                       else
+#endif
+#ifdef INET
+                       if (tp->t_maxopd > V_tcp_pmtud_blackhole_mss) {
+                               /* Use the sysctl tuneable blackhole MSS. */
+                               tp->t_maxopd = V_tcp_pmtud_blackhole_mss;
+                               V_tcp_pmtud_blackhole_activated++;
+                       } else {
+                               /* Use the default MSS. */
+                               tp->t_maxopd = V_tcp_mssdflt;
+                               /*
+                                * Disable Path MTU Discovery when we switch to
+                                * minmss.
+                                */
+                               tp->t_flags2 &= ~TF2_PLPMTU_PMTUD;
+                               V_tcp_pmtud_blackhole_activated_min_mss++;
+                       }
+#endif
+                       tp->t_maxseg = tp->t_maxopd - optlen;
+                       /*
+                        * Reset the slow-start flight size
+                        * as it may depend on the new MSS.
+                        */
+                       if (CC_ALGO(tp)->conn_init != NULL)
+                               CC_ALGO(tp)->conn_init(tp->ccv);
+               } else {
+                       /*
+                        * If further retransmissions are still unsuccessful
+                        * with a lowered MTU, maybe this isn't a blackhole and
+                        * we restore the previous MSS and blackhole detection
+                        * flags.
+                        */
+                       if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) &&
+                           (tp->t_rxtshift > 4)) {
+                               tp->t_flags2 |= TF2_PLPMTU_PMTUD;
+                               tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE;
+                               optlen = tp->t_maxopd - tp->t_maxseg;
+                               tp->t_maxopd = tp->t_pmtud_saved_maxopd;
+                               tp->t_maxseg = tp->t_maxopd - optlen;
+                               V_tcp_pmtud_blackhole_failed++;
+                               /*
+                                * Reset the slow-start flight size as it
+                                * may depend on the new MSS.
+                                */
+                               if (CC_ALGO(tp)->conn_init != NULL)
+                                       CC_ALGO(tp)->conn_init(tp->ccv);
+                       }
+               }
+       }
+
        /*
         * Disable RFC1323 and SACK if we haven't got any response to
         * our third SYN to work-around some broken terminal servers

Modified: stable/10/sys/netinet/tcp_var.h
==============================================================================
--- stable/10/sys/netinet/tcp_var.h     Wed Oct 29 20:18:37 2014        (r273837)
+++ stable/10/sys/netinet/tcp_var.h     Wed Oct 29 22:17:45 2014        (r273838)
@@ -209,8 +209,10 @@ struct tcpcb {
        u_int   t_keepcnt;              /* number of keepalives before close */
 
        u_int   t_tsomax;               /* tso burst length limit */
+       u_int   t_pmtud_saved_maxopd;   /* pre-blackhole MSS */
+       u_int   t_flags2;               /* More tcpcb flags storage */
 
-       uint32_t t_ispare[8];           /* 5 UTO, 3 TBD */
+       uint32_t t_ispare[6];           /* 5 UTO, 1 TBD */
        void    *t_pspare2[4];          /* 1 TCP_SIGNATURE, 3 TBD */
        uint64_t _pad[6];               /* 6 TBD (1-2 CC/RTT?) */
 };
@@ -284,6 +286,13 @@ struct tcpcb {
 #endif /* TCP_SIGNATURE */
 
 /*
+ * Flags for PLPMTU handling, t_flags2
+ */
+#define        TF2_PLPMTU_BLACKHOLE    0x00000001 /* Possible PLPMTUD Black Hole. */
+#define        TF2_PLPMTU_PMTUD        0x00000002 /* Allowed to attempt PLPMTUD. */
+#define        TF2_PLPMTU_MAXSEGSNT    0x00000004 /* Last seg sent was full seg. */
+
+/*
  * Structure to hold TCP options that are only used during segment
  * processing (in tcp_input), but not held in the tcpcb.
  * It's basically used to reduce the number of parameters
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to