Author: melifaro
Date: Tue Nov 27 20:16:37 2012
New Revision: 243629
URL: http://svnweb.freebsd.org/changeset/base/243629

Log:
  MFC r241406, r241502, r241884.
  
  Do not check whether the found IPv4 rte is dynamic if
  net.inet.icmp.drop_redirect is enabled. This eliminates one mtx_lock() per
  routing lookup, thus improving performance in several cases (routing to a
  directly connected interface or routing to the default gateway).
  
  ICMP redirects should not be used to provide routing direction nowadays, even
  for end hosts. Routers should not use them either (and this is explicitly
  restricted in IPv6, see RFC 4861, clause 8.2).
  
  Current commit changes rnh_matchaddr function to 'stock' rn_match (and back)
  for every AF_INET routing table in given VNET instance on drop_redirect
  sysctl change.
  
  Eliminate the code checking whether the found IPv6 rte is dynamic. IPv6
  redirects use a (different) ND-based approach described in RFC 4861. This
  change is similar to r241406, which conditionally skips the same check in IPv4.
  
  Cleanup documentation: cloning route support has been removed in r186119.
  
  This change is part of bigger patch eliminating rte locking.
  
  Sponsored by: Yandex LLC

Modified:
  stable/8/sys/netinet/in_rmx.c
  stable/8/sys/netinet/in_var.h
  stable/8/sys/netinet/ip_icmp.c
  stable/8/sys/netinet/ip_var.h
  stable/8/sys/netinet6/in6_rmx.c
Directory Properties:
  stable/8/sys/   (props changed)
  stable/8/sys/netinet/   (props changed)
  stable/8/sys/netinet6/   (props changed)

Modified: stable/8/sys/netinet/in_rmx.c
==============================================================================
--- stable/8/sys/netinet/in_rmx.c       Tue Nov 27 20:16:01 2012        (r243628)
+++ stable/8/sys/netinet/in_rmx.c       Tue Nov 27 20:16:37 2012        (r243629)
@@ -27,19 +27,6 @@
  * SUCH DAMAGE.
  */
 
-/*
- * This code does two things necessary for the enhanced TCP metrics to
- * function in a useful manner:
- *  1) It marks all non-host routes as `cloning', thus ensuring that
- *     every actual reference to such a route actually gets turned
- *     into a reference to a host route to the specific destination
- *     requested.
- *  2) When such routes lose all their references, it arranges for them
- *     to be deleted in some random collection of circumstances, so that
- *     a large quantity of stale routing data is not kept in kernel memory
- *     indefinitely.  See in_rtqtimo() below for the exact mechanism.
- */
-
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
@@ -58,6 +45,8 @@ __FBSDID("$FreeBSD$");
 
 #include <netinet/in.h>
 #include <netinet/in_var.h>
+#include <netinet/ip.h>
+#include <netinet/ip_icmp.h>
 #include <netinet/ip_var.h>
 
 extern int     in_inithead(void **head, int off);
@@ -340,6 +329,13 @@ in_rtqdrain(void)
        VNET_LIST_RUNLOCK_NOSLEEP();
 }
 
+void
+in_setmatchfunc(struct radix_node_head *rnh, int val)
+{
+
+       rnh->rnh_matchaddr = (val != 0) ? rn_match : in_matroute;
+}
+
 static int _in_rt_was_here;
 /*
  * Initialize our routing tree.
@@ -365,7 +361,7 @@ in_inithead(void **head, int off)
 
        rnh = *head;
        rnh->rnh_addaddr = in_addroute;
-       rnh->rnh_matchaddr = in_matroute;
+       in_setmatchfunc(rnh, V_drop_redirect);
        rnh->rnh_close = in_clsroute;
        if (_in_rt_was_here == 0 ) {
                callout_init(&V_rtq_timer, CALLOUT_MPSAFE);

Modified: stable/8/sys/netinet/in_var.h
==============================================================================
--- stable/8/sys/netinet/in_var.h       Tue Nov 27 20:16:01 2012        (r243628)
+++ stable/8/sys/netinet/in_var.h       Tue Nov 27 20:16:37 2012        (r243629)
@@ -423,6 +423,7 @@ inm_acquire_locked(struct in_multi *inm)
 struct rtentry;
 struct route;
 struct ip_moptions;
+struct radix_node_head;
 
 int    imo_multi_filter(const struct ip_moptions *, const struct ifnet *,
            const struct sockaddr *, const struct sockaddr *);
@@ -461,6 +462,7 @@ void         in_rtredirect(struct sockaddr *, s
            struct sockaddr *, int, struct sockaddr *, u_int);
 int     in_rtrequest(int, struct sockaddr *,
            struct sockaddr *, struct sockaddr *, int, struct rtentry **, 
u_int);
+void   in_setmatchfunc(struct radix_node_head *, int);
 
 #if 0
 int     in_rt_getifa(struct rt_addrinfo *, u_int fibnum);

Modified: stable/8/sys/netinet/ip_icmp.c
==============================================================================
--- stable/8/sys/netinet/ip_icmp.c      Tue Nov 27 20:16:01 2012        (r243628)
+++ stable/8/sys/netinet/ip_icmp.c      Tue Nov 27 20:16:37 2012        (r243629)
@@ -92,11 +92,7 @@ SYSCTL_VNET_UINT(_net_inet_icmp, OID_AUT
        &VNET_NAME(icmpmaskfake), 0,
        "Fake reply to ICMP Address Mask Request packets.");
 
-static VNET_DEFINE(int, drop_redirect) = 0;
-#define        V_drop_redirect                 VNET(drop_redirect)
-SYSCTL_VNET_INT(_net_inet_icmp, OID_AUTO, drop_redirect, CTLFLAG_RW,
-       &VNET_NAME(drop_redirect), 0,
-       "Ignore ICMP redirects");
+VNET_DEFINE(int, drop_redirect) = 0;
 
 static VNET_DEFINE(int, log_redirect) = 0;
 #define        V_log_redirect                  VNET(log_redirect)
@@ -153,6 +149,39 @@ static void        icmp_send(struct mbuf *, str
 
 extern struct protosw inetsw[];
 
+static int
+sysctl_net_icmp_drop_redir(SYSCTL_HANDLER_ARGS)
+{
+       int error, new;
+       int i;
+       struct radix_node_head *rnh;
+
+       new = V_drop_redirect;
+       error = sysctl_handle_int(oidp, &new, 0, req);
+       if (error == 0 && req->newptr) {
+               new = (new != 0) ? 1 : 0;
+
+               if (new == V_drop_redirect)
+                       return (0);
+
+               for (i = 0; i < rt_numfibs; i++) {
+                       if ((rnh = rt_tables_get_rnh(i, AF_INET)) == NULL)
+                               continue;
+                       RADIX_NODE_HEAD_LOCK(rnh);
+                       in_setmatchfunc(rnh, new);
+                       RADIX_NODE_HEAD_UNLOCK(rnh);
+               }
+               
+               V_drop_redirect = new;
+       }
+
+       return (error);
+}
+
+SYSCTL_VNET_PROC(_net_inet_icmp, OID_AUTO, drop_redirect,
+    CTLTYPE_INT|CTLFLAG_RW, 0, 0,
+    sysctl_net_icmp_drop_redir, "I", "Ignore ICMP redirects");
+
 /*
  * Kernel module interface for updating icmpstat.  The argument is an index
  * into icmpstat treated as an array of u_long.  While this encodes the

Modified: stable/8/sys/netinet/ip_var.h
==============================================================================
--- stable/8/sys/netinet/ip_var.h       Tue Nov 27 20:16:01 2012        (r243628)
+++ stable/8/sys/netinet/ip_var.h       Tue Nov 27 20:16:37 2012        (r243629)
@@ -187,6 +187,7 @@ VNET_DECLARE(struct socket *, ip_mrouter
 extern int     (*legal_vif_num)(int);
 extern u_long  (*ip_mcast_src)(int);
 VNET_DECLARE(int, rsvp_on);
+VNET_DECLARE(int, drop_redirect);
 extern struct  pr_usrreqs rip_usrreqs;
 
 #define        V_ipstat                VNET(ipstat)
@@ -199,6 +200,7 @@ extern struct       pr_usrreqs rip_usrreqs;
 #define        V_ip_rsvpd              VNET(ip_rsvpd)
 #define        V_ip_mrouter            VNET(ip_mrouter)
 #define        V_rsvp_on               VNET(rsvp_on)
+#define        V_drop_redirect         VNET(drop_redirect)
 
 void   inp_freemoptions(struct ip_moptions *);
 int    inp_getmoptions(struct inpcb *, struct sockopt *);

Modified: stable/8/sys/netinet6/in6_rmx.c
==============================================================================
--- stable/8/sys/netinet6/in6_rmx.c     Tue Nov 27 20:16:01 2012        (r243628)
+++ stable/8/sys/netinet6/in6_rmx.c     Tue Nov 27 20:16:37 2012        (r243629)
@@ -59,19 +59,6 @@
  *
  */
 
-/*
- * This code does two things necessary for the enhanced TCP metrics to
- * function in a useful manner:
- *  1) It marks all non-host routes as `cloning', thus ensuring that
- *     every actual reference to such a route actually gets turned
- *     into a reference to a host route to the specific destination
- *     requested.
- *  2) When such routes lose all their references, it arranges for them
- *     to be deleted in some random collection of circumstances, so that
- *     a large quantity of stale routing data is not kept in kernel memory
- *     indefinitely.  See in6_rtqtimo() below for the exact mechanism.
- */
-
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
@@ -111,8 +98,6 @@ extern int   in6_inithead(void **head, int
 extern int     in6_detachhead(void **head, int off);
 #endif
 
-#define RTPRF_OURS             RTF_PROTO3      /* set on routes we manage */
-
 /*
  * Do what we need to do when inserting a route.
  */
@@ -183,42 +168,8 @@ in6_addroute(void *v_arg, void *n_arg, s
        return (ret);
 }
 
-/*
- * This code is the inverse of in6_clsroute: on first reference, if we
- * were managing the route, stop doing so and set the expiration timer
- * back off again.
- */
-static struct radix_node *
-in6_matroute(void *v_arg, struct radix_node_head *head)
-{
-       struct radix_node *rn = rn_match(v_arg, head);
-       struct rtentry *rt = (struct rtentry *)rn;
-
-       if (rt) {
-               RT_LOCK(rt);
-               if (rt->rt_flags & RTPRF_OURS) {
-                       rt->rt_flags &= ~RTPRF_OURS;
-                       rt->rt_rmx.rmx_expire = 0;
-               }
-               RT_UNLOCK(rt);
-       }
-       return rn;
-}
-
 SYSCTL_DECL(_net_inet6_ip6);
 
-static VNET_DEFINE(int, rtq_reallyold6) = 60*60;
-       /* one hour is ``really old'' */
-#define        V_rtq_reallyold6                VNET(rtq_reallyold6)
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTEXPIRE, rtexpire, CTLFLAG_RW,
-    &VNET_NAME(rtq_reallyold6) , 0, "");
-
-static VNET_DEFINE(int, rtq_minreallyold6) = 10;
-       /* never automatically crank down to less */
-#define        V_rtq_minreallyold6             VNET(rtq_minreallyold6)
-SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_RTMINEXPIRE, rtminexpire, CTLFLAG_RW,
-    &VNET_NAME(rtq_minreallyold6) , 0, "");
-
 static VNET_DEFINE(int, rtq_toomany6) = 128;
        /* 128 cached routes is ``too many'' */
 #define        V_rtq_toomany6                  VNET(rtq_toomany6)
@@ -236,122 +187,6 @@ struct rtqk_arg {
 };
 
 /*
- * Get rid of old routes.  When draining, this deletes everything, even when
- * the timeout is not expired yet.  When updating, this makes sure that
- * nothing has a timeout longer than the current value of rtq_reallyold6.
- */
-static int
-in6_rtqkill(struct radix_node *rn, void *rock)
-{
-       struct rtqk_arg *ap = rock;
-       struct rtentry *rt = (struct rtentry *)rn;
-       int err;
-
-       RADIX_NODE_HEAD_WLOCK_ASSERT(ap->rnh);
-
-       if (rt->rt_flags & RTPRF_OURS) {
-               ap->found++;
-
-               if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
-                       if (rt->rt_refcnt > 0)
-                               panic("rtqkill route really not free");
-
-                       err = in6_rtrequest(RTM_DELETE,
-                                       (struct sockaddr *)rt_key(rt),
-                                       rt->rt_gateway, rt_mask(rt),
-                                       rt->rt_flags|RTF_RNH_LOCKED, 0,
-                                       rt->rt_fibnum);
-                       if (err) {
-                               log(LOG_WARNING, "in6_rtqkill: error %d", err);
-                       } else {
-                               ap->killed++;
-                       }
-               } else {
-                       if (ap->updating
-                          && (rt->rt_rmx.rmx_expire - time_uptime
-                              > V_rtq_reallyold6)) {
-                               rt->rt_rmx.rmx_expire = time_uptime
-                                       + V_rtq_reallyold6;
-                       }
-                       ap->nextstop = lmin(ap->nextstop,
-                                           rt->rt_rmx.rmx_expire);
-               }
-       }
-
-       return 0;
-}
-
-#define RTQ_TIMEOUT    60*10   /* run no less than once every ten minutes */
-static VNET_DEFINE(int, rtq_timeout6) = RTQ_TIMEOUT;
-static VNET_DEFINE(struct callout, rtq_timer6);
-
-#define        V_rtq_timeout6                  VNET(rtq_timeout6)
-#define        V_rtq_timer6                    VNET(rtq_timer6)
-
-static void
-in6_rtqtimo_one(struct radix_node_head *rnh)
-{
-       struct rtqk_arg arg;
-       static time_t last_adjusted_timeout = 0;
-
-       arg.found = arg.killed = 0;
-       arg.rnh = rnh;
-       arg.nextstop = time_uptime + V_rtq_timeout6;
-       arg.draining = arg.updating = 0;
-       RADIX_NODE_HEAD_LOCK(rnh);
-       rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
-       RADIX_NODE_HEAD_UNLOCK(rnh);
-
-       /*
-        * Attempt to be somewhat dynamic about this:
-        * If there are ``too many'' routes sitting around taking up space,
-        * then crank down the timeout, and see if we can't make some more
-        * go away.  However, we make sure that we will never adjust more
-        * than once in rtq_timeout6 seconds, to keep from cranking down too
-        * hard.
-        */
-       if ((arg.found - arg.killed > V_rtq_toomany6)
-          && (time_uptime - last_adjusted_timeout >= V_rtq_timeout6)
-          && V_rtq_reallyold6 > V_rtq_minreallyold6) {
-               V_rtq_reallyold6 = 2*V_rtq_reallyold6 / 3;
-               if (V_rtq_reallyold6 < V_rtq_minreallyold6) {
-                       V_rtq_reallyold6 = V_rtq_minreallyold6;
-               }
-
-               last_adjusted_timeout = time_uptime;
-#ifdef DIAGNOSTIC
-               log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold6 to %d",
-                   V_rtq_reallyold6);
-#endif
-               arg.found = arg.killed = 0;
-               arg.updating = 1;
-               RADIX_NODE_HEAD_LOCK(rnh);
-               rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
-               RADIX_NODE_HEAD_UNLOCK(rnh);
-       }
-}
-
-static void
-in6_rtqtimo(void *rock)
-{
-       CURVNET_SET_QUIET((struct vnet *) rock);
-       struct radix_node_head *rnh;
-       struct timeval atv;
-       u_int fibnum;
-
-       for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
-               rnh = rt_tables_get_rnh(fibnum, AF_INET6);
-               if (rnh != NULL)
-                       in6_rtqtimo_one(rnh);
-       }
-
-       atv.tv_usec = 0;
-       atv.tv_sec = V_rtq_timeout6;
-       callout_reset(&V_rtq_timer6, tvtohz(&atv), in6_rtqtimo, rock);
-       CURVNET_RESTORE();
-}
-
-/*
  * Age old PMTUs.
  */
 struct mtuex_arg {
@@ -440,12 +275,9 @@ in6_inithead(void **head, int off)
 
        rnh = *head;
        rnh->rnh_addaddr = in6_addroute;
-       rnh->rnh_matchaddr = in6_matroute;
 
        if (V__in6_rt_was_here == 0) {
-               callout_init(&V_rtq_timer6, CALLOUT_MPSAFE);
                callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE);
-               in6_rtqtimo(curvnet);   /* kick off timeout first time */
                in6_mtutimo(curvnet);   /* kick off timeout first time */
                V__in6_rt_was_here = 1;
        }
@@ -458,7 +290,6 @@ int
 in6_detachhead(void **head, int off)
 {
 
-       callout_drain(&V_rtq_timer6);
        callout_drain(&V_rtq_mtutimer);
        return (1);
 }
_______________________________________________
svn-src-all@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Reply via email to