On 25/08/15(Tue) 12:27, Martin Pieuchot wrote:
> On 12/08/15(Wed) 17:03, Martin Pieuchot wrote:
> > I'm currently working on the routing table interface to make is safe
> > to use by multiple CPUs at the same time.  The diff below is a big
> > step in this direction and I'd really appreciate if people could test
> > it with their usual network setup and report back.
> 
> Updated version to match recent changes.  I'm still looking for test
> reports and reviews.

Thanks to all the testers.  Here's a third version that should fix a
regression reported by phessler@.  The idea is to use rtvalid(9) to
check early if the gateway route is still valid or not.  If it isn't
then call rtalloc(9) again to perform the ARP/NDP resolution.

This diff includes rtvalid(9) and makes use of it in ip{,6}_output(),
more conversions will be done afterward, but these should hopefully
be all the necessary bits to fix the regression.

Tests & reviews are still welcome!

Index: net/route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.226
diff -u -p -r1.226 route.c
--- net/route.c 30 Aug 2015 10:39:16 -0000      1.226
+++ net/route.c 31 Aug 2015 09:33:27 -0000
@@ -153,6 +153,7 @@ int rtable_alloc(void ***, u_int);
 int    rtflushclone1(struct rtentry *, void *, u_int);
 void   rtflushclone(unsigned int, struct rtentry *);
 int    rt_if_remove_rtdelete(struct rtentry *, void *, u_int);
+struct rtentry *rt_match(struct sockaddr *, int, unsigned int);
 
 struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *,
                    u_int);
@@ -300,19 +301,68 @@ rtable_exists(u_int id)   /* verify table 
        return (1);
 }
 
+
+/*
+ * Do the actual checks for rtvalid(9), do not use directly!
+ */
+static inline int
+rt_is_valid(struct rtentry *rt)
+{
+       if (rt == NULL)
+               return (0);
+
+       if ((rt->rt_flags & RTF_UP) == 0)
+               return (0);
+
+       /* Routes attached to stall ifas should be freed. */
+       if (rt->rt_ifa == NULL || rt->rt_ifa->ifa_ifp == NULL)
+               return (0);
+
+       return (1);
+}
+
+/*
+ * Returns 1 if the cached ``rt'' entry is still valid, 0 otherwise.
+ */
+int
+rtvalid(struct rtentry *rt)
+{
+       if (rt_is_valid(rt) == 0)
+               return (0);
+
+       /* Next hop must also be valid. */
+       if ((rt->rt_flags & RTF_GATEWAY) && rt_is_valid(rt->rt_gwroute) == 0)
+               return (0);
+
+       return (1);
+}
+
+/*
+ * Do the actual lookup for rtalloc(9), do not use directly!
+ *
+ * Return the best matching entry for the destination ``dst''.
+ *
+ * "RT_RESOLVE" means that a corresponding L2 entry should
+ *   be added to the routing table and resolved (via ARP or
+ *   NDP), if it does not exist.
+ *
+ * "RT_REPORT" indicates that a message should be sent to
+ *   userland if no matching route has been found or if an
+ *   error occured while adding a L2 entry.
+ */
 struct rtentry *
-rtalloc(struct sockaddr *dst, int flags, unsigned int tableid)
+rt_match(struct sockaddr *dst, int flags, unsigned int tableid)
 {
        struct rtentry          *rt;
        struct rtentry          *newrt = NULL;
        struct rt_addrinfo       info;
-       int                      s, error = 0, msgtype = RTM_MISS;
+       int                      s, error = 0;
 
-       s = splsoftnet();
 
        bzero(&info, sizeof(info));
        info.rti_info[RTAX_DST] = dst;
 
+       s = splsoftnet();
        rt = rtable_match(tableid, dst);
        if (rt != NULL) {
                newrt = rt;
@@ -325,10 +375,6 @@ rtalloc(struct sockaddr *dst, int flags,
                                goto miss;
                        }
                        rt = newrt;
-                       if (rt->rt_flags & RTF_XRESOLVE) {
-                               msgtype = RTM_RESOLVE;
-                               goto miss;
-                       }
                        /* Inform listeners of the new route */
                        rt_sendmsg(rt, RTM_ADD, tableid);
                } else
@@ -336,11 +382,8 @@ rtalloc(struct sockaddr *dst, int flags,
        } else {
                rtstat.rts_unreach++;
 miss:
-               if (ISSET(flags, RT_REPORT)) {
-                       bzero((caddr_t)&info, sizeof(info));
-                       info.rti_info[RTAX_DST] = dst;
-                       rt_missmsg(msgtype, &info, 0, NULL, error, tableid);
-               }
+               if (ISSET(flags, RT_REPORT))
+                       rt_missmsg(RTM_MISS, &info, 0, NULL, error, tableid);
        }
        splx(s);
        return (newrt);
@@ -374,6 +417,75 @@ rtalloc_mpath(struct sockaddr *dst, uint
 }
 #endif /* SMALL_KERNEL */
 
+/*
+ * Look in the routing table for the best matching entry for
+ * ``dst''.
+ *
+ * If a route with a gateway is found and its next hop is no
+ * longer valid, try to cache it.
+ */
+struct rtentry *
+rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid)
+{
+       struct rtentry *rt, *nhrt;
+
+       rt = rt_match(dst, flags, rtableid);
+
+       /* No match or route to host?  We're done. */
+       if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) == 0)
+               return (rt);
+
+       nhrt = rt->rt_gwroute;
+
+       /*  Nothing to do if the next hop is valid. */
+       if (nhrt != NULL && (nhrt->rt_flags & RTF_UP))
+               return (rt);
+
+       rtfree(rt->rt_gwroute);
+       rt->rt_gwroute = NULL;
+
+       /*
+        * If we cannot find a valid next hop, return the route
+        * with a gateway.
+        *
+        * Some dragons hiding in the tree certainly depends on
+        * this behavior.
+        */
+       nhrt = rt_match(rt->rt_gateway, flags | RT_RESOLVE, rtableid);
+       if (nhrt == NULL)
+               return (rt);
+
+       /*
+        * Next hop must be reachable, this also prevents rtentry
+        * loops for example when rt->rt_gwroute points to rt.
+        */
+       if ((nhrt->rt_flags & (RTF_UP|RTF_CLONING|RTF_GATEWAY)) != RTF_UP) {
+               rtfree(nhrt);
+               return (rt);
+       }
+
+       /*
+        * Next hop entry MUST be on the same interface.
+        *
+        * This check is needed as long as we support stall
+        * ``ifa'' pointers.
+        */
+       if (nhrt->rt_ifp != rt->rt_ifp) {
+               rtfree(nhrt);
+               return (rt);
+       }
+
+       /*
+        * If the MTU of next hop is 0, this will reset the MTU of the
+        * route to run PMTUD again from scratch.
+        */
+       if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
+               rt->rt_mtu = nhrt->rt_mtu;
+
+       rt->rt_gwroute = nhrt;
+       return (rt);
+}
+
 void
 rtfree(struct rtentry *rt)
 {
@@ -527,7 +639,7 @@ create:
                        rt->rt_flags |= RTF_MODIFIED;
                        flags |= RTF_MODIFIED;
                        stat = &rtstat.rts_newgateway;
-                       rt_setgate(rt, gateway, rdomain);
+                       rt_setgate(rt, gateway);
                }
        } else
                error = EHOSTUNREACH;
@@ -659,19 +771,13 @@ ifa_ifwithroute(int flags, struct sockad
        }
        if (ifa == NULL) {
                struct rtentry  *rt = rtalloc(gateway, 0, rtableid);
-               if (rt == NULL)
-                       return (NULL);
                /* The gateway must be local if the same address family. */
-               if ((rt->rt_flags & RTF_GATEWAY) &&
-                   rt_key(rt)->sa_family == dst->sa_family) {
+               if (rtvalid(rt) == 0 || ((rt->rt_flags & RTF_GATEWAY) &&
+                   rt_key(rt)->sa_family == dst->sa_family)) {
                        rtfree(rt);
                        return (NULL);
                }
                ifa = rt->rt_ifa;
-               if (ifa == NULL || ifa->ifa_ifp == NULL) {
-                       rtfree(rt);
-                       return (NULL);
-               }
                rtfree(rt);
        }
        if (ifa->ifa_addr->sa_family != dst->sa_family) {
@@ -982,8 +1088,7 @@ rtrequest1(int req, struct rt_addrinfo *
                 * the routing table because the radix MPATH code use
                 * it to (re)order routes.
                 */
-               if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
-                   tableid))) {
+               if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]))) {
                        free(ndst, M_RTABLE, dlen);
                        pool_put(&rtentry_pool, rt);
                        return (error);
@@ -1034,7 +1139,7 @@ rtrequest1(int req, struct rt_addrinfo *
 }
 
 int
-rt_setgate(struct rtentry *rt, struct sockaddr *gate, unsigned int tableid)
+rt_setgate(struct rtentry *rt, struct sockaddr *gate)
 {
        int glen = ROUNDUP(gate->sa_len);
        struct sockaddr *sa;
@@ -1052,22 +1157,7 @@ rt_setgate(struct rtentry *rt, struct so
                rtfree(rt->rt_gwroute);
                rt->rt_gwroute = NULL;
        }
-       if (rt->rt_flags & RTF_GATEWAY) {
-               /* XXX is this actually valid to cross tables here? */
-               rt->rt_gwroute = rtalloc(gate, RT_REPORT|RT_RESOLVE, tableid);
-               /*
-                * If we switched gateways, grab the MTU from the new
-                * gateway route if the current MTU is 0 or greater
-                * than the MTU of gateway.
-                * Note that, if the MTU of gateway is 0, we will reset the
-                * MTU of the route to run PMTUD again from scratch. XXX
-                */
-               if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
-                   rt->rt_rmx.rmx_mtu &&
-                   rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) {
-                       rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
-               }
-       }
+
        return (0);
 }
 
@@ -1079,28 +1169,21 @@ rt_checkgate(struct ifnet *ifp, struct r
 
        KASSERT(rt != NULL);
 
-       if ((rt->rt_flags & RTF_UP) == 0) {
-               rt = rtalloc(dst, RT_REPORT|RT_RESOLVE, rtableid);
-               if (rt == NULL)
-                       return (EHOSTUNREACH);
-               rt->rt_refcnt--;
-               if (rt->rt_ifp != ifp)
-                       return (EHOSTUNREACH);
-       }
+       if ((rt->rt_flags & RTF_UP) == 0)
+               return (EHOSTUNREACH);
 
        rt0 = rt;
 
        if (rt->rt_flags & RTF_GATEWAY) {
-               if (rt->rt_gwroute && !(rt->rt_gwroute->rt_flags & RTF_UP)) {
+               if (rt->rt_gwroute == NULL)
+                       return (EHOSTUNREACH);
+
+               if ((rt->rt_gwroute->rt_flags & RTF_UP) == 0) {
                        rtfree(rt->rt_gwroute);
                        rt->rt_gwroute = NULL;
+                       return (EHOSTUNREACH);
                }
-               if (rt->rt_gwroute == NULL) {
-                       rt->rt_gwroute = rtalloc(rt->rt_gateway,
-                           RT_REPORT|RT_RESOLVE, rtableid);
-                       if (rt->rt_gwroute == NULL)
-                               return (EHOSTUNREACH);
-               }
+
                /*
                 * Next hop must be reachable, this also prevents rtentry
                 * loops, for example when rt->rt_gwroute points to rt.
Index: net/route.h
===================================================================
RCS file: /cvs/src/sys/net/route.h,v
retrieving revision 1.110
diff -u -p -r1.110 route.h
--- net/route.h 20 Aug 2015 12:39:43 -0000      1.110
+++ net/route.h 31 Aug 2015 09:05:07 -0000
@@ -118,6 +118,8 @@ struct rtentry {
 };
 #define        rt_use          rt_rmx.rmx_pksent
 #define        rt_expire       rt_rmx.rmx_expire
+#define        rt_locks        rt_rmx.rmx_locks
+#define        rt_mtu          rt_rmx.rmx_mtu
 
 #define        RTF_UP          0x1             /* route usable */
 #define        RTF_GATEWAY     0x2             /* destination is a gateway */
@@ -361,7 +363,7 @@ void         rt_sendmsg(struct rtentry *, int, 
 void    rt_sendaddrmsg(struct rtentry *, int);
 void    rt_missmsg(int, struct rt_addrinfo *, int, struct ifnet *, int,
            u_int);
-int     rt_setgate(struct rtentry *, struct sockaddr *, unsigned int);
+int     rt_setgate(struct rtentry *, struct sockaddr *);
 int     rt_checkgate(struct ifnet *, struct rtentry *, struct sockaddr *,
            unsigned int, struct rtentry **);
 void    rt_setmetrics(u_long, struct rt_metrics *, struct rt_kmetrics *);
@@ -377,6 +379,7 @@ void                         rt_timer_queue_destroy(struct 
rt
 unsigned long           rt_timer_queue_count(struct rttimer_queue *);
 void                    rt_timer_timer(void *);
 
+int     rtvalid(struct rtentry *);
 #ifdef SMALL_KERNEL
 #define         rtalloc_mpath(dst, s, rid) rtalloc((dst), 
RT_REPORT|RT_RESOLVE, (rid))
 #else
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.169
diff -u -p -r1.169 rtsock.c
--- net/rtsock.c        24 Aug 2015 22:11:33 -0000      1.169
+++ net/rtsock.c        31 Aug 2015 09:05:07 -0000
@@ -744,9 +744,8 @@ report:
                                    info.rti_info[RTAX_GATEWAY]->sa_len)) {
                                        newgate = 1;
                                }
-                       if (info.rti_info[RTAX_GATEWAY] != NULL &&
-                           (error = rt_setgate(rt, info.rti_info[RTAX_GATEWAY],
-                            tableid)))
+                       if (info.rti_info[RTAX_GATEWAY] != NULL && (error =
+                           rt_setgate(rt, info.rti_info[RTAX_GATEWAY])))
                                goto flush;
                        /*
                         * new gateway could require new ifaddr, ifp;
Index: netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.287
diff -u -p -r1.287 ip_output.c
--- netinet/ip_output.c 31 Aug 2015 07:17:12 -0000      1.287
+++ netinet/ip_output.c 31 Aug 2015 09:27:51 -0000
@@ -168,12 +168,13 @@ ip_output(struct mbuf *m0, struct mbuf *
                dst = satosin(&ro->ro_dst);
 
                /*
-                * If there is a cached route, check that it is to the same
-                * destination and is still up.  If not, free it and try again.
+                * If there is a cached route, check that it is to the
+                * same destination and is still valid.  If not, free
+                * it and try again.
                 */
-               if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+               if (rtvalid(ro->ro_rt) == 0 ||
                    dst->sin_addr.s_addr != ip->ip_dst.s_addr ||
-                   ro->ro_tableid != m->m_pkthdr.ph_rtableid)) {
+                   ro->ro_tableid != m->m_pkthdr.ph_rtableid) {
                        rtfree(ro->ro_rt);
                        ro->ro_rt = NULL;
                }
@@ -296,12 +297,13 @@ reroute:
                dst = satosin(&ro->ro_dst);
 
                /*
-                * If there is a cached route, check that it is to the same
-                * destination and is still up.  If not, free it and try again.
+                * If there is a cached route, check that it is to the
+                * same destination and is still valid.  If not, free
+                * it and try again.
                 */
-               if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+               if (rtvalid(ro->ro_rt) == 0 ||
                    dst->sin_addr.s_addr != ip->ip_dst.s_addr ||
-                   ro->ro_tableid != m->m_pkthdr.ph_rtableid)) {
+                   ro->ro_tableid != m->m_pkthdr.ph_rtableid) {
                        rtfree(ro->ro_rt);
                        ro->ro_rt = NULL;
                }
Index: netinet6/in6_src.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6_src.c,v
retrieving revision 1.51
diff -u -p -r1.51 in6_src.c
--- netinet6/in6_src.c  8 Jun 2015 22:19:28 -0000       1.51
+++ netinet6/in6_src.c  31 Aug 2015 09:41:20 -0000
@@ -247,13 +247,12 @@ in6_selectsrc(struct in6_addr **in6src, 
         * our src addr is taken from the i/f, else punt.
         */
        if (ro) {
-               if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
-                   !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst))) {
+               if (rtvalid(ro->ro_rt) == 0 ||
+                   !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst)) {
                        rtfree(ro->ro_rt);
                        ro->ro_rt = NULL;
                }
-               if (ro->ro_rt == (struct rtentry *)0 ||
-                   ro->ro_rt->rt_ifp == (struct ifnet *)0) {
+               if (ro->ro_rt == NULL) {
                        struct sockaddr_in6 *sa6;
 
                        /* No route yet, so try to acquire one */
@@ -419,10 +418,9 @@ selectroute(struct sockaddr_in6 *dstsock
         * cached destination, in case of sharing the cache with IPv4.
         */
        if (ro) {
-               if (ro->ro_rt &&
-                   (!(ro->ro_rt->rt_flags & RTF_UP) ||
+               if (rtvalid(ro->ro_rt) == 0 ||
                     sin6tosa(&ro->ro_dst)->sa_family != AF_INET6 ||
-                    !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst))) {
+                    !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst)) {
                        rtfree(ro->ro_rt);
                        ro->ro_rt = NULL;
                }

Reply via email to