On Wed, Nov 04, 2015 at 12:33:23PM +0100, Martin Pieuchot wrote:
> On 12/08/15(Wed) 17:03, Martin Pieuchot wrote:
> > I'm currently working on the routing table interface to make is safe
> > to use by multiple CPUs at the same time.  The diff below is a big
> > step in this direction and I'd really appreciate if people could test
> > it with their usual network setup and report back.
> 
> Below is an updated version of the diff now that bluhm@ fixed the
> potential loop with RTF_GATEWAY routes.
> 
> Note that regression tests will fail because we no longer call
> rtalloc(9) inside rt_setgate().  In other words we do not cache
> a next-hop route before using it.
> 
> Ok?

OK bluhm@

> 
> Index: net/route.c
> ===================================================================
> RCS file: /cvs/src/sys/net/route.c,v
> retrieving revision 1.268
> diff -u -p -r1.268 route.c
> --- net/route.c       4 Nov 2015 10:13:55 -0000       1.268
> +++ net/route.c       4 Nov 2015 11:15:00 -0000
> @@ -151,6 +151,7 @@ void      rt_timer_init(void);
>  int  rtflushclone1(struct rtentry *, void *, u_int);
>  void rtflushclone(unsigned int, struct rtentry *);
>  int  rt_if_remove_rtdelete(struct rtentry *, void *, u_int);
> +struct rtentry *rt_match(struct sockaddr *, int, unsigned int);
>  
>  struct       ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr 
> *,
>                   u_int);
> @@ -194,6 +195,12 @@ rtisvalid(struct rtentry *rt)
>       if (rt == NULL)
>               return (0);
>  
> +#ifdef DIAGNOSTIC
> +     if (ISSET(rt->rt_flags, RTF_GATEWAY) && (rt->rt_gwroute != NULL) &&
> +         ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY))
> +             panic("next hop must be directly reachable");
> +#endif
> +
>       if ((rt->rt_flags & RTF_UP) == 0)
>               return (0);
>  
> @@ -201,11 +208,27 @@ rtisvalid(struct rtentry *rt)
>       if (rt->rt_ifa == NULL || rt->rt_ifa->ifa_ifp == NULL)
>               return (0);
>  
> +     if (ISSET(rt->rt_flags, RTF_GATEWAY) && !rtisvalid(rt->rt_gwroute))
> +             return (0);
> +
>       return (1);
>  }
>  
> +/*
> + * Do the actual lookup for rtalloc(9), do not use directly!
> + *
> + * Return the best matching entry for the destination ``dst''.
> + *
> + * "RT_RESOLVE" means that a corresponding L2 entry should
> + *   be added to the routing table and resolved (via ARP or
> + *   NDP), if it does not exist.
> + *
> + * "RT_REPORT" indicates that a message should be sent to
> + *   userland if no matching route has been found or if an
> + *   error occured while adding a L2 entry.
> + */
>  struct rtentry *
> -rtalloc(struct sockaddr *dst, int flags, unsigned int tableid)
> +rt_match(struct sockaddr *dst, int flags, unsigned int tableid)
>  {
>       struct rtentry          *rt0, *rt = NULL;
>       struct rt_addrinfo       info;
> @@ -336,6 +359,76 @@ rtalloc_mpath(struct sockaddr *dst, uint
>  }
>  #endif /* SMALL_KERNEL */
>  
> +/*
> + * Look in the routing table for the best matching entry for
> + * ``dst''.
> + *
> + * If a route with a gateway is found and its next hop is no
> + * longer valid, try to cache it.
> + */
> +struct rtentry *
> +rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid)
> +{
> +     struct rtentry *rt, *nhrt;
> +
> +     rt = rt_match(dst, flags, rtableid);
> +
> +     /* No match or route to host?  We're done. */
> +     if (rt == NULL || !ISSET(rt->rt_flags, RTF_GATEWAY))
> +             return (rt);
> +
> +     /* Nothing to do if the next hop is valid. */
> +     if (rtisvalid(rt->rt_gwroute))
> +             return (rt);
> +
> +     rtfree(rt->rt_gwroute);
> +     rt->rt_gwroute = NULL;
> +
> +     /*
> +      * If we cannot find a valid next hop, return the route
> +      * with a gateway.
> +      *
> +      * XXX Some dragons hiding in the tree certainly depends on
> +      * this behavior.  But it is safe since rt_checkgate() wont
> +      * allow us to us this route later on.
> +      */
> +     nhrt = rt_match(rt->rt_gateway, flags | RT_RESOLVE, rtableid);
> +     if (nhrt == NULL)
> +             return (rt);
> +
> +     /*
> +      * Next hop must be reachable, this also prevents rtentry
> +      * loops for example when rt->rt_gwroute points to rt.
> +      */
> +     if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) {
> +             rtfree(nhrt);
> +             return (rt);
> +     }
> +
> +     /*
> +      * Next hop entry must be UP and on the same interface.
> +      */
> +     if (!ISSET(nhrt->rt_flags, RTF_UP) || nhrt->rt_ifidx != rt->rt_ifidx) {
> +             rtfree(nhrt);
> +             return (rt);
> +     }
> +
> +     /*
> +      * If the MTU of next hop is 0, this will reset the MTU of the
> +      * route to run PMTUD again from scratch.
> +      */
> +     if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
> +             rt->rt_mtu = nhrt->rt_mtu;
> +
> +     /*
> +      * Do not return the cached next-hop route, rt_checkgate() will
> +      * do the magic for us.
> +      */
> +     rt->rt_gwroute = nhrt;
> +
> +     return (rt);
> +}
> +
>  void
>  rtref(struct rtentry *rt)
>  {
> @@ -499,7 +592,7 @@ create:
>                       rt->rt_flags |= RTF_MODIFIED;
>                       flags |= RTF_MODIFIED;
>                       stat = &rtstat.rts_newgateway;
> -                     rt_setgate(rt, gateway, rdomain);
> +                     rt_setgate(rt, gateway);
>               }
>       } else
>               error = EHOSTUNREACH;
> @@ -931,8 +1024,7 @@ rtrequest(int req, struct rt_addrinfo *i
>                * the routing table because the radix MPATH code use
>                * it to (re)order routes.
>                */
> -             if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
> -                 tableid))) {
> +             if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]))) {
>                       free(ndst, M_RTABLE, dlen);
>                       pool_put(&rtentry_pool, rt);
>                       return (error);
> @@ -985,7 +1077,7 @@ rtrequest(int req, struct rt_addrinfo *i
>  }
>  
>  int
> -rt_setgate(struct rtentry *rt, struct sockaddr *gate, unsigned int tableid)
> +rt_setgate(struct rtentry *rt, struct sockaddr *gate)
>  {
>       int glen = ROUNDUP(gate->sa_len);
>       struct sockaddr *sa;
> @@ -1003,22 +1095,7 @@ rt_setgate(struct rtentry *rt, struct so
>               rtfree(rt->rt_gwroute);
>               rt->rt_gwroute = NULL;
>       }
> -     if (rt->rt_flags & RTF_GATEWAY) {
> -             /* XXX is this actually valid to cross tables here? */
> -             rt->rt_gwroute = rtalloc(gate, RT_REPORT|RT_RESOLVE, tableid);
> -             /*
> -              * If we switched gateways, grab the MTU from the new
> -              * gateway route if the current MTU is 0 or greater
> -              * than the MTU of gateway.
> -              * Note that, if the MTU of gateway is 0, we will reset the
> -              * MTU of the route to run PMTUD again from scratch. XXX
> -              */
> -             if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
> -                 rt->rt_rmx.rmx_mtu &&
> -                 rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) {
> -                     rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
> -             }
> -     }
> +
>       return (0);
>  }
>  
> @@ -1033,26 +1110,8 @@ rt_checkgate(struct ifnet *ifp, struct r
>       rt0 = rt;
>  
>       if (rt->rt_flags & RTF_GATEWAY) {
> -             if (rt->rt_gwroute && !(rt->rt_gwroute->rt_flags & RTF_UP)) {
> -                     rtfree(rt->rt_gwroute);
> -                     rt->rt_gwroute = NULL;
> -             }
> -             if (rt->rt_gwroute == NULL) {
> -                     rt->rt_gwroute = rtalloc(rt->rt_gateway,
> -                         RT_REPORT|RT_RESOLVE, rtableid);
> -                     if (rt->rt_gwroute == NULL)
> -                             return (EHOSTUNREACH);
> -             }
> -             /*
> -              * Next hop must be reachable, this also prevents rtentry
> -              * loops, for example when rt->rt_gwroute points to rt.
> -              */
> -             if (((rt->rt_gwroute->rt_flags & (RTF_UP|RTF_GATEWAY)) !=
> -                 RTF_UP) || (rt->rt_gwroute->rt_ifidx != ifp->if_index)) {
> -                     rtfree(rt->rt_gwroute);
> -                     rt->rt_gwroute = NULL;
> +             if (rt->rt_gwroute == NULL)
>                       return (EHOSTUNREACH);
> -             }
>               rt = rt->rt_gwroute;
>       }
>  
> Index: net/route.h
> ===================================================================
> RCS file: /cvs/src/sys/net/route.h,v
> retrieving revision 1.118
> diff -u -p -r1.118 route.h
> --- net/route.h       30 Oct 2015 09:39:42 -0000      1.118
> +++ net/route.h       4 Nov 2015 11:00:15 -0000
> @@ -119,6 +119,8 @@ struct rtentry {
>  };
>  #define      rt_use          rt_rmx.rmx_pksent
>  #define      rt_expire       rt_rmx.rmx_expire
> +#define      rt_locks        rt_rmx.rmx_locks
> +#define      rt_mtu          rt_rmx.rmx_mtu
>  
>  #define      RTF_UP          0x1             /* route usable */
>  #define      RTF_GATEWAY     0x2             /* destination is a gateway */
> @@ -358,7 +360,7 @@ void       rt_maskedcopy(struct sockaddr *,
>  void  rt_sendmsg(struct rtentry *, int, u_int);
>  void  rt_sendaddrmsg(struct rtentry *, int);
>  void  rt_missmsg(int, struct rt_addrinfo *, int, u_int, int, u_int);
> -int   rt_setgate(struct rtentry *, struct sockaddr *, unsigned int);
> +int   rt_setgate(struct rtentry *, struct sockaddr *);
>  int   rt_checkgate(struct ifnet *, struct rtentry *, struct sockaddr *,
>           unsigned int, struct rtentry **);
>  void  rt_setmetrics(u_long, struct rt_metrics *, struct rt_kmetrics *);
> Index: net/rtsock.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.181
> diff -u -p -r1.181 rtsock.c
> --- net/rtsock.c      2 Nov 2015 14:40:09 -0000       1.181
> +++ net/rtsock.c      4 Nov 2015 11:01:17 -0000
> @@ -745,9 +745,8 @@ report:
>                                       goto flush;
>                               ifa = info.rti_ifa;
>                       }
> -                     if (info.rti_info[RTAX_GATEWAY] != NULL &&
> -                         (error = rt_setgate(rt, info.rti_info[RTAX_GATEWAY],
> -                          tableid)))
> +                     if (info.rti_info[RTAX_GATEWAY] != NULL && (error =
> +                         rt_setgate(rt, info.rti_info[RTAX_GATEWAY])))
>                               goto flush;
>                       if (ifa) {
>                               if (rt->rt_ifa != ifa) {
> Index: net/if_spppsubr.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_spppsubr.c,v
> retrieving revision 1.144
> diff -u -p -r1.144 if_spppsubr.c
> --- net/if_spppsubr.c 2 Nov 2015 11:19:30 -0000       1.144
> +++ net/if_spppsubr.c 4 Nov 2015 11:03:24 -0000
> @@ -4244,10 +4244,9 @@ sppp_update_gw_walker(struct rtentry *rt
>       if (rt->rt_ifidx == ifp->if_index) {
>               if (rt->rt_ifa->ifa_dstaddr->sa_family !=
>                   rt->rt_gateway->sa_family ||
> -                 (rt->rt_flags & RTF_GATEWAY) == 0)
> +                 !ISSET(rt->rt_flags, RTF_GATEWAY))
>                       return (0);     /* do not modify non-gateway routes */
> -             bcopy(rt->rt_ifa->ifa_dstaddr, rt->rt_gateway,
> -                 rt->rt_ifa->ifa_dstaddr->sa_len);
> +             rt_setgate(rt, rt->rt_ifa->ifa_dstaddr);
>       }
>       return (0);
>  }

Reply via email to