On Wed, Nov 04, 2015 at 12:33:23PM +0100, Martin Pieuchot wrote:
> On 12/08/15(Wed) 17:03, Martin Pieuchot wrote:
> > I'm currently working on the routing table interface to make is safe
> > to use by multiple CPUs at the same time. The diff below is a big
> > step in this direction and I'd really appreciate if people could test
> > it with their usual network setup and report back.
>
> Below is an updated version of the diff now that bluhm@ fixed the
> potential loop with RTF_GATEWAY routes.
>
> Note that regression tests will fail because we no longer call
> rtalloc(9) inside rt_setgate(). In other words we do not cache
> a next-hop route before using it.
>
> Ok?
OK bluhm@
>
> Index: net/route.c
> ===================================================================
> RCS file: /cvs/src/sys/net/route.c,v
> retrieving revision 1.268
> diff -u -p -r1.268 route.c
> --- net/route.c 4 Nov 2015 10:13:55 -0000 1.268
> +++ net/route.c 4 Nov 2015 11:15:00 -0000
> @@ -151,6 +151,7 @@ void rt_timer_init(void);
> int rtflushclone1(struct rtentry *, void *, u_int);
> void rtflushclone(unsigned int, struct rtentry *);
> int rt_if_remove_rtdelete(struct rtentry *, void *, u_int);
> +struct rtentry *rt_match(struct sockaddr *, int, unsigned int);
>
> struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr
> *,
> u_int);
> @@ -194,6 +195,12 @@ rtisvalid(struct rtentry *rt)
> if (rt == NULL)
> return (0);
>
> +#ifdef DIAGNOSTIC
> + if (ISSET(rt->rt_flags, RTF_GATEWAY) && (rt->rt_gwroute != NULL) &&
> + ISSET(rt->rt_gwroute->rt_flags, RTF_GATEWAY))
> + panic("next hop must be directly reachable");
> +#endif
> +
> if ((rt->rt_flags & RTF_UP) == 0)
> return (0);
>
> @@ -201,11 +208,27 @@ rtisvalid(struct rtentry *rt)
> if (rt->rt_ifa == NULL || rt->rt_ifa->ifa_ifp == NULL)
> return (0);
>
> + if (ISSET(rt->rt_flags, RTF_GATEWAY) && !rtisvalid(rt->rt_gwroute))
> + return (0);
> +
> return (1);
> }
>
> +/*
> + * Do the actual lookup for rtalloc(9), do not use directly!
> + *
> + * Return the best matching entry for the destination ``dst''.
> + *
> + * "RT_RESOLVE" means that a corresponding L2 entry should
> + * be added to the routing table and resolved (via ARP or
> + * NDP), if it does not exist.
> + *
> + * "RT_REPORT" indicates that a message should be sent to
> + * userland if no matching route has been found or if an
> + * error occured while adding a L2 entry.
> + */
> struct rtentry *
> -rtalloc(struct sockaddr *dst, int flags, unsigned int tableid)
> +rt_match(struct sockaddr *dst, int flags, unsigned int tableid)
> {
> struct rtentry *rt0, *rt = NULL;
> struct rt_addrinfo info;
> @@ -336,6 +359,76 @@ rtalloc_mpath(struct sockaddr *dst, uint
> }
> #endif /* SMALL_KERNEL */
>
> +/*
> + * Look in the routing table for the best matching entry for
> + * ``dst''.
> + *
> + * If a route with a gateway is found and its next hop is no
> + * longer valid, try to cache it.
> + */
> +struct rtentry *
> +rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid)
> +{
> + struct rtentry *rt, *nhrt;
> +
> + rt = rt_match(dst, flags, rtableid);
> +
> + /* No match or route to host? We're done. */
> + if (rt == NULL || !ISSET(rt->rt_flags, RTF_GATEWAY))
> + return (rt);
> +
> + /* Nothing to do if the next hop is valid. */
> + if (rtisvalid(rt->rt_gwroute))
> + return (rt);
> +
> + rtfree(rt->rt_gwroute);
> + rt->rt_gwroute = NULL;
> +
> + /*
> + * If we cannot find a valid next hop, return the route
> + * with a gateway.
> + *
> + * XXX Some dragons hiding in the tree certainly depends on
> + * this behavior. But it is safe since rt_checkgate() wont
> + * allow us to us this route later on.
> + */
> + nhrt = rt_match(rt->rt_gateway, flags | RT_RESOLVE, rtableid);
> + if (nhrt == NULL)
> + return (rt);
> +
> + /*
> + * Next hop must be reachable, this also prevents rtentry
> + * loops for example when rt->rt_gwroute points to rt.
> + */
> + if (ISSET(nhrt->rt_flags, RTF_CLONING|RTF_GATEWAY)) {
> + rtfree(nhrt);
> + return (rt);
> + }
> +
> + /*
> + * Next hop entry must be UP and on the same interface.
> + */
> + if (!ISSET(nhrt->rt_flags, RTF_UP) || nhrt->rt_ifidx != rt->rt_ifidx) {
> + rtfree(nhrt);
> + return (rt);
> + }
> +
> + /*
> + * If the MTU of next hop is 0, this will reset the MTU of the
> + * route to run PMTUD again from scratch.
> + */
> + if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
> + rt->rt_mtu = nhrt->rt_mtu;
> +
> + /*
> + * Do not return the cached next-hop route, rt_checkgate() will
> + * do the magic for us.
> + */
> + rt->rt_gwroute = nhrt;
> +
> + return (rt);
> +}
> +
> void
> rtref(struct rtentry *rt)
> {
> @@ -499,7 +592,7 @@ create:
> rt->rt_flags |= RTF_MODIFIED;
> flags |= RTF_MODIFIED;
> stat = &rtstat.rts_newgateway;
> - rt_setgate(rt, gateway, rdomain);
> + rt_setgate(rt, gateway);
> }
> } else
> error = EHOSTUNREACH;
> @@ -931,8 +1024,7 @@ rtrequest(int req, struct rt_addrinfo *i
> * the routing table because the radix MPATH code use
> * it to (re)order routes.
> */
> - if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
> - tableid))) {
> + if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]))) {
> free(ndst, M_RTABLE, dlen);
> pool_put(&rtentry_pool, rt);
> return (error);
> @@ -985,7 +1077,7 @@ rtrequest(int req, struct rt_addrinfo *i
> }
>
> int
> -rt_setgate(struct rtentry *rt, struct sockaddr *gate, unsigned int tableid)
> +rt_setgate(struct rtentry *rt, struct sockaddr *gate)
> {
> int glen = ROUNDUP(gate->sa_len);
> struct sockaddr *sa;
> @@ -1003,22 +1095,7 @@ rt_setgate(struct rtentry *rt, struct so
> rtfree(rt->rt_gwroute);
> rt->rt_gwroute = NULL;
> }
> - if (rt->rt_flags & RTF_GATEWAY) {
> - /* XXX is this actually valid to cross tables here? */
> - rt->rt_gwroute = rtalloc(gate, RT_REPORT|RT_RESOLVE, tableid);
> - /*
> - * If we switched gateways, grab the MTU from the new
> - * gateway route if the current MTU is 0 or greater
> - * than the MTU of gateway.
> - * Note that, if the MTU of gateway is 0, we will reset the
> - * MTU of the route to run PMTUD again from scratch. XXX
> - */
> - if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
> - rt->rt_rmx.rmx_mtu &&
> - rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) {
> - rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
> - }
> - }
> +
> return (0);
> }
>
> @@ -1033,26 +1110,8 @@ rt_checkgate(struct ifnet *ifp, struct r
> rt0 = rt;
>
> if (rt->rt_flags & RTF_GATEWAY) {
> - if (rt->rt_gwroute && !(rt->rt_gwroute->rt_flags & RTF_UP)) {
> - rtfree(rt->rt_gwroute);
> - rt->rt_gwroute = NULL;
> - }
> - if (rt->rt_gwroute == NULL) {
> - rt->rt_gwroute = rtalloc(rt->rt_gateway,
> - RT_REPORT|RT_RESOLVE, rtableid);
> - if (rt->rt_gwroute == NULL)
> - return (EHOSTUNREACH);
> - }
> - /*
> - * Next hop must be reachable, this also prevents rtentry
> - * loops, for example when rt->rt_gwroute points to rt.
> - */
> - if (((rt->rt_gwroute->rt_flags & (RTF_UP|RTF_GATEWAY)) !=
> - RTF_UP) || (rt->rt_gwroute->rt_ifidx != ifp->if_index)) {
> - rtfree(rt->rt_gwroute);
> - rt->rt_gwroute = NULL;
> + if (rt->rt_gwroute == NULL)
> return (EHOSTUNREACH);
> - }
> rt = rt->rt_gwroute;
> }
>
> Index: net/route.h
> ===================================================================
> RCS file: /cvs/src/sys/net/route.h,v
> retrieving revision 1.118
> diff -u -p -r1.118 route.h
> --- net/route.h 30 Oct 2015 09:39:42 -0000 1.118
> +++ net/route.h 4 Nov 2015 11:00:15 -0000
> @@ -119,6 +119,8 @@ struct rtentry {
> };
> #define rt_use rt_rmx.rmx_pksent
> #define rt_expire rt_rmx.rmx_expire
> +#define rt_locks rt_rmx.rmx_locks
> +#define rt_mtu rt_rmx.rmx_mtu
>
> #define RTF_UP 0x1 /* route usable */
> #define RTF_GATEWAY 0x2 /* destination is a gateway */
> @@ -358,7 +360,7 @@ void rt_maskedcopy(struct sockaddr *,
> void rt_sendmsg(struct rtentry *, int, u_int);
> void rt_sendaddrmsg(struct rtentry *, int);
> void rt_missmsg(int, struct rt_addrinfo *, int, u_int, int, u_int);
> -int rt_setgate(struct rtentry *, struct sockaddr *, unsigned int);
> +int rt_setgate(struct rtentry *, struct sockaddr *);
> int rt_checkgate(struct ifnet *, struct rtentry *, struct sockaddr *,
> unsigned int, struct rtentry **);
> void rt_setmetrics(u_long, struct rt_metrics *, struct rt_kmetrics *);
> Index: net/rtsock.c
> ===================================================================
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.181
> diff -u -p -r1.181 rtsock.c
> --- net/rtsock.c 2 Nov 2015 14:40:09 -0000 1.181
> +++ net/rtsock.c 4 Nov 2015 11:01:17 -0000
> @@ -745,9 +745,8 @@ report:
> goto flush;
> ifa = info.rti_ifa;
> }
> - if (info.rti_info[RTAX_GATEWAY] != NULL &&
> - (error = rt_setgate(rt, info.rti_info[RTAX_GATEWAY],
> - tableid)))
> + if (info.rti_info[RTAX_GATEWAY] != NULL && (error =
> + rt_setgate(rt, info.rti_info[RTAX_GATEWAY])))
> goto flush;
> if (ifa) {
> if (rt->rt_ifa != ifa) {
> Index: net/if_spppsubr.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_spppsubr.c,v
> retrieving revision 1.144
> diff -u -p -r1.144 if_spppsubr.c
> --- net/if_spppsubr.c 2 Nov 2015 11:19:30 -0000 1.144
> +++ net/if_spppsubr.c 4 Nov 2015 11:03:24 -0000
> @@ -4244,10 +4244,9 @@ sppp_update_gw_walker(struct rtentry *rt
> if (rt->rt_ifidx == ifp->if_index) {
> if (rt->rt_ifa->ifa_dstaddr->sa_family !=
> rt->rt_gateway->sa_family ||
> - (rt->rt_flags & RTF_GATEWAY) == 0)
> + !ISSET(rt->rt_flags, RTF_GATEWAY))
> return (0); /* do not modify non-gateway routes */
> - bcopy(rt->rt_ifa->ifa_dstaddr, rt->rt_gateway,
> - rt->rt_ifa->ifa_dstaddr->sa_len);
> + rt_setgate(rt, rt->rt_ifa->ifa_dstaddr);
> }
> return (0);
> }