On 25/08/15(Tue) 12:27, Martin Pieuchot wrote:
> On 12/08/15(Wed) 17:03, Martin Pieuchot wrote:
> > I'm currently working on the routing table interface to make is safe
> > to use by multiple CPUs at the same time. The diff below is a big
> > step in this direction and I'd really appreciate if people could test
> > it with their usual network setup and report back.
>
> Updated version to match recent changes. I'm still looking for test
> reports and reviews.
Thanks to all the testers. Here's a third version that should fix a
regression reported by phessler@. The idea is to use rtvalid(9) to
check early if the gateway route is still valid or not. If it isn't
then call rtalloc(9) again to perform the ARP/NDP resolution.
This diff includes rtvalid(9) and makes use of it in ip{,6}_output(),
more conversions will be done afterward, but these should hopefully
be all the necessary bits to fix the regression.
Tests & reviews are still welcome!
Index: net/route.c
===================================================================
RCS file: /cvs/src/sys/net/route.c,v
retrieving revision 1.226
diff -u -p -r1.226 route.c
--- net/route.c 30 Aug 2015 10:39:16 -0000 1.226
+++ net/route.c 31 Aug 2015 09:33:27 -0000
@@ -153,6 +153,7 @@ int rtable_alloc(void ***, u_int);
int rtflushclone1(struct rtentry *, void *, u_int);
void rtflushclone(unsigned int, struct rtentry *);
int rt_if_remove_rtdelete(struct rtentry *, void *, u_int);
+struct rtentry *rt_match(struct sockaddr *, int, unsigned int);
struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *,
u_int);
@@ -300,19 +301,68 @@ rtable_exists(u_int id) /* verify table
return (1);
}
+
+/*
+ * Do the actual checks for rtvalid(9), do not use directly!
+ */
+static inline int
+rt_is_valid(struct rtentry *rt)
+{
+ if (rt == NULL)
+ return (0);
+
+ if ((rt->rt_flags & RTF_UP) == 0)
+ return (0);
+
+ /* Routes attached to stall ifas should be freed. */
+ if (rt->rt_ifa == NULL || rt->rt_ifa->ifa_ifp == NULL)
+ return (0);
+
+ return (1);
+}
+
+/*
+ * Returns 1 if the cached ``rt'' entry is still valid, 0 otherwise.
+ */
+int
+rtvalid(struct rtentry *rt)
+{
+ if (rt_is_valid(rt) == 0)
+ return (0);
+
+ /* Next hop must also be valid. */
+ if ((rt->rt_flags & RTF_GATEWAY) && rt_is_valid(rt->rt_gwroute) == 0)
+ return (0);
+
+ return (1);
+}
+
+/*
+ * Do the actual lookup for rtalloc(9), do not use directly!
+ *
+ * Return the best matching entry for the destination ``dst''.
+ *
+ * "RT_RESOLVE" means that a corresponding L2 entry should
+ * be added to the routing table and resolved (via ARP or
+ * NDP), if it does not exist.
+ *
+ * "RT_REPORT" indicates that a message should be sent to
+ * userland if no matching route has been found or if an
+ * error occured while adding a L2 entry.
+ */
struct rtentry *
-rtalloc(struct sockaddr *dst, int flags, unsigned int tableid)
+rt_match(struct sockaddr *dst, int flags, unsigned int tableid)
{
struct rtentry *rt;
struct rtentry *newrt = NULL;
struct rt_addrinfo info;
- int s, error = 0, msgtype = RTM_MISS;
+ int s, error = 0;
- s = splsoftnet();
bzero(&info, sizeof(info));
info.rti_info[RTAX_DST] = dst;
+ s = splsoftnet();
rt = rtable_match(tableid, dst);
if (rt != NULL) {
newrt = rt;
@@ -325,10 +375,6 @@ rtalloc(struct sockaddr *dst, int flags,
goto miss;
}
rt = newrt;
- if (rt->rt_flags & RTF_XRESOLVE) {
- msgtype = RTM_RESOLVE;
- goto miss;
- }
/* Inform listeners of the new route */
rt_sendmsg(rt, RTM_ADD, tableid);
} else
@@ -336,11 +382,8 @@ rtalloc(struct sockaddr *dst, int flags,
} else {
rtstat.rts_unreach++;
miss:
- if (ISSET(flags, RT_REPORT)) {
- bzero((caddr_t)&info, sizeof(info));
- info.rti_info[RTAX_DST] = dst;
- rt_missmsg(msgtype, &info, 0, NULL, error, tableid);
- }
+ if (ISSET(flags, RT_REPORT))
+ rt_missmsg(RTM_MISS, &info, 0, NULL, error, tableid);
}
splx(s);
return (newrt);
@@ -374,6 +417,75 @@ rtalloc_mpath(struct sockaddr *dst, uint
}
#endif /* SMALL_KERNEL */
+/*
+ * Look in the routing table for the best matching entry for
+ * ``dst''.
+ *
+ * If a route with a gateway is found and its next hop is no
+ * longer valid, try to cache it.
+ */
+struct rtentry *
+rtalloc(struct sockaddr *dst, int flags, unsigned int rtableid)
+{
+ struct rtentry *rt, *nhrt;
+
+ rt = rt_match(dst, flags, rtableid);
+
+ /* No match or route to host? We're done. */
+ if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) == 0)
+ return (rt);
+
+ nhrt = rt->rt_gwroute;
+
+ /* Nothing to do if the next hop is valid. */
+ if (nhrt != NULL && (nhrt->rt_flags & RTF_UP))
+ return (rt);
+
+ rtfree(rt->rt_gwroute);
+ rt->rt_gwroute = NULL;
+
+ /*
+ * If we cannot find a valid next hop, return the route
+ * with a gateway.
+ *
+ * Some dragons hiding in the tree certainly depends on
+ * this behavior.
+ */
+ nhrt = rt_match(rt->rt_gateway, flags | RT_RESOLVE, rtableid);
+ if (nhrt == NULL)
+ return (rt);
+
+ /*
+ * Next hop must be reachable, this also prevents rtentry
+ * loops for example when rt->rt_gwroute points to rt.
+ */
+ if ((nhrt->rt_flags & (RTF_UP|RTF_CLONING|RTF_GATEWAY)) != RTF_UP) {
+ rtfree(nhrt);
+ return (rt);
+ }
+
+ /*
+ * Next hop entry MUST be on the same interface.
+ *
+ * This check is needed as long as we support stall
+ * ``ifa'' pointers.
+ */
+ if (nhrt->rt_ifp != rt->rt_ifp) {
+ rtfree(nhrt);
+ return (rt);
+ }
+
+ /*
+ * If the MTU of next hop is 0, this will reset the MTU of the
+ * route to run PMTUD again from scratch.
+ */
+ if (!ISSET(rt->rt_locks, RTV_MTU) && (rt->rt_mtu > nhrt->rt_mtu))
+ rt->rt_mtu = nhrt->rt_mtu;
+
+ rt->rt_gwroute = nhrt;
+ return (rt);
+}
+
void
rtfree(struct rtentry *rt)
{
@@ -527,7 +639,7 @@ create:
rt->rt_flags |= RTF_MODIFIED;
flags |= RTF_MODIFIED;
stat = &rtstat.rts_newgateway;
- rt_setgate(rt, gateway, rdomain);
+ rt_setgate(rt, gateway);
}
} else
error = EHOSTUNREACH;
@@ -659,19 +771,13 @@ ifa_ifwithroute(int flags, struct sockad
}
if (ifa == NULL) {
struct rtentry *rt = rtalloc(gateway, 0, rtableid);
- if (rt == NULL)
- return (NULL);
/* The gateway must be local if the same address family. */
- if ((rt->rt_flags & RTF_GATEWAY) &&
- rt_key(rt)->sa_family == dst->sa_family) {
+ if (rtvalid(rt) == 0 || ((rt->rt_flags & RTF_GATEWAY) &&
+ rt_key(rt)->sa_family == dst->sa_family)) {
rtfree(rt);
return (NULL);
}
ifa = rt->rt_ifa;
- if (ifa == NULL || ifa->ifa_ifp == NULL) {
- rtfree(rt);
- return (NULL);
- }
rtfree(rt);
}
if (ifa->ifa_addr->sa_family != dst->sa_family) {
@@ -982,8 +1088,7 @@ rtrequest1(int req, struct rt_addrinfo *
* the routing table because the radix MPATH code use
* it to (re)order routes.
*/
- if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY],
- tableid))) {
+ if ((error = rt_setgate(rt, info->rti_info[RTAX_GATEWAY]))) {
free(ndst, M_RTABLE, dlen);
pool_put(&rtentry_pool, rt);
return (error);
@@ -1034,7 +1139,7 @@ rtrequest1(int req, struct rt_addrinfo *
}
int
-rt_setgate(struct rtentry *rt, struct sockaddr *gate, unsigned int tableid)
+rt_setgate(struct rtentry *rt, struct sockaddr *gate)
{
int glen = ROUNDUP(gate->sa_len);
struct sockaddr *sa;
@@ -1052,22 +1157,7 @@ rt_setgate(struct rtentry *rt, struct so
rtfree(rt->rt_gwroute);
rt->rt_gwroute = NULL;
}
- if (rt->rt_flags & RTF_GATEWAY) {
- /* XXX is this actually valid to cross tables here? */
- rt->rt_gwroute = rtalloc(gate, RT_REPORT|RT_RESOLVE, tableid);
- /*
- * If we switched gateways, grab the MTU from the new
- * gateway route if the current MTU is 0 or greater
- * than the MTU of gateway.
- * Note that, if the MTU of gateway is 0, we will reset the
- * MTU of the route to run PMTUD again from scratch. XXX
- */
- if (rt->rt_gwroute && !(rt->rt_rmx.rmx_locks & RTV_MTU) &&
- rt->rt_rmx.rmx_mtu &&
- rt->rt_rmx.rmx_mtu > rt->rt_gwroute->rt_rmx.rmx_mtu) {
- rt->rt_rmx.rmx_mtu = rt->rt_gwroute->rt_rmx.rmx_mtu;
- }
- }
+
return (0);
}
@@ -1079,28 +1169,21 @@ rt_checkgate(struct ifnet *ifp, struct r
KASSERT(rt != NULL);
- if ((rt->rt_flags & RTF_UP) == 0) {
- rt = rtalloc(dst, RT_REPORT|RT_RESOLVE, rtableid);
- if (rt == NULL)
- return (EHOSTUNREACH);
- rt->rt_refcnt--;
- if (rt->rt_ifp != ifp)
- return (EHOSTUNREACH);
- }
+ if ((rt->rt_flags & RTF_UP) == 0)
+ return (EHOSTUNREACH);
rt0 = rt;
if (rt->rt_flags & RTF_GATEWAY) {
- if (rt->rt_gwroute && !(rt->rt_gwroute->rt_flags & RTF_UP)) {
+ if (rt->rt_gwroute == NULL)
+ return (EHOSTUNREACH);
+
+ if ((rt->rt_gwroute->rt_flags & RTF_UP) == 0) {
rtfree(rt->rt_gwroute);
rt->rt_gwroute = NULL;
+ return (EHOSTUNREACH);
}
- if (rt->rt_gwroute == NULL) {
- rt->rt_gwroute = rtalloc(rt->rt_gateway,
- RT_REPORT|RT_RESOLVE, rtableid);
- if (rt->rt_gwroute == NULL)
- return (EHOSTUNREACH);
- }
+
/*
* Next hop must be reachable, this also prevents rtentry
* loops, for example when rt->rt_gwroute points to rt.
Index: net/route.h
===================================================================
RCS file: /cvs/src/sys/net/route.h,v
retrieving revision 1.110
diff -u -p -r1.110 route.h
--- net/route.h 20 Aug 2015 12:39:43 -0000 1.110
+++ net/route.h 31 Aug 2015 09:05:07 -0000
@@ -118,6 +118,8 @@ struct rtentry {
};
#define rt_use rt_rmx.rmx_pksent
#define rt_expire rt_rmx.rmx_expire
+#define rt_locks rt_rmx.rmx_locks
+#define rt_mtu rt_rmx.rmx_mtu
#define RTF_UP 0x1 /* route usable */
#define RTF_GATEWAY 0x2 /* destination is a gateway */
@@ -361,7 +363,7 @@ void rt_sendmsg(struct rtentry *, int,
void rt_sendaddrmsg(struct rtentry *, int);
void rt_missmsg(int, struct rt_addrinfo *, int, struct ifnet *, int,
u_int);
-int rt_setgate(struct rtentry *, struct sockaddr *, unsigned int);
+int rt_setgate(struct rtentry *, struct sockaddr *);
int rt_checkgate(struct ifnet *, struct rtentry *, struct sockaddr *,
unsigned int, struct rtentry **);
void rt_setmetrics(u_long, struct rt_metrics *, struct rt_kmetrics *);
@@ -377,6 +379,7 @@ void rt_timer_queue_destroy(struct
rt
unsigned long rt_timer_queue_count(struct rttimer_queue *);
void rt_timer_timer(void *);
+int rtvalid(struct rtentry *);
#ifdef SMALL_KERNEL
#define rtalloc_mpath(dst, s, rid) rtalloc((dst),
RT_REPORT|RT_RESOLVE, (rid))
#else
Index: net/rtsock.c
===================================================================
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.169
diff -u -p -r1.169 rtsock.c
--- net/rtsock.c 24 Aug 2015 22:11:33 -0000 1.169
+++ net/rtsock.c 31 Aug 2015 09:05:07 -0000
@@ -744,9 +744,8 @@ report:
info.rti_info[RTAX_GATEWAY]->sa_len)) {
newgate = 1;
}
- if (info.rti_info[RTAX_GATEWAY] != NULL &&
- (error = rt_setgate(rt, info.rti_info[RTAX_GATEWAY],
- tableid)))
+ if (info.rti_info[RTAX_GATEWAY] != NULL && (error =
+ rt_setgate(rt, info.rti_info[RTAX_GATEWAY])))
goto flush;
/*
* new gateway could require new ifaddr, ifp;
Index: netinet/ip_output.c
===================================================================
RCS file: /cvs/src/sys/netinet/ip_output.c,v
retrieving revision 1.287
diff -u -p -r1.287 ip_output.c
--- netinet/ip_output.c 31 Aug 2015 07:17:12 -0000 1.287
+++ netinet/ip_output.c 31 Aug 2015 09:27:51 -0000
@@ -168,12 +168,13 @@ ip_output(struct mbuf *m0, struct mbuf *
dst = satosin(&ro->ro_dst);
/*
- * If there is a cached route, check that it is to the same
- * destination and is still up. If not, free it and try again.
+ * If there is a cached route, check that it is to the
+ * same destination and is still valid. If not, free
+ * it and try again.
*/
- if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+ if (rtvalid(ro->ro_rt) == 0 ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr ||
- ro->ro_tableid != m->m_pkthdr.ph_rtableid)) {
+ ro->ro_tableid != m->m_pkthdr.ph_rtableid) {
rtfree(ro->ro_rt);
ro->ro_rt = NULL;
}
@@ -296,12 +297,13 @@ reroute:
dst = satosin(&ro->ro_dst);
/*
- * If there is a cached route, check that it is to the same
- * destination and is still up. If not, free it and try again.
+ * If there is a cached route, check that it is to the
+ * same destination and is still valid. If not, free
+ * it and try again.
*/
- if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
+ if (rtvalid(ro->ro_rt) == 0 ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr ||
- ro->ro_tableid != m->m_pkthdr.ph_rtableid)) {
+ ro->ro_tableid != m->m_pkthdr.ph_rtableid) {
rtfree(ro->ro_rt);
ro->ro_rt = NULL;
}
Index: netinet6/in6_src.c
===================================================================
RCS file: /cvs/src/sys/netinet6/in6_src.c,v
retrieving revision 1.51
diff -u -p -r1.51 in6_src.c
--- netinet6/in6_src.c 8 Jun 2015 22:19:28 -0000 1.51
+++ netinet6/in6_src.c 31 Aug 2015 09:41:20 -0000
@@ -247,13 +247,12 @@ in6_selectsrc(struct in6_addr **in6src,
* our src addr is taken from the i/f, else punt.
*/
if (ro) {
- if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
- !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst))) {
+ if (rtvalid(ro->ro_rt) == 0 ||
+ !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst)) {
rtfree(ro->ro_rt);
ro->ro_rt = NULL;
}
- if (ro->ro_rt == (struct rtentry *)0 ||
- ro->ro_rt->rt_ifp == (struct ifnet *)0) {
+ if (ro->ro_rt == NULL) {
struct sockaddr_in6 *sa6;
/* No route yet, so try to acquire one */
@@ -419,10 +418,9 @@ selectroute(struct sockaddr_in6 *dstsock
* cached destination, in case of sharing the cache with IPv4.
*/
if (ro) {
- if (ro->ro_rt &&
- (!(ro->ro_rt->rt_flags & RTF_UP) ||
+ if (rtvalid(ro->ro_rt) == 0 ||
sin6tosa(&ro->ro_dst)->sa_family != AF_INET6 ||
- !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst))) {
+ !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, dst)) {
rtfree(ro->ro_rt);
ro->ro_rt = NULL;
}