Module Name: src Committed By: ozaki-r Date: Tue Apr 26 09:30:01 UTC 2016
Modified Files: src/sys/net: if_mpls.c route.c route.h src/sys/netinet: in_offload.c ip_output.c ip_var.h src/sys/netinet6: nd6.c Log Message: Stop using rt_gwroute on packet sending paths rt_gwroute of rtentry is a reference to the rtentry of the gateway for an rtentry with RTF_GATEWAY. It was used by L2 (arp and ndp) to look up L2 addresses. With L2 nexthop caches separated, we no longer need a route for that purpose and can stop using rt_gwroute. Doing so reduces referencing and modifying of rtentries, which makes it easier to apply a lock (and/or psref) to the routing table and rtentries. One issue in doing this is preserving the RTF_REJECT behavior. It seems it was broken when we moved the rtalloc1 logic from L2 output routines (e.g., ether_output) to ip_hresolv_output, but (fortunately?) it happened to keep working. The mistakes were: - RTF_REJECT was checked for any route in L2 output routines, but in ip_hresolv_output it is checked only when the route is RTF_GATEWAY - The RTF_REJECT check wasn't copied to IPv6 (nd6_output) It seems that the rt_gwroute checks hid the mistakes so that it appeared to work (unexpectedly), and removing the rt_gwroute checks unveils the issue. So we need to fix the RTF_REJECT checks in ip_hresolv_output and also add them to nd6_output. One more point we have to take care of is which errno to return; we need to mimic looutput behavior. Originally the RTF_REJECT check was done either in L2 output routines or in looutput. The latter applies when a reject route points to a loopback interface. However, the RTF_REJECT check is now done before looutput, so to keep the original behavior we need to return the errno that looutput would choose. The newly added rt_check_reject_route implements these tweaks. 
To generate a diff of this commit: cvs rdiff -u -r1.20 -r1.21 src/sys/net/if_mpls.c cvs rdiff -u -r1.164 -r1.165 src/sys/net/route.c cvs rdiff -u -r1.99 -r1.100 src/sys/net/route.h cvs rdiff -u -r1.6 -r1.7 src/sys/netinet/in_offload.c cvs rdiff -u -r1.251 -r1.252 src/sys/netinet/ip_output.c cvs rdiff -u -r1.110 -r1.111 src/sys/netinet/ip_var.h cvs rdiff -u -r1.192 -r1.193 src/sys/netinet6/nd6.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/net/if_mpls.c diff -u src/sys/net/if_mpls.c:1.20 src/sys/net/if_mpls.c:1.21 --- src/sys/net/if_mpls.c:1.20 Tue Feb 9 08:32:12 2016 +++ src/sys/net/if_mpls.c Tue Apr 26 09:30:01 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: if_mpls.c,v 1.20 2016/02/09 08:32:12 ozaki-r Exp $ */ +/* $NetBSD: if_mpls.c,v 1.21 2016/04/26 09:30:01 ozaki-r Exp $ */ /* * Copyright (c) 2010 The NetBSD Foundation, Inc. @@ -30,7 +30,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.20 2016/02/09 08:32:12 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.21 2016/04/26 09:30:01 ozaki-r Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" @@ -473,7 +473,7 @@ mpls_send_frame(struct mbuf *m, struct i case IFT_TUNNEL: case IFT_LOOP: #ifdef INET - ret = ip_hresolv_output(ifp, m, rt->rt_gateway, rt); + ret = ip_if_output(ifp, m, rt->rt_gateway, rt); #else KERNEL_LOCK(1, NULL); ret = (*ifp->if_output)(ifp, m, rt->rt_gateway, rt); Index: src/sys/net/route.c diff -u src/sys/net/route.c:1.164 src/sys/net/route.c:1.165 --- src/sys/net/route.c:1.164 Mon Apr 25 14:38:08 2016 +++ src/sys/net/route.c Tue Apr 26 09:30:01 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: route.c,v 1.164 2016/04/25 14:38:08 ozaki-r Exp $ */ +/* $NetBSD: route.c,v 1.165 2016/04/26 09:30:01 ozaki-r Exp $ */ /*- * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc. @@ -96,7 +96,7 @@ #endif #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.164 2016/04/25 14:38:08 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.165 2016/04/26 09:30:01 ozaki-r Exp $"); #include <sys/param.h> #ifdef RTFLUSH_DEBUG @@ -1506,6 +1506,24 @@ rt_gettag(struct rtentry *rt) return rt->rt_tag; } +int +rt_check_reject_route(struct rtentry *rt, struct ifnet *ifp) +{ + + if ((rt->rt_flags & RTF_REJECT) != 0) { + /* Mimic looutput */ + if (ifp->if_flags & IFF_LOOPBACK) + return (rt->rt_flags & RTF_HOST) ? 
+ EHOSTUNREACH : ENETUNREACH; + else if (rt->rt_rmx.rmx_expire == 0 || + time_uptime < rt->rt_rmx.rmx_expire) + return (rt->rt_flags & RTF_GATEWAY) ? + EHOSTUNREACH : EHOSTDOWN; + } + + return 0; +} + #ifdef DDB #include <machine/db_machdep.h> Index: src/sys/net/route.h diff -u src/sys/net/route.h:1.99 src/sys/net/route.h:1.100 --- src/sys/net/route.h:1.99 Mon Apr 11 09:21:18 2016 +++ src/sys/net/route.h Tue Apr 26 09:30:01 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: route.h,v 1.99 2016/04/11 09:21:18 ozaki-r Exp $ */ +/* $NetBSD: route.h,v 1.100 2016/04/26 09:30:01 ozaki-r Exp $ */ /* * Copyright (c) 1980, 1986, 1993 @@ -405,23 +405,7 @@ const struct sockaddr * struct sockaddr * rt_gettag(struct rtentry *); -static inline struct rtentry * -rt_get_gwroute(struct rtentry *rt) -{ - if (rt->rt_gwroute == NULL) - return NULL; - rt->rt_gwroute->rt_refcnt++; - return rt->rt_gwroute; -} - -static inline void -rt_set_gwroute(struct rtentry *rt, struct rtentry *gwrt) -{ - - rt->rt_gwroute = gwrt; - if (rt->rt_gwroute != NULL) - rt->rt_gwroute->rt_refcnt++; -} +int rt_check_reject_route(struct rtentry *, struct ifnet *); static inline void rt_assert_referenced(const struct rtentry *rt) Index: src/sys/netinet/in_offload.c diff -u src/sys/netinet/in_offload.c:1.6 src/sys/netinet/in_offload.c:1.7 --- src/sys/netinet/in_offload.c:1.6 Thu Jun 4 09:20:00 2015 +++ src/sys/netinet/in_offload.c Tue Apr 26 09:30:01 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: in_offload.c,v 1.6 2015/06/04 09:20:00 ozaki-r Exp $ */ +/* $NetBSD: in_offload.c,v 1.7 2016/04/26 09:30:01 ozaki-r Exp $ */ /*- * Copyright (c)2005, 2006 YAMAMOTO Takashi, @@ -27,7 +27,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: in_offload.c,v 1.6 2015/06/04 09:20:00 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: in_offload.c,v 1.7 2016/04/26 09:30:01 ozaki-r Exp $"); #include <sys/param.h> #include <sys/mbuf.h> @@ -55,7 +55,7 @@ ip_tso_output_callback(void *vp, struct struct ip_tso_output_args *args = vp; struct ifnet *ifp = 
args->ifp; - return ip_hresolv_output(ifp, m, args->sa, args->rt); + return ip_if_output(ifp, m, args->sa, args->rt); } int Index: src/sys/netinet/ip_output.c diff -u src/sys/netinet/ip_output.c:1.251 src/sys/netinet/ip_output.c:1.252 --- src/sys/netinet/ip_output.c:1.251 Tue Apr 19 09:36:35 2016 +++ src/sys/netinet/ip_output.c Tue Apr 26 09:30:01 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_output.c,v 1.251 2016/04/19 09:36:35 ozaki-r Exp $ */ +/* $NetBSD: ip_output.c,v 1.252 2016/04/26 09:30:01 ozaki-r Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -91,7 +91,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.251 2016/04/19 09:36:35 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.252 2016/04/26 09:30:01 ozaki-r Exp $"); #ifdef _KERNEL_OPT #include "opt_inet.h" @@ -157,44 +157,6 @@ extern pfil_head_t *inet_pfil_hook; /* int ip_do_loopback_cksum = 0; -static bool -ip_hresolv_needed(const struct ifnet * const ifp) -{ - switch (ifp->if_type) { - case IFT_ARCNET: - case IFT_ATM: - case IFT_ECONET: - case IFT_ETHER: - case IFT_FDDI: - case IFT_HIPPI: - case IFT_IEEE1394: - case IFT_ISO88025: - case IFT_SLIP: - return true; - default: - return false; - } -} - -static int -klock_if_output(struct ifnet * const ifp, struct mbuf * const m, - const struct sockaddr * const dst, struct rtentry *rt) -{ - int error; - -#ifndef NET_MPSAFE - KERNEL_LOCK(1, NULL); -#endif - - error = (*ifp->if_output)(ifp, m, dst, rt); - -#ifndef NET_MPSAFE - KERNEL_UNLOCK_ONE(NULL); -#endif - - return error; -} - static int ip_mark_mpls(struct ifnet * const ifp, struct mbuf * const m, struct rtentry *rt) { @@ -228,81 +190,37 @@ ip_mark_mpls(struct ifnet * const ifp, s /* * Send an IP packet to a host. - * - * If necessary, resolve the arbitrary IP route, rt0, to an IP host route before - * calling ifp's output routine. 
*/ int -ip_hresolv_output(struct ifnet * const ifp, struct mbuf * const m, - const struct sockaddr * const dst, struct rtentry *rt0) +ip_if_output(struct ifnet * const ifp, struct mbuf * const m, + const struct sockaddr * const dst, struct rtentry *rt) { int error = 0; - struct rtentry *rt = rt0, *gwrt; -#define RTFREE_IF_NEEDED(_rt) \ - if ((_rt) != NULL && (_rt) != rt0) \ - rtfree((_rt)); - - if (!ip_hresolv_needed(ifp)) - goto out; - - if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) == 0) - goto out; - - gwrt = rt_get_gwroute(rt); - RTFREE_IF_NEEDED(rt); - rt = gwrt; - if (rt == NULL || (rt->rt_flags & RTF_UP) == 0) { - if (rt != NULL) { - RTFREE_IF_NEEDED(rt); - rt = rt0; - } - if (rt == NULL) { - error = EHOSTUNREACH; - goto bad; - } - gwrt = rtalloc1(rt->rt_gateway, 1); - rt_set_gwroute(rt, gwrt); - RTFREE_IF_NEEDED(rt); - rt = gwrt; - if (rt == NULL) { - error = EHOSTUNREACH; - goto bad; - } - /* the "G" test below also prevents rt == rt0 */ - if ((rt->rt_flags & RTF_GATEWAY) != 0 || rt->rt_ifp != ifp) { - if (rt0->rt_gwroute != NULL) - rtfree(rt0->rt_gwroute); - rt0->rt_gwroute = NULL; - error = EHOSTUNREACH; - goto bad; - } - } - if ((rt->rt_flags & RTF_REJECT) != 0) { - if (rt->rt_rmx.rmx_expire == 0 || - time_uptime < rt->rt_rmx.rmx_expire) { - error = (rt == rt0) ? 
EHOSTDOWN : EHOSTUNREACH; - goto bad; + if (rt != NULL) { + error = rt_check_reject_route(rt, ifp); + if (error != 0) { + m_freem(m); + return error; } } -out: - error = ip_mark_mpls(ifp, m, rt0); - if (error != 0) - goto bad; + error = ip_mark_mpls(ifp, m, rt); + if (error != 0) { + m_freem(m); + return error; + } - error = klock_if_output(ifp, m, dst, rt); - goto exit; +#ifndef NET_MPSAFE + KERNEL_LOCK(1, NULL); +#endif -bad: - if (m != NULL) - m_freem(m); -exit: - RTFREE_IF_NEEDED(rt); + error = (*ifp->if_output)(ifp, m, dst, rt); +#ifndef NET_MPSAFE + KERNEL_UNLOCK_ONE(NULL); +#endif return error; - -#undef RTFREE_IF_NEEDED } /* @@ -715,7 +633,7 @@ sendit: if (__predict_true( (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0 || (ifp->if_capenable & IFCAP_TSOv4) != 0)) { - error = ip_hresolv_output(ifp, m, sa, rt); + error = ip_if_output(ifp, m, sa, rt); } else { error = ip_tso_output(ifp, m, sa, rt); } @@ -783,7 +701,7 @@ sendit: } else { KASSERT((m->m_pkthdr.csum_flags & (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0); - error = ip_hresolv_output(ifp, m, + error = ip_if_output(ifp, m, (m->m_flags & M_MCAST) ? sintocsa(rdst) : sintocsa(dst), rt); } Index: src/sys/netinet/ip_var.h diff -u src/sys/netinet/ip_var.h:1.110 src/sys/netinet/ip_var.h:1.111 --- src/sys/netinet/ip_var.h:1.110 Wed Jan 20 22:12:22 2016 +++ src/sys/netinet/ip_var.h Tue Apr 26 09:30:01 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: ip_var.h,v 1.110 2016/01/20 22:12:22 riastradh Exp $ */ +/* $NetBSD: ip_var.h,v 1.111 2016/04/26 09:30:01 ozaki-r Exp $ */ /* * Copyright (c) 1982, 1986, 1993 @@ -238,7 +238,7 @@ int rip_usrreq(struct socket *, int ip_setmoptions(struct ip_moptions **, const struct sockopt *sopt); int ip_getmoptions(struct ip_moptions *, struct sockopt *sopt); -int ip_hresolv_output(struct ifnet * const, struct mbuf * const, +int ip_if_output(struct ifnet * const, struct mbuf * const, const struct sockaddr * const, struct rtentry *); /* IP Flow interface. 
*/ Index: src/sys/netinet6/nd6.c diff -u src/sys/netinet6/nd6.c:1.192 src/sys/netinet6/nd6.c:1.193 --- src/sys/netinet6/nd6.c:1.192 Mon Apr 25 14:38:08 2016 +++ src/sys/netinet6/nd6.c Tue Apr 26 09:30:01 2016 @@ -1,4 +1,4 @@ -/* $NetBSD: nd6.c,v 1.192 2016/04/25 14:38:08 ozaki-r Exp $ */ +/* $NetBSD: nd6.c,v 1.193 2016/04/26 09:30:01 ozaki-r Exp $ */ /* $KAME: nd6.c,v 1.279 2002/06/08 11:16:51 itojun Exp $ */ /* @@ -31,7 +31,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: nd6.c,v 1.192 2016/04/25 14:38:08 ozaki-r Exp $"); +__KERNEL_RCSID(0, "$NetBSD: nd6.c,v 1.193 2016/04/26 09:30:01 ozaki-r Exp $"); #ifdef _KERNEL_OPT #include "opt_net_mpsafe.h" @@ -2113,70 +2113,6 @@ nd6_slowtimo(void *ignored_arg) mutex_exit(softnet_lock); } -/* - * Next hop determination. This routine was derived from ip_output.c. - */ -static int -nd6_determine_nexthop(struct ifnet *ifp, const struct sockaddr_in6 *dst, - struct rtentry *rt0, struct rtentry **ret_rt, bool *sendpkt) -{ - struct rtentry *rt = rt0; - struct rtentry *gwrt = NULL; - struct sockaddr_in6 *gw6 = satosin6(rt->rt_gateway); - - /* - * We skip link-layer address resolution and NUD - * if the gateway is not a neighbor from ND point - * of view, regardless of the value of nd_ifinfo.flags. - * The second condition is a bit tricky; we skip - * if the gateway is our own address, which is - * sometimes used to install a route to a p2p link. - */ - if (!nd6_is_addr_neighbor(gw6, ifp) || - in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) { - /* - * We allow this kind of tricky route only - * when the outgoing interface is p2p. - * XXX: we may need a more generic rule here. 
- */ - if ((ifp->if_flags & IFF_POINTOPOINT) == 0) - goto hostunreach; - - *sendpkt = true; - return 0; - } - - /* Try to use a cached nexthop route (gwroute) if exists */ - gwrt = rt_get_gwroute(rt); - if (gwrt == NULL || (gwrt->rt_flags & RTF_UP) == 0) { - if (gwrt != NULL) { - rtfree(gwrt); - } - /* Look up a nexthop route */ - gwrt = rtalloc1(rt->rt_gateway, 1); - rt_set_gwroute(rt, gwrt); - rt = gwrt; - if (rt == NULL) - goto hostunreach; - /* the "G" test below also prevents rt == rt0 */ - if ((rt->rt_flags & RTF_GATEWAY) || - (rt->rt_ifp != ifp)) { - if (rt0->rt_gwroute != NULL) - rtfree(rt0->rt_gwroute); - rt0->rt_gwroute = NULL; - goto hostunreach; - } - } - *ret_rt = gwrt; - return 0; - -hostunreach: - if (gwrt != NULL) - rtfree(gwrt); - - return EHOSTUNREACH; -} - int nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, const struct sockaddr_in6 *dst, struct rtentry *rt) @@ -2185,7 +2121,14 @@ nd6_output(struct ifnet *ifp, struct ifn struct llentry *ln = NULL; int error = 0; bool created = false; - struct rtentry *nexthop = NULL; + + if (rt != NULL) { + error = rt_check_reject_route(rt, ifp); + if (error != 0) { + m_freem(m); + return error; + } + } if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr)) goto sendpkt; @@ -2193,6 +2136,32 @@ nd6_output(struct ifnet *ifp, struct ifn if (nd6_need_cache(ifp) == 0) goto sendpkt; + if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0) { + struct sockaddr_in6 *gw6 = satosin6(rt->rt_gateway); + + /* XXX remain the check to keep the original behavior. */ + /* + * We skip link-layer address resolution and NUD + * if the gateway is not a neighbor from ND point + * of view, regardless of the value of nd_ifinfo.flags. + * The second condition is a bit tricky; we skip + * if the gateway is our own address, which is + * sometimes used to install a route to a p2p link. 
+ */ + if (!nd6_is_addr_neighbor(gw6, ifp) || + in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) { + /* + * We allow this kind of tricky route only + * when the outgoing interface is p2p. + * XXX: we may need a more generic rule here. + */ + if ((ifp->if_flags & IFF_POINTOPOINT) == 0) + senderr(EHOSTUNREACH); + + goto sendpkt; + } + } + /* * Address resolution or Neighbor Unreachability Detection * for the next hop. @@ -2200,19 +2169,6 @@ nd6_output(struct ifnet *ifp, struct ifn * or an anycast address(i.e. not a multicast). */ - if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0) { - bool sendpkt = false; - - /* Still need a nexthop to reflect RTF_{REJECT,BLACKHOLE} */ - error = nd6_determine_nexthop(ifp, dst, rt, &nexthop, &sendpkt); - if (error != 0) - senderr(error); - if (nexthop != NULL) - rt = nexthop; - if (sendpkt) - goto sendpkt; - } - /* Look up the neighbor cache for the nexthop */ ln = nd6_lookup(&dst->sin6_addr, ifp, true); if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) { @@ -2346,9 +2302,6 @@ nd6_output(struct ifnet *ifp, struct ifn if (m != NULL) m_freem(m); exit: - if (nexthop != NULL) - rtfree(nexthop); - if (created) nd6_gc_neighbors(LLTABLE6(ifp));