Module Name:    src
Committed By:   ozaki-r
Date:           Tue Apr 26 09:30:01 UTC 2016

Modified Files:
        src/sys/net: if_mpls.c route.c route.h
        src/sys/netinet: in_offload.c ip_output.c ip_var.h
        src/sys/netinet6: nd6.c

Log Message:
Stop using rt_gwroute on packet sending paths

rt_gwroute of rtentry is a reference to a rtentry of the gateway
for a rtentry with RTF_GATEWAY. That was used by L2 (arp and ndp)
to look up L2 addresses. By separating L2 nexthop caches, we don't
need a route for the purpose and we can stop using rt_gwroute.
By doing so, we can reduce referencing and modifying rtentries,
which makes it easy to apply a lock (and/or psref) to the
routing table and rtentries.

One issue to do this is to keep RTF_REJECT behavior. It seems it
was broken when we moved rtalloc1 things from L2 output routines
(e.g., ether_output) to ip_hresolv_output, but (fortunately?)
it works unexpectedly. What we mistook are:
- RTF_REJECT was checked for any routes in L2 output routines,
  but in ip_hresolv_output it is checked only when the route
  is RTF_GATEWAY
- The RTF_REJECT check wasn't copied to IPv6 (nd6_output)

It seems that rt_gwroute checks hid the mistakes and it looked
work (unexpectedly) and removing rt_gwroute checks unveil the
issue. So we need to fix RTF_REJECT checks in ip_hresolv_output
and also add them to nd6_output.

One more point we have to care is returning an errno; we need
to mimic looutput behavior. Originally RTF_REJECT check was
done either in L2 output routines or in looutput. The latter is
applied when a reject route directs to a loopback interface.
However, now RTF_REJECT check is done before looutput so to keep
the original behavior we need to return an errno which looutput
chooses. Added rt_check_reject_route does such tweaks.


To generate a diff of this commit:
cvs rdiff -u -r1.20 -r1.21 src/sys/net/if_mpls.c
cvs rdiff -u -r1.164 -r1.165 src/sys/net/route.c
cvs rdiff -u -r1.99 -r1.100 src/sys/net/route.h
cvs rdiff -u -r1.6 -r1.7 src/sys/netinet/in_offload.c
cvs rdiff -u -r1.251 -r1.252 src/sys/netinet/ip_output.c
cvs rdiff -u -r1.110 -r1.111 src/sys/netinet/ip_var.h
cvs rdiff -u -r1.192 -r1.193 src/sys/netinet6/nd6.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/net/if_mpls.c
diff -u src/sys/net/if_mpls.c:1.20 src/sys/net/if_mpls.c:1.21
--- src/sys/net/if_mpls.c:1.20	Tue Feb  9 08:32:12 2016
+++ src/sys/net/if_mpls.c	Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: if_mpls.c,v 1.20 2016/02/09 08:32:12 ozaki-r Exp $ */
+/*	$NetBSD: if_mpls.c,v 1.21 2016/04/26 09:30:01 ozaki-r Exp $ */
 
 /*
  * Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.20 2016/02/09 08:32:12 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.21 2016/04/26 09:30:01 ozaki-r Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_inet.h"
@@ -473,7 +473,7 @@ mpls_send_frame(struct mbuf *m, struct i
 	case IFT_TUNNEL:
 	case IFT_LOOP:
 #ifdef INET
-		ret = ip_hresolv_output(ifp, m, rt->rt_gateway, rt);
+		ret = ip_if_output(ifp, m, rt->rt_gateway, rt);
 #else
 		KERNEL_LOCK(1, NULL);
 		ret =  (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);

Index: src/sys/net/route.c
diff -u src/sys/net/route.c:1.164 src/sys/net/route.c:1.165
--- src/sys/net/route.c:1.164	Mon Apr 25 14:38:08 2016
+++ src/sys/net/route.c	Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: route.c,v 1.164 2016/04/25 14:38:08 ozaki-r Exp $	*/
+/*	$NetBSD: route.c,v 1.165 2016/04/26 09:30:01 ozaki-r Exp $	*/
 
 /*-
  * Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
@@ -96,7 +96,7 @@
 #endif
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.164 2016/04/25 14:38:08 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.165 2016/04/26 09:30:01 ozaki-r Exp $");
 
 #include <sys/param.h>
 #ifdef RTFLUSH_DEBUG
@@ -1506,6 +1506,24 @@ rt_gettag(struct rtentry *rt)
 	return rt->rt_tag;
 }
 
+int
+rt_check_reject_route(struct rtentry *rt, struct ifnet *ifp)
+{
+
+	if ((rt->rt_flags & RTF_REJECT) != 0) {
+		/* Mimic looutput */
+		if (ifp->if_flags & IFF_LOOPBACK)
+			return (rt->rt_flags & RTF_HOST) ?
+			    EHOSTUNREACH : ENETUNREACH;
+		else if (rt->rt_rmx.rmx_expire == 0 ||
+		    time_uptime < rt->rt_rmx.rmx_expire)
+			return (rt->rt_flags & RTF_GATEWAY) ?
+			    EHOSTUNREACH : EHOSTDOWN;
+	}
+
+	return 0;
+}
+
 #ifdef DDB
 
 #include <machine/db_machdep.h>

Index: src/sys/net/route.h
diff -u src/sys/net/route.h:1.99 src/sys/net/route.h:1.100
--- src/sys/net/route.h:1.99	Mon Apr 11 09:21:18 2016
+++ src/sys/net/route.h	Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: route.h,v 1.99 2016/04/11 09:21:18 ozaki-r Exp $	*/
+/*	$NetBSD: route.h,v 1.100 2016/04/26 09:30:01 ozaki-r Exp $	*/
 
 /*
  * Copyright (c) 1980, 1986, 1993
@@ -405,23 +405,7 @@ const struct sockaddr *
 struct sockaddr *
 	rt_gettag(struct rtentry *);
 
-static inline struct rtentry *
-rt_get_gwroute(struct rtentry *rt)
-{
-	if (rt->rt_gwroute == NULL)
-		return NULL;
-	rt->rt_gwroute->rt_refcnt++;
-	return rt->rt_gwroute;
-}
-
-static inline void
-rt_set_gwroute(struct rtentry *rt, struct rtentry *gwrt)
-{
-
-	rt->rt_gwroute = gwrt;
-	if (rt->rt_gwroute != NULL)
-		rt->rt_gwroute->rt_refcnt++;
-}
+int	rt_check_reject_route(struct rtentry *, struct ifnet *);
 
 static inline void
 rt_assert_referenced(const struct rtentry *rt)

Index: src/sys/netinet/in_offload.c
diff -u src/sys/netinet/in_offload.c:1.6 src/sys/netinet/in_offload.c:1.7
--- src/sys/netinet/in_offload.c:1.6	Thu Jun  4 09:20:00 2015
+++ src/sys/netinet/in_offload.c	Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: in_offload.c,v 1.6 2015/06/04 09:20:00 ozaki-r Exp $	*/
+/*	$NetBSD: in_offload.c,v 1.7 2016/04/26 09:30:01 ozaki-r Exp $	*/
 
 /*-
  * Copyright (c)2005, 2006 YAMAMOTO Takashi,
@@ -27,7 +27,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in_offload.c,v 1.6 2015/06/04 09:20:00 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in_offload.c,v 1.7 2016/04/26 09:30:01 ozaki-r Exp $");
 
 #include <sys/param.h>
 #include <sys/mbuf.h>
@@ -55,7 +55,7 @@ ip_tso_output_callback(void *vp, struct 
 	struct ip_tso_output_args *args = vp;
 	struct ifnet *ifp = args->ifp;
 
-	return ip_hresolv_output(ifp, m, args->sa, args->rt);
+	return ip_if_output(ifp, m, args->sa, args->rt);
 }
 
 int

Index: src/sys/netinet/ip_output.c
diff -u src/sys/netinet/ip_output.c:1.251 src/sys/netinet/ip_output.c:1.252
--- src/sys/netinet/ip_output.c:1.251	Tue Apr 19 09:36:35 2016
+++ src/sys/netinet/ip_output.c	Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_output.c,v 1.251 2016/04/19 09:36:35 ozaki-r Exp $	*/
+/*	$NetBSD: ip_output.c,v 1.252 2016/04/26 09:30:01 ozaki-r Exp $	*/
 
 /*
  * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.251 2016/04/19 09:36:35 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.252 2016/04/26 09:30:01 ozaki-r Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_inet.h"
@@ -157,44 +157,6 @@ extern pfil_head_t *inet_pfil_hook;			/*
 
 int	ip_do_loopback_cksum = 0;
 
-static bool
-ip_hresolv_needed(const struct ifnet * const ifp)
-{
-	switch (ifp->if_type) {
-	case IFT_ARCNET:
-	case IFT_ATM:
-	case IFT_ECONET:
-	case IFT_ETHER:
-	case IFT_FDDI:
-	case IFT_HIPPI:
-	case IFT_IEEE1394:
-	case IFT_ISO88025:
-	case IFT_SLIP:
-		return true;
-	default:
-		return false;
-	}
-}
-
-static int
-klock_if_output(struct ifnet * const ifp, struct mbuf * const m,
-    const struct sockaddr * const dst, struct rtentry *rt)
-{
-	int error;
-
-#ifndef NET_MPSAFE
-	KERNEL_LOCK(1, NULL);
-#endif
-
-	error = (*ifp->if_output)(ifp, m, dst, rt);
-
-#ifndef NET_MPSAFE
-	KERNEL_UNLOCK_ONE(NULL);
-#endif
-
-	return error;
-}
-
 static int
 ip_mark_mpls(struct ifnet * const ifp, struct mbuf * const m, struct rtentry *rt)
 {
@@ -228,81 +190,37 @@ ip_mark_mpls(struct ifnet * const ifp, s
 
 /*
  * Send an IP packet to a host.
- *
- * If necessary, resolve the arbitrary IP route, rt0, to an IP host route before
- * calling ifp's output routine.
  */
 int
-ip_hresolv_output(struct ifnet * const ifp, struct mbuf * const m,
-    const struct sockaddr * const dst, struct rtentry *rt0)
+ip_if_output(struct ifnet * const ifp, struct mbuf * const m,
+    const struct sockaddr * const dst, struct rtentry *rt)
 {
 	int error = 0;
-	struct rtentry *rt = rt0, *gwrt;
 
-#define RTFREE_IF_NEEDED(_rt) \
-	if ((_rt) != NULL && (_rt) != rt0) \
-		rtfree((_rt));
-
-	if (!ip_hresolv_needed(ifp))
-		goto out;
-
-	if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) == 0)
-		goto out;
-
-	gwrt = rt_get_gwroute(rt);
-	RTFREE_IF_NEEDED(rt);
-	rt = gwrt;
-	if (rt == NULL || (rt->rt_flags & RTF_UP) == 0) {
-		if (rt != NULL) {
-			RTFREE_IF_NEEDED(rt);
-			rt = rt0;
-		}
-		if (rt == NULL) {
-			error = EHOSTUNREACH;
-			goto bad;
-		}
-		gwrt = rtalloc1(rt->rt_gateway, 1);
-		rt_set_gwroute(rt, gwrt);
-		RTFREE_IF_NEEDED(rt);
-		rt = gwrt;
-		if (rt == NULL) {
-			error = EHOSTUNREACH;
-			goto bad;
-		}
-		/* the "G" test below also prevents rt == rt0 */
-		if ((rt->rt_flags & RTF_GATEWAY) != 0 || rt->rt_ifp != ifp) {
-			if (rt0->rt_gwroute != NULL)
-				rtfree(rt0->rt_gwroute);
-			rt0->rt_gwroute = NULL;
-			error = EHOSTUNREACH;
-			goto bad;
-		}
-	}
-	if ((rt->rt_flags & RTF_REJECT) != 0) {
-		if (rt->rt_rmx.rmx_expire == 0 ||
-		    time_uptime < rt->rt_rmx.rmx_expire) {
-			error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
-			goto bad;
+	if (rt != NULL) {
+		error = rt_check_reject_route(rt, ifp);
+		if (error != 0) {
+			m_freem(m);
+			return error;
 		}
 	}
 
-out:
-	error = ip_mark_mpls(ifp, m, rt0);
-	if (error != 0)
-		goto bad;
+	error = ip_mark_mpls(ifp, m, rt);
+	if (error != 0) {
+		m_freem(m);
+		return error;
+	}
 
-	error = klock_if_output(ifp, m, dst, rt);
-	goto exit;
+#ifndef NET_MPSAFE
+	KERNEL_LOCK(1, NULL);
+#endif
 
-bad:
-	if (m != NULL)
-		m_freem(m);
-exit:
-	RTFREE_IF_NEEDED(rt);
+	error = (*ifp->if_output)(ifp, m, dst, rt);
 
+#ifndef NET_MPSAFE
+	KERNEL_UNLOCK_ONE(NULL);
+#endif
 	return error;
-
-#undef RTFREE_IF_NEEDED
 }
 
 /*
@@ -715,7 +633,7 @@ sendit:
 		if (__predict_true(
 		    (m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0 ||
 		    (ifp->if_capenable & IFCAP_TSOv4) != 0)) {
-			error = ip_hresolv_output(ifp, m, sa, rt);
+			error = ip_if_output(ifp, m, sa, rt);
 		} else {
 			error = ip_tso_output(ifp, m, sa, rt);
 		}
@@ -783,7 +701,7 @@ sendit:
 		} else {
 			KASSERT((m->m_pkthdr.csum_flags &
 			    (M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0);
-			error = ip_hresolv_output(ifp, m,
+			error = ip_if_output(ifp, m,
 			    (m->m_flags & M_MCAST) ?
 			    sintocsa(rdst) : sintocsa(dst), rt);
 		}

Index: src/sys/netinet/ip_var.h
diff -u src/sys/netinet/ip_var.h:1.110 src/sys/netinet/ip_var.h:1.111
--- src/sys/netinet/ip_var.h:1.110	Wed Jan 20 22:12:22 2016
+++ src/sys/netinet/ip_var.h	Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: ip_var.h,v 1.110 2016/01/20 22:12:22 riastradh Exp $	*/
+/*	$NetBSD: ip_var.h,v 1.111 2016/04/26 09:30:01 ozaki-r Exp $	*/
 
 /*
  * Copyright (c) 1982, 1986, 1993
@@ -238,7 +238,7 @@ int	 rip_usrreq(struct socket *,
 int	ip_setmoptions(struct ip_moptions **, const struct sockopt *sopt);
 int	ip_getmoptions(struct ip_moptions *, struct sockopt *sopt);
 
-int	ip_hresolv_output(struct ifnet * const, struct mbuf * const,
+int	ip_if_output(struct ifnet * const, struct mbuf * const,
 	    const struct sockaddr * const, struct rtentry *);
 
 /* IP Flow interface. */

Index: src/sys/netinet6/nd6.c
diff -u src/sys/netinet6/nd6.c:1.192 src/sys/netinet6/nd6.c:1.193
--- src/sys/netinet6/nd6.c:1.192	Mon Apr 25 14:38:08 2016
+++ src/sys/netinet6/nd6.c	Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: nd6.c,v 1.192 2016/04/25 14:38:08 ozaki-r Exp $	*/
+/*	$NetBSD: nd6.c,v 1.193 2016/04/26 09:30:01 ozaki-r Exp $	*/
 /*	$KAME: nd6.c,v 1.279 2002/06/08 11:16:51 itojun Exp $	*/
 
 /*
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nd6.c,v 1.192 2016/04/25 14:38:08 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nd6.c,v 1.193 2016/04/26 09:30:01 ozaki-r Exp $");
 
 #ifdef _KERNEL_OPT
 #include "opt_net_mpsafe.h"
@@ -2113,70 +2113,6 @@ nd6_slowtimo(void *ignored_arg)
 	mutex_exit(softnet_lock);
 }
 
-/*
- * Next hop determination.  This routine was derived from ip_output.c.
- */
-static int
-nd6_determine_nexthop(struct ifnet *ifp, const struct sockaddr_in6 *dst,
-    struct rtentry *rt0, struct rtentry **ret_rt, bool *sendpkt)
-{
-	struct rtentry *rt = rt0;
-	struct rtentry *gwrt = NULL;
-	struct sockaddr_in6 *gw6 = satosin6(rt->rt_gateway);
-
-	/*
-	 * We skip link-layer address resolution and NUD
-	 * if the gateway is not a neighbor from ND point
-	 * of view, regardless of the value of nd_ifinfo.flags.
-	 * The second condition is a bit tricky; we skip
-	 * if the gateway is our own address, which is
-	 * sometimes used to install a route to a p2p link.
-	 */
-	if (!nd6_is_addr_neighbor(gw6, ifp) ||
-	    in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
-		/*
-		 * We allow this kind of tricky route only
-		 * when the outgoing interface is p2p.
-		 * XXX: we may need a more generic rule here.
-		 */
-		if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
-			goto hostunreach;
-
-		*sendpkt = true;
-		return 0;
-	}
-
-	/* Try to use a cached nexthop route (gwroute) if exists */
-	gwrt = rt_get_gwroute(rt);
-	if (gwrt == NULL || (gwrt->rt_flags & RTF_UP) == 0) {
-		if (gwrt != NULL) {
-			rtfree(gwrt);
-		}
-		/* Look up a nexthop route */
-		gwrt = rtalloc1(rt->rt_gateway, 1);
-		rt_set_gwroute(rt, gwrt);
-		rt = gwrt;
-		if (rt == NULL)
-			goto hostunreach;
-		/* the "G" test below also prevents rt == rt0 */
-		if ((rt->rt_flags & RTF_GATEWAY) ||
-		    (rt->rt_ifp != ifp)) {
-			if (rt0->rt_gwroute != NULL)
-				rtfree(rt0->rt_gwroute);
-			rt0->rt_gwroute = NULL;
-			goto hostunreach;
-		}
-	}
-	*ret_rt = gwrt;
-	return 0;
-
-hostunreach:
-	if (gwrt != NULL)
-		rtfree(gwrt);
-
-	return EHOSTUNREACH;
-}
-
 int
 nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
     const struct sockaddr_in6 *dst, struct rtentry *rt)
@@ -2185,7 +2121,14 @@ nd6_output(struct ifnet *ifp, struct ifn
 	struct llentry *ln = NULL;
 	int error = 0;
 	bool created = false;
-	struct rtentry *nexthop = NULL;
+
+	if (rt != NULL) {
+		error = rt_check_reject_route(rt, ifp);
+		if (error != 0) {
+			m_freem(m);
+			return error;
+		}
+	}
 
 	if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
 		goto sendpkt;
@@ -2193,6 +2136,32 @@ nd6_output(struct ifnet *ifp, struct ifn
 	if (nd6_need_cache(ifp) == 0)
 		goto sendpkt;
 
+	if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0) {
+		struct sockaddr_in6 *gw6 = satosin6(rt->rt_gateway);
+
+		/* XXX remain the check to keep the original behavior. */
+		/*
+		 * We skip link-layer address resolution and NUD
+		 * if the gateway is not a neighbor from ND point
+		 * of view, regardless of the value of nd_ifinfo.flags.
+		 * The second condition is a bit tricky; we skip
+		 * if the gateway is our own address, which is
+		 * sometimes used to install a route to a p2p link.
+		 */
+		if (!nd6_is_addr_neighbor(gw6, ifp) ||
+		    in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
+			/*
+			 * We allow this kind of tricky route only
+			 * when the outgoing interface is p2p.
+			 * XXX: we may need a more generic rule here.
+			 */
+			if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+				senderr(EHOSTUNREACH);
+
+			goto sendpkt;
+		}
+	}
+
 	/*
 	 * Address resolution or Neighbor Unreachability Detection
 	 * for the next hop.
@@ -2200,19 +2169,6 @@ nd6_output(struct ifnet *ifp, struct ifn
 	 * or an anycast address(i.e. not a multicast).
 	 */
 
-	if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0) {
-		bool sendpkt = false;
-
-		/* Still need a nexthop to reflect RTF_{REJECT,BLACKHOLE} */
-		error = nd6_determine_nexthop(ifp, dst, rt, &nexthop, &sendpkt);
-		if (error != 0)
-			senderr(error);
-		if (nexthop != NULL)
-			rt = nexthop;
-		if (sendpkt)
-			goto sendpkt;
-	}
-
 	/* Look up the neighbor cache for the nexthop */
 	ln = nd6_lookup(&dst->sin6_addr, ifp, true);
 	if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp))  {
@@ -2346,9 +2302,6 @@ nd6_output(struct ifnet *ifp, struct ifn
 	if (m != NULL)
 		m_freem(m);
   exit:
-	if (nexthop != NULL)
-		rtfree(nexthop);
-
 	if (created)
 		nd6_gc_neighbors(LLTABLE6(ifp));
 

Reply via email to