Module Name: src
Committed By: ozaki-r
Date: Tue Apr 26 09:30:01 UTC 2016
Modified Files:
src/sys/net: if_mpls.c route.c route.h
src/sys/netinet: in_offload.c ip_output.c ip_var.h
src/sys/netinet6: nd6.c
Log Message:
Stop using rt_gwroute on packet sending paths
rt_gwroute of rtentry is a reference to a rtentry of the gateway
for a rtentry with RTF_GATEWAY. That was used by L2 (arp and ndp)
to look up L2 addresses. By separating L2 nexthop caches, we don't
need a route for the purpose and we can stop using rt_gwroute.
By doing so, we can reduce referencing and modifying rtentries,
which makes it easy to apply a lock (and/or psref) to the
routing table and rtentries.
One issue in doing this is keeping the RTF_REJECT behavior. It seems it
was broken when we moved the rtalloc1 handling from L2 output routines
(e.g., ether_output) to ip_hresolv_output, but (fortunately?)
it kept working by accident. The mistakes we made are:
- RTF_REJECT was checked for any routes in L2 output routines,
but in ip_hresolv_output it is checked only when the route
is RTF_GATEWAY
- The RTF_REJECT check wasn't copied to IPv6 (nd6_output)
It seems that the rt_gwroute checks hid the mistakes, so it appeared
to work (unexpectedly), and removing the rt_gwroute checks unveils the
issue. So we need to fix the RTF_REJECT checks in ip_hresolv_output
and also add them to nd6_output.
One more point we have to take care of is which errno to return; we need
to mimic the looutput behavior. Originally the RTF_REJECT check was
done either in L2 output routines or in looutput. The latter
applies when a reject route points to a loopback interface.
However, the RTF_REJECT check is now done before looutput, so to keep
the original behavior we need to return the errno that looutput
would choose. The newly added rt_check_reject_route does these tweaks.
To generate a diff of this commit:
cvs rdiff -u -r1.20 -r1.21 src/sys/net/if_mpls.c
cvs rdiff -u -r1.164 -r1.165 src/sys/net/route.c
cvs rdiff -u -r1.99 -r1.100 src/sys/net/route.h
cvs rdiff -u -r1.6 -r1.7 src/sys/netinet/in_offload.c
cvs rdiff -u -r1.251 -r1.252 src/sys/netinet/ip_output.c
cvs rdiff -u -r1.110 -r1.111 src/sys/netinet/ip_var.h
cvs rdiff -u -r1.192 -r1.193 src/sys/netinet6/nd6.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/net/if_mpls.c
diff -u src/sys/net/if_mpls.c:1.20 src/sys/net/if_mpls.c:1.21
--- src/sys/net/if_mpls.c:1.20 Tue Feb 9 08:32:12 2016
+++ src/sys/net/if_mpls.c Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: if_mpls.c,v 1.20 2016/02/09 08:32:12 ozaki-r Exp $ */
+/* $NetBSD: if_mpls.c,v 1.21 2016/04/26 09:30:01 ozaki-r Exp $ */
/*
* Copyright (c) 2010 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.20 2016/02/09 08:32:12 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: if_mpls.c,v 1.21 2016/04/26 09:30:01 ozaki-r Exp $");
#ifdef _KERNEL_OPT
#include "opt_inet.h"
@@ -473,7 +473,7 @@ mpls_send_frame(struct mbuf *m, struct i
case IFT_TUNNEL:
case IFT_LOOP:
#ifdef INET
- ret = ip_hresolv_output(ifp, m, rt->rt_gateway, rt);
+ ret = ip_if_output(ifp, m, rt->rt_gateway, rt);
#else
KERNEL_LOCK(1, NULL);
ret = (*ifp->if_output)(ifp, m, rt->rt_gateway, rt);
Index: src/sys/net/route.c
diff -u src/sys/net/route.c:1.164 src/sys/net/route.c:1.165
--- src/sys/net/route.c:1.164 Mon Apr 25 14:38:08 2016
+++ src/sys/net/route.c Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: route.c,v 1.164 2016/04/25 14:38:08 ozaki-r Exp $ */
+/* $NetBSD: route.c,v 1.165 2016/04/26 09:30:01 ozaki-r Exp $ */
/*-
* Copyright (c) 1998, 2008 The NetBSD Foundation, Inc.
@@ -96,7 +96,7 @@
#endif
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.164 2016/04/25 14:38:08 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: route.c,v 1.165 2016/04/26 09:30:01 ozaki-r Exp $");
#include <sys/param.h>
#ifdef RTFLUSH_DEBUG
@@ -1506,6 +1506,24 @@ rt_gettag(struct rtentry *rt)
return rt->rt_tag;
}
+int
+rt_check_reject_route(struct rtentry *rt, struct ifnet *ifp)
+{
+
+ if ((rt->rt_flags & RTF_REJECT) != 0) {
+ /* Mimic looutput */
+ if (ifp->if_flags & IFF_LOOPBACK)
+ return (rt->rt_flags & RTF_HOST) ?
+ EHOSTUNREACH : ENETUNREACH;
+ else if (rt->rt_rmx.rmx_expire == 0 ||
+ time_uptime < rt->rt_rmx.rmx_expire)
+ return (rt->rt_flags & RTF_GATEWAY) ?
+ EHOSTUNREACH : EHOSTDOWN;
+ }
+
+ return 0;
+}
+
#ifdef DDB
#include <machine/db_machdep.h>
Index: src/sys/net/route.h
diff -u src/sys/net/route.h:1.99 src/sys/net/route.h:1.100
--- src/sys/net/route.h:1.99 Mon Apr 11 09:21:18 2016
+++ src/sys/net/route.h Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: route.h,v 1.99 2016/04/11 09:21:18 ozaki-r Exp $ */
+/* $NetBSD: route.h,v 1.100 2016/04/26 09:30:01 ozaki-r Exp $ */
/*
* Copyright (c) 1980, 1986, 1993
@@ -405,23 +405,7 @@ const struct sockaddr *
struct sockaddr *
rt_gettag(struct rtentry *);
-static inline struct rtentry *
-rt_get_gwroute(struct rtentry *rt)
-{
- if (rt->rt_gwroute == NULL)
- return NULL;
- rt->rt_gwroute->rt_refcnt++;
- return rt->rt_gwroute;
-}
-
-static inline void
-rt_set_gwroute(struct rtentry *rt, struct rtentry *gwrt)
-{
-
- rt->rt_gwroute = gwrt;
- if (rt->rt_gwroute != NULL)
- rt->rt_gwroute->rt_refcnt++;
-}
+int rt_check_reject_route(struct rtentry *, struct ifnet *);
static inline void
rt_assert_referenced(const struct rtentry *rt)
Index: src/sys/netinet/in_offload.c
diff -u src/sys/netinet/in_offload.c:1.6 src/sys/netinet/in_offload.c:1.7
--- src/sys/netinet/in_offload.c:1.6 Thu Jun 4 09:20:00 2015
+++ src/sys/netinet/in_offload.c Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: in_offload.c,v 1.6 2015/06/04 09:20:00 ozaki-r Exp $ */
+/* $NetBSD: in_offload.c,v 1.7 2016/04/26 09:30:01 ozaki-r Exp $ */
/*-
* Copyright (c)2005, 2006 YAMAMOTO Takashi,
@@ -27,7 +27,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in_offload.c,v 1.6 2015/06/04 09:20:00 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in_offload.c,v 1.7 2016/04/26 09:30:01 ozaki-r Exp $");
#include <sys/param.h>
#include <sys/mbuf.h>
@@ -55,7 +55,7 @@ ip_tso_output_callback(void *vp, struct
struct ip_tso_output_args *args = vp;
struct ifnet *ifp = args->ifp;
- return ip_hresolv_output(ifp, m, args->sa, args->rt);
+ return ip_if_output(ifp, m, args->sa, args->rt);
}
int
Index: src/sys/netinet/ip_output.c
diff -u src/sys/netinet/ip_output.c:1.251 src/sys/netinet/ip_output.c:1.252
--- src/sys/netinet/ip_output.c:1.251 Tue Apr 19 09:36:35 2016
+++ src/sys/netinet/ip_output.c Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_output.c,v 1.251 2016/04/19 09:36:35 ozaki-r Exp $ */
+/* $NetBSD: ip_output.c,v 1.252 2016/04/26 09:30:01 ozaki-r Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.251 2016/04/19 09:36:35 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_output.c,v 1.252 2016/04/26 09:30:01 ozaki-r Exp $");
#ifdef _KERNEL_OPT
#include "opt_inet.h"
@@ -157,44 +157,6 @@ extern pfil_head_t *inet_pfil_hook; /*
int ip_do_loopback_cksum = 0;
-static bool
-ip_hresolv_needed(const struct ifnet * const ifp)
-{
- switch (ifp->if_type) {
- case IFT_ARCNET:
- case IFT_ATM:
- case IFT_ECONET:
- case IFT_ETHER:
- case IFT_FDDI:
- case IFT_HIPPI:
- case IFT_IEEE1394:
- case IFT_ISO88025:
- case IFT_SLIP:
- return true;
- default:
- return false;
- }
-}
-
-static int
-klock_if_output(struct ifnet * const ifp, struct mbuf * const m,
- const struct sockaddr * const dst, struct rtentry *rt)
-{
- int error;
-
-#ifndef NET_MPSAFE
- KERNEL_LOCK(1, NULL);
-#endif
-
- error = (*ifp->if_output)(ifp, m, dst, rt);
-
-#ifndef NET_MPSAFE
- KERNEL_UNLOCK_ONE(NULL);
-#endif
-
- return error;
-}
-
static int
ip_mark_mpls(struct ifnet * const ifp, struct mbuf * const m, struct rtentry *rt)
{
@@ -228,81 +190,37 @@ ip_mark_mpls(struct ifnet * const ifp, s
/*
* Send an IP packet to a host.
- *
- * If necessary, resolve the arbitrary IP route, rt0, to an IP host route before
- * calling ifp's output routine.
*/
int
-ip_hresolv_output(struct ifnet * const ifp, struct mbuf * const m,
- const struct sockaddr * const dst, struct rtentry *rt0)
+ip_if_output(struct ifnet * const ifp, struct mbuf * const m,
+ const struct sockaddr * const dst, struct rtentry *rt)
{
int error = 0;
- struct rtentry *rt = rt0, *gwrt;
-#define RTFREE_IF_NEEDED(_rt) \
- if ((_rt) != NULL && (_rt) != rt0) \
- rtfree((_rt));
-
- if (!ip_hresolv_needed(ifp))
- goto out;
-
- if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) == 0)
- goto out;
-
- gwrt = rt_get_gwroute(rt);
- RTFREE_IF_NEEDED(rt);
- rt = gwrt;
- if (rt == NULL || (rt->rt_flags & RTF_UP) == 0) {
- if (rt != NULL) {
- RTFREE_IF_NEEDED(rt);
- rt = rt0;
- }
- if (rt == NULL) {
- error = EHOSTUNREACH;
- goto bad;
- }
- gwrt = rtalloc1(rt->rt_gateway, 1);
- rt_set_gwroute(rt, gwrt);
- RTFREE_IF_NEEDED(rt);
- rt = gwrt;
- if (rt == NULL) {
- error = EHOSTUNREACH;
- goto bad;
- }
- /* the "G" test below also prevents rt == rt0 */
- if ((rt->rt_flags & RTF_GATEWAY) != 0 || rt->rt_ifp != ifp) {
- if (rt0->rt_gwroute != NULL)
- rtfree(rt0->rt_gwroute);
- rt0->rt_gwroute = NULL;
- error = EHOSTUNREACH;
- goto bad;
- }
- }
- if ((rt->rt_flags & RTF_REJECT) != 0) {
- if (rt->rt_rmx.rmx_expire == 0 ||
- time_uptime < rt->rt_rmx.rmx_expire) {
- error = (rt == rt0) ? EHOSTDOWN : EHOSTUNREACH;
- goto bad;
+ if (rt != NULL) {
+ error = rt_check_reject_route(rt, ifp);
+ if (error != 0) {
+ m_freem(m);
+ return error;
}
}
-out:
- error = ip_mark_mpls(ifp, m, rt0);
- if (error != 0)
- goto bad;
+ error = ip_mark_mpls(ifp, m, rt);
+ if (error != 0) {
+ m_freem(m);
+ return error;
+ }
- error = klock_if_output(ifp, m, dst, rt);
- goto exit;
+#ifndef NET_MPSAFE
+ KERNEL_LOCK(1, NULL);
+#endif
-bad:
- if (m != NULL)
- m_freem(m);
-exit:
- RTFREE_IF_NEEDED(rt);
+ error = (*ifp->if_output)(ifp, m, dst, rt);
+#ifndef NET_MPSAFE
+ KERNEL_UNLOCK_ONE(NULL);
+#endif
return error;
-
-#undef RTFREE_IF_NEEDED
}
/*
@@ -715,7 +633,7 @@ sendit:
if (__predict_true(
(m->m_pkthdr.csum_flags & M_CSUM_TSOv4) == 0 ||
(ifp->if_capenable & IFCAP_TSOv4) != 0)) {
- error = ip_hresolv_output(ifp, m, sa, rt);
+ error = ip_if_output(ifp, m, sa, rt);
} else {
error = ip_tso_output(ifp, m, sa, rt);
}
@@ -783,7 +701,7 @@ sendit:
} else {
KASSERT((m->m_pkthdr.csum_flags &
(M_CSUM_UDPv4 | M_CSUM_TCPv4)) == 0);
- error = ip_hresolv_output(ifp, m,
+ error = ip_if_output(ifp, m,
(m->m_flags & M_MCAST) ?
sintocsa(rdst) : sintocsa(dst), rt);
}
Index: src/sys/netinet/ip_var.h
diff -u src/sys/netinet/ip_var.h:1.110 src/sys/netinet/ip_var.h:1.111
--- src/sys/netinet/ip_var.h:1.110 Wed Jan 20 22:12:22 2016
+++ src/sys/netinet/ip_var.h Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_var.h,v 1.110 2016/01/20 22:12:22 riastradh Exp $ */
+/* $NetBSD: ip_var.h,v 1.111 2016/04/26 09:30:01 ozaki-r Exp $ */
/*
* Copyright (c) 1982, 1986, 1993
@@ -238,7 +238,7 @@ int rip_usrreq(struct socket *,
int ip_setmoptions(struct ip_moptions **, const struct sockopt *sopt);
int ip_getmoptions(struct ip_moptions *, struct sockopt *sopt);
-int ip_hresolv_output(struct ifnet * const, struct mbuf * const,
+int ip_if_output(struct ifnet * const, struct mbuf * const,
const struct sockaddr * const, struct rtentry *);
/* IP Flow interface. */
Index: src/sys/netinet6/nd6.c
diff -u src/sys/netinet6/nd6.c:1.192 src/sys/netinet6/nd6.c:1.193
--- src/sys/netinet6/nd6.c:1.192 Mon Apr 25 14:38:08 2016
+++ src/sys/netinet6/nd6.c Tue Apr 26 09:30:01 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: nd6.c,v 1.192 2016/04/25 14:38:08 ozaki-r Exp $ */
+/* $NetBSD: nd6.c,v 1.193 2016/04/26 09:30:01 ozaki-r Exp $ */
/* $KAME: nd6.c,v 1.279 2002/06/08 11:16:51 itojun Exp $ */
/*
@@ -31,7 +31,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: nd6.c,v 1.192 2016/04/25 14:38:08 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: nd6.c,v 1.193 2016/04/26 09:30:01 ozaki-r Exp $");
#ifdef _KERNEL_OPT
#include "opt_net_mpsafe.h"
@@ -2113,70 +2113,6 @@ nd6_slowtimo(void *ignored_arg)
mutex_exit(softnet_lock);
}
-/*
- * Next hop determination. This routine was derived from ip_output.c.
- */
-static int
-nd6_determine_nexthop(struct ifnet *ifp, const struct sockaddr_in6 *dst,
- struct rtentry *rt0, struct rtentry **ret_rt, bool *sendpkt)
-{
- struct rtentry *rt = rt0;
- struct rtentry *gwrt = NULL;
- struct sockaddr_in6 *gw6 = satosin6(rt->rt_gateway);
-
- /*
- * We skip link-layer address resolution and NUD
- * if the gateway is not a neighbor from ND point
- * of view, regardless of the value of nd_ifinfo.flags.
- * The second condition is a bit tricky; we skip
- * if the gateway is our own address, which is
- * sometimes used to install a route to a p2p link.
- */
- if (!nd6_is_addr_neighbor(gw6, ifp) ||
- in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
- /*
- * We allow this kind of tricky route only
- * when the outgoing interface is p2p.
- * XXX: we may need a more generic rule here.
- */
- if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
- goto hostunreach;
-
- *sendpkt = true;
- return 0;
- }
-
- /* Try to use a cached nexthop route (gwroute) if exists */
- gwrt = rt_get_gwroute(rt);
- if (gwrt == NULL || (gwrt->rt_flags & RTF_UP) == 0) {
- if (gwrt != NULL) {
- rtfree(gwrt);
- }
- /* Look up a nexthop route */
- gwrt = rtalloc1(rt->rt_gateway, 1);
- rt_set_gwroute(rt, gwrt);
- rt = gwrt;
- if (rt == NULL)
- goto hostunreach;
- /* the "G" test below also prevents rt == rt0 */
- if ((rt->rt_flags & RTF_GATEWAY) ||
- (rt->rt_ifp != ifp)) {
- if (rt0->rt_gwroute != NULL)
- rtfree(rt0->rt_gwroute);
- rt0->rt_gwroute = NULL;
- goto hostunreach;
- }
- }
- *ret_rt = gwrt;
- return 0;
-
-hostunreach:
- if (gwrt != NULL)
- rtfree(gwrt);
-
- return EHOSTUNREACH;
-}
-
int
nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m,
const struct sockaddr_in6 *dst, struct rtentry *rt)
@@ -2185,7 +2121,14 @@ nd6_output(struct ifnet *ifp, struct ifn
struct llentry *ln = NULL;
int error = 0;
bool created = false;
- struct rtentry *nexthop = NULL;
+
+ if (rt != NULL) {
+ error = rt_check_reject_route(rt, ifp);
+ if (error != 0) {
+ m_freem(m);
+ return error;
+ }
+ }
if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
goto sendpkt;
@@ -2193,6 +2136,32 @@ nd6_output(struct ifnet *ifp, struct ifn
if (nd6_need_cache(ifp) == 0)
goto sendpkt;
+ if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0) {
+ struct sockaddr_in6 *gw6 = satosin6(rt->rt_gateway);
+
+ /* XXX remain the check to keep the original behavior. */
+ /*
+ * We skip link-layer address resolution and NUD
+ * if the gateway is not a neighbor from ND point
+ * of view, regardless of the value of nd_ifinfo.flags.
+ * The second condition is a bit tricky; we skip
+ * if the gateway is our own address, which is
+ * sometimes used to install a route to a p2p link.
+ */
+ if (!nd6_is_addr_neighbor(gw6, ifp) ||
+ in6ifa_ifpwithaddr(ifp, &gw6->sin6_addr)) {
+ /*
+ * We allow this kind of tricky route only
+ * when the outgoing interface is p2p.
+ * XXX: we may need a more generic rule here.
+ */
+ if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
+ senderr(EHOSTUNREACH);
+
+ goto sendpkt;
+ }
+ }
+
/*
* Address resolution or Neighbor Unreachability Detection
* for the next hop.
@@ -2200,19 +2169,6 @@ nd6_output(struct ifnet *ifp, struct ifn
* or an anycast address(i.e. not a multicast).
*/
- if (rt != NULL && (rt->rt_flags & RTF_GATEWAY) != 0) {
- bool sendpkt = false;
-
- /* Still need a nexthop to reflect RTF_{REJECT,BLACKHOLE} */
- error = nd6_determine_nexthop(ifp, dst, rt, &nexthop, &sendpkt);
- if (error != 0)
- senderr(error);
- if (nexthop != NULL)
- rt = nexthop;
- if (sendpkt)
- goto sendpkt;
- }
-
/* Look up the neighbor cache for the nexthop */
ln = nd6_lookup(&dst->sin6_addr, ifp, true);
if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) {
@@ -2346,9 +2302,6 @@ nd6_output(struct ifnet *ifp, struct ifn
if (m != NULL)
m_freem(m);
exit:
- if (nexthop != NULL)
- rtfree(nexthop);
-
if (created)
nd6_gc_neighbors(LLTABLE6(ifp));