Module Name: src
Committed By: knakahara
Date: Mon Jun 13 08:34:23 UTC 2016
Modified Files:
src/sys/netinet: ip_flow.c
src/sys/netinet6: ip6_flow.c
Log Message:
MP-ify fastforward to support GATEWAY kernel option.
I added an "ipflow_lock" mutex in ip_flow.c and an "ip6flow_lock" mutex in
ip6_flow.c to protect all data in each file. Of course, this is not MP-scalable.
However, it is sufficient as a tentative workaround. We should make it scalable
somehow in the future.
ok by [email protected].
To generate a diff of this commit:
cvs rdiff -u -r1.69 -r1.70 src/sys/netinet/ip_flow.c
cvs rdiff -u -r1.24 -r1.25 src/sys/netinet6/ip6_flow.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/netinet/ip_flow.c
diff -u src/sys/netinet/ip_flow.c:1.69 src/sys/netinet/ip_flow.c:1.70
--- src/sys/netinet/ip_flow.c:1.69 Mon Jun 13 08:29:55 2016
+++ src/sys/netinet/ip_flow.c Mon Jun 13 08:34:23 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_flow.c,v 1.69 2016/06/13 08:29:55 knakahara Exp $ */
+/* $NetBSD: ip_flow.c,v 1.70 2016/06/13 08:34:23 knakahara Exp $ */
/*-
* Copyright (c) 1998 The NetBSD Foundation, Inc.
@@ -30,7 +30,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.69 2016/06/13 08:29:55 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_flow.c,v 1.70 2016/06/13 08:34:23 knakahara Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -72,6 +72,14 @@ LIST_HEAD(ipflowhead, ipflow);
#define IPFLOW_TIMER (5 * PR_SLOWHZ)
#define IPFLOW_DEFAULT_HASHSIZE (1 << IPFLOW_HASHBITS)
+/*
+ * ip_flow.c internal lock.
+ * If we use softnet_lock, it would cause recursive lock.
+ *
+ * This is a tentative workaround.
+ * We should make it scalable somehow in the future.
+ */
+static kmutex_t ipflow_lock;
static struct ipflowhead *ipflowtable = NULL;
static struct ipflowhead ipflowlist;
static int ipflow_inuse;
@@ -117,6 +125,8 @@ ipflow_lookup(const struct ip *ip)
size_t hash;
struct ipflow *ipf;
+ KASSERT(mutex_owned(&ipflow_lock));
+
hash = ipflow_hash(ip);
LIST_FOREACH(ipf, &ipflowtable[hash], ipf_hash) {
@@ -142,6 +152,8 @@ ipflow_reinit(int table_size)
struct ipflowhead *new_table;
size_t i;
+ KASSERT(mutex_owned(&ipflow_lock));
+
new_table = (struct ipflowhead *)malloc(sizeof(struct ipflowhead) *
table_size, M_RTABLE, M_NOWAIT);
@@ -164,7 +176,12 @@ ipflow_reinit(int table_size)
void
ipflow_init(void)
{
+
+ mutex_init(&ipflow_lock, MUTEX_DEFAULT, IPL_NONE);
+
+ mutex_enter(&ipflow_lock);
(void)ipflow_reinit(ip_hashsize);
+ mutex_exit(&ipflow_lock);
ipflow_sysctl_init(NULL);
}
@@ -180,19 +197,21 @@ ipflow_fastforward(struct mbuf *m)
int iplen;
struct ifnet *ifp;
int s;
+ int ret = 0;
+ mutex_enter(&ipflow_lock);
/*
* Are we forwarding packets? Big enough for an IP packet?
*/
if (!ipforwarding || ipflow_inuse == 0 || m->m_len < sizeof(struct ip))
- return 0;
+ goto out;
/*
* Was packet received as a link-level multicast or broadcast?
* If so, don't try to fast forward..
*/
if ((m->m_flags & (M_BCAST|M_MCAST)) != 0)
- return 0;
+ goto out;
/*
* IP header with no option and valid version and length
@@ -206,12 +225,12 @@ ipflow_fastforward(struct mbuf *m)
iplen = ntohs(ip->ip_len);
if (ip->ip_v != IPVERSION || ip->ip_hl != (sizeof(struct ip) >> 2) ||
iplen < sizeof(struct ip) || iplen > m->m_pkthdr.len)
- return 0;
+ goto out;
/*
* Find a flow.
*/
if ((ipf = ipflow_lookup(ip)) == NULL)
- return 0;
+ goto out;
ifp = m_get_rcvif(m, &s);
/*
@@ -222,7 +241,7 @@ ipflow_fastforward(struct mbuf *m)
M_CSUM_IPv4_BAD)) {
case M_CSUM_IPv4|M_CSUM_IPv4_BAD:
m_put_rcvif(ifp, &s);
- return 0;
+ goto out;
case M_CSUM_IPv4:
/* Checksum was okay. */
@@ -232,7 +251,7 @@ ipflow_fastforward(struct mbuf *m)
/* Must compute it ourselves. */
if (in_cksum(m, sizeof(struct ip)) != 0) {
m_put_rcvif(ifp, &s);
- return 0;
+ goto out;
}
break;
}
@@ -244,13 +263,13 @@ ipflow_fastforward(struct mbuf *m)
if ((rt = rtcache_validate(&ipf->ipf_ro)) == NULL ||
(rt->rt_ifp->if_flags & IFF_UP) == 0 ||
(rt->rt_flags & (RTF_BLACKHOLE | RTF_BROADCAST)) != 0)
- return 0;
+ goto out;
/*
* Packet size OK? TTL?
*/
if (m->m_pkthdr.len > rt->rt_ifp->if_mtu || ip->ip_ttl <= IPTTLDEC)
- return 0;
+ goto out;
/*
* Clear any in-bound checksum flags for this packet.
@@ -312,7 +331,10 @@ ipflow_fastforward(struct mbuf *m)
ipf->ipf_errors++;
}
KERNEL_UNLOCK_ONE(NULL);
- return 1;
+ ret = 1;
+ out:
+ mutex_exit(&ipflow_lock);
+ return ret;
}
static void
@@ -336,6 +358,9 @@ static void
ipflow_free(struct ipflow *ipf)
{
int s;
+
+ KASSERT(mutex_owned(&ipflow_lock));
+
/*
* Remove the flow from the hash table (at elevated IPL).
* Once it's off the list, we can deal with it at normal
@@ -353,6 +378,9 @@ ipflow_free(struct ipflow *ipf)
static struct ipflow *
ipflow_reap(bool just_one)
{
+
+ KASSERT(mutex_owned(&ipflow_lock));
+
while (just_one || ipflow_inuse > ip_maxflows) {
struct ipflow *ipf, *maybe_ipf = NULL;
int s;
@@ -405,6 +433,7 @@ ipflow_slowtimo(void)
uint64_t *ips;
mutex_enter(softnet_lock);
+ mutex_enter(&ipflow_lock);
KERNEL_LOCK(1, NULL);
for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) {
next_ipf = LIST_NEXT(ipf, ipf_list);
@@ -423,6 +452,7 @@ ipflow_slowtimo(void)
}
}
KERNEL_UNLOCK_ONE(NULL);
+ mutex_exit(&ipflow_lock);
mutex_exit(softnet_lock);
}
@@ -434,11 +464,15 @@ ipflow_create(const struct route *ro, st
size_t hash;
int s;
+ mutex_enter(&ipflow_lock);
+
/*
* Don't create cache entries for ICMP messages.
*/
- if (ip_maxflows == 0 || ip->ip_p == IPPROTO_ICMP)
+ if (ip_maxflows == 0 || ip->ip_p == IPPROTO_ICMP) {
+ mutex_exit(&ipflow_lock);
return;
+ }
KERNEL_LOCK(1, NULL);
@@ -487,6 +521,7 @@ ipflow_create(const struct route *ro, st
out:
KERNEL_UNLOCK_ONE(NULL);
+ mutex_exit(&ipflow_lock);
}
int
@@ -496,6 +531,9 @@ ipflow_invalidate_all(int new_size)
int s, error;
error = 0;
+
+ mutex_enter(&ipflow_lock);
+
s = splnet();
for (ipf = LIST_FIRST(&ipflowlist); ipf != NULL; ipf = next_ipf) {
next_ipf = LIST_NEXT(ipf, ipf_list);
@@ -506,6 +544,8 @@ ipflow_invalidate_all(int new_size)
error = ipflow_reinit(new_size);
splx(s);
+ mutex_exit(&ipflow_lock);
+
return error;
}
@@ -523,11 +563,13 @@ sysctl_net_inet_ip_maxflows(SYSCTLFN_ARG
return (error);
mutex_enter(softnet_lock);
+ mutex_enter(&ipflow_lock);
KERNEL_LOCK(1, NULL);
ipflow_reap(false);
KERNEL_UNLOCK_ONE(NULL);
+ mutex_exit(&ipflow_lock);
mutex_exit(softnet_lock);
return (0);
Index: src/sys/netinet6/ip6_flow.c
diff -u src/sys/netinet6/ip6_flow.c:1.24 src/sys/netinet6/ip6_flow.c:1.25
--- src/sys/netinet6/ip6_flow.c:1.24 Mon Mar 23 18:33:17 2015
+++ src/sys/netinet6/ip6_flow.c Mon Jun 13 08:34:23 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: ip6_flow.c,v 1.24 2015/03/23 18:33:17 roy Exp $ */
+/* $NetBSD: ip6_flow.c,v 1.25 2016/06/13 08:34:23 knakahara Exp $ */
/*-
* Copyright (c) 2007 The NetBSD Foundation, Inc.
@@ -38,7 +38,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.24 2015/03/23 18:33:17 roy Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip6_flow.c,v 1.25 2016/06/13 08:34:23 knakahara Exp $");
#include <sys/param.h>
#include <sys/systm.h>
@@ -88,6 +88,14 @@ LIST_HEAD(ip6flowhead, ip6flow);
#define IP6FLOW_TIMER (5 * PR_SLOWHZ)
#define IP6FLOW_DEFAULT_HASHSIZE (1 << IP6FLOW_HASHBITS)
+/*
+ * ip6_flow.c internal lock.
+ * If we use softnet_lock, it would cause recursive lock.
+ *
+ * This is a tentative workaround.
+ * We should make it scalable somehow in the future.
+ */
+static kmutex_t ip6flow_lock;
static struct ip6flowhead *ip6flowtable = NULL;
static struct ip6flowhead ip6flowlist;
static int ip6flow_inuse;
@@ -149,6 +157,8 @@ ip6flow_lookup(const struct ip6_hdr *ip6
size_t hash;
struct ip6flow *ip6f;
+ KASSERT(mutex_owned(&ip6flow_lock));
+
hash = ip6flow_hash(ip6);
LIST_FOREACH(ip6f, &ip6flowtable[hash], ip6f_hash) {
@@ -177,12 +187,14 @@ ip6flow_poolinit(void)
* If a newly sized table cannot be malloc'ed we just continue
* to use the old one.
*/
-int
-ip6flow_init(int table_size)
+static int
+ip6flow_init_locked(int table_size)
{
struct ip6flowhead *new_table;
size_t i;
+ KASSERT(mutex_owned(&ip6flow_lock));
+
new_table = (struct ip6flowhead *)malloc(sizeof(struct ip6flowhead) *
table_size, M_RTABLE, M_NOWAIT);
@@ -202,6 +214,20 @@ ip6flow_init(int table_size)
return 0;
}
+int
+ip6flow_init(int table_size)
+{
+ int ret;
+
+ mutex_init(&ip6flow_lock, MUTEX_DEFAULT, IPL_NONE);
+
+ mutex_enter(&ip6flow_lock);
+ ret = ip6flow_init_locked(table_size);
+ mutex_exit(&ip6flow_lock);
+
+ return ret;
+}
+
/*
* IPv6 Fast Forward routine. Attempt to forward the packet -
* if any problems are found return to the main IPv6 input
@@ -216,35 +242,38 @@ ip6flow_fastforward(struct mbuf **mp)
struct mbuf *m;
const struct sockaddr *dst;
int error;
+ int ret = 0;
+
+ mutex_enter(&ip6flow_lock);
/*
* Are we forwarding packets and have flows?
*/
if (!ip6_forwarding || ip6flow_inuse == 0)
- return 0;
+ goto out;
m = *mp;
/*
* At least size of IPv6 Header?
*/
if (m->m_len < sizeof(struct ip6_hdr))
- return 0;
+ goto out;
/*
* Was packet received as a link-level multicast or broadcast?
* If so, don't try to fast forward.
*/
if ((m->m_flags & (M_BCAST|M_MCAST)) != 0)
- return 0;
+ goto out;
if (IP6_HDR_ALIGNED_P(mtod(m, const void *)) == 0) {
if ((m = m_copyup(m, sizeof(struct ip6_hdr),
(max_linkhdr + 3) & ~3)) == NULL) {
- return 0;
+ goto out;
}
*mp = m;
} else if (__predict_false(m->m_len < sizeof(struct ip6_hdr))) {
if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
- return 0;
+ goto out;
}
*mp = m;
}
@@ -253,7 +282,7 @@ ip6flow_fastforward(struct mbuf **mp)
if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
/* Bad version. */
- return 0;
+ goto out;
}
/*
@@ -261,14 +290,14 @@ ip6flow_fastforward(struct mbuf **mp)
* We just leave this up to ip6_input to deal with.
*/
if (ip6->ip6_nxt == IPPROTO_HOPOPTS)
- return 0;
+ goto out;
/*
* Attempt to find a flow.
*/
if ((ip6f = ip6flow_lookup(ip6)) == NULL) {
/* No flow found. */
- return 0;
+ goto out;
}
/*
@@ -277,14 +306,14 @@ ip6flow_fastforward(struct mbuf **mp)
if ((rt = rtcache_validate(&ip6f->ip6f_ro)) == NULL ||
(rt->rt_ifp->if_flags & IFF_UP) == 0 ||
(rt->rt_flags & RTF_BLACKHOLE) != 0)
- return 0;
+ goto out;
/*
* Packet size greater than MTU?
*/
if (m->m_pkthdr.len > rt->rt_ifp->if_mtu) {
/* Return to main IPv6 input function. */
- return 0;
+ goto out;
}
/*
@@ -293,7 +322,7 @@ ip6flow_fastforward(struct mbuf **mp)
m->m_pkthdr.csum_flags = 0;
if (ip6->ip6_hlim <= IPV6_HLIMDEC)
- return 0;
+ goto out;
/* Decrement hop limit (same as TTL) */
ip6->ip6_hlim -= IPV6_HLIMDEC;
@@ -315,7 +344,10 @@ ip6flow_fastforward(struct mbuf **mp)
ip6f->ip6f_forwarded++;
}
KERNEL_UNLOCK_ONE(NULL);
- return 1;
+ ret = 1;
+ out:
+ mutex_exit(&ip6flow_lock);
+ return ret;
}
/*
@@ -347,6 +379,8 @@ ip6flow_free(struct ip6flow *ip6f)
{
int s;
+ KASSERT(mutex_owned(&ip6flow_lock));
+
/*
* Remove the flow from the hash table (at elevated IPL).
* Once it's off the list, we can deal with it at normal
@@ -361,13 +395,12 @@ ip6flow_free(struct ip6flow *ip6f)
pool_put(&ip6flow_pool, ip6f);
}
-/*
- * Reap one or more flows - ip6flow_reap may remove
- * multiple flows if net.inet6.ip6.maxflows is reduced.
- */
-struct ip6flow *
-ip6flow_reap(int just_one)
+static struct ip6flow *
+ip6flow_reap_locked(int just_one)
{
+
+ KASSERT(mutex_owned(&ip6flow_lock));
+
while (just_one || ip6flow_inuse > ip6_maxflows) {
struct ip6flow *ip6f, *maybe_ip6f = NULL;
int s;
@@ -414,12 +447,28 @@ ip6flow_reap(int just_one)
return NULL;
}
+/*
+ * Reap one or more flows - ip6flow_reap may remove
+ * multiple flows if net.inet6.ip6.maxflows is reduced.
+ */
+struct ip6flow *
+ip6flow_reap(int just_one)
+{
+ struct ip6flow *ip6f;
+
+ mutex_enter(&ip6flow_lock);
+ ip6f = ip6flow_reap_locked(just_one);
+ mutex_exit(&ip6flow_lock);
+ return ip6f;
+}
+
void
ip6flow_slowtimo(void)
{
struct ip6flow *ip6f, *next_ip6f;
mutex_enter(softnet_lock);
+ mutex_enter(&ip6flow_lock);
KERNEL_LOCK(1, NULL);
for (ip6f = LIST_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) {
@@ -437,6 +486,7 @@ ip6flow_slowtimo(void)
}
KERNEL_UNLOCK_ONE(NULL);
+ mutex_exit(&ip6flow_lock);
mutex_exit(softnet_lock);
}
@@ -452,6 +502,8 @@ ip6flow_create(const struct route *ro, s
size_t hash;
int s;
+ mutex_enter(&ip6flow_lock);
+
ip6 = mtod(m, const struct ip6_hdr *);
/*
@@ -460,8 +512,10 @@ ip6flow_create(const struct route *ro, s
*
* Don't create a flow for ICMPv6 messages.
*/
- if (ip6_maxflows == 0 || ip6->ip6_nxt == IPPROTO_IPV6_ICMP)
+ if (ip6_maxflows == 0 || ip6->ip6_nxt == IPPROTO_IPV6_ICMP) {
+ mutex_exit(&ip6flow_lock);
return;
+ }
KERNEL_LOCK(1, NULL);
@@ -479,7 +533,7 @@ ip6flow_create(const struct route *ro, s
ip6f = ip6flow_lookup(ip6);
if (ip6f == NULL) {
if (ip6flow_inuse >= ip6_maxflows) {
- ip6f = ip6flow_reap(1);
+ ip6f = ip6flow_reap_locked(1);
} else {
ip6f = pool_get(&ip6flow_pool, PR_NOWAIT);
if (ip6f == NULL)
@@ -518,6 +572,7 @@ ip6flow_create(const struct route *ro, s
out:
KERNEL_UNLOCK_ONE(NULL);
+ mutex_exit(&ip6flow_lock);
}
/*
@@ -531,6 +586,9 @@ ip6flow_invalidate_all(int new_size)
int s, error;
error = 0;
+
+ mutex_enter(&ip6flow_lock);
+
s = splnet();
for (ip6f = LIST_FIRST(&ip6flowlist); ip6f != NULL; ip6f = next_ip6f) {
next_ip6f = LIST_NEXT(ip6f, ip6f_list);
@@ -538,8 +596,10 @@ ip6flow_invalidate_all(int new_size)
}
if (new_size)
- error = ip6flow_init(new_size);
+ error = ip6flow_init_locked(new_size);
splx(s);
+ mutex_exit(&ip6flow_lock);
+
return error;
}