Module Name: src
Committed By: knakahara
Date: Wed Dec 7 08:28:46 UTC 2022
Modified Files:
src/sys/netinet: ip_encap.c ip_encap.h
Log Message:
Implement encap_attach_addr(), which is used by IP-encapsulated tunnels.
Tunnels attached by encap_attach() can process received packets
quickly because the softc is looked up in a radix tree. However, such
tunnels cannot use a priority function, which selects the tunnel's softc
based not only on source and destination but also on other information.
On the other hand, tunnels attached by encap_attach_func() can
use a priority function. However, receive processing for such tunnels
can be slow because the softc is found by a linear search (which calls
each tunnel's priority function).
encap_attach_addr() can be used for tunnels whose tunnel source address
and tunnel destination address are fixed. Tunnels attached by
encap_attach_addr() are looked up with thmap(9), so receive processing
can be fast. Moreover, such tunnels can use a priority function.
To generate a diff of this commit:
cvs rdiff -u -r1.75 -r1.76 src/sys/netinet/ip_encap.c
cvs rdiff -u -r1.26 -r1.27 src/sys/netinet/ip_encap.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/netinet/ip_encap.c
diff -u src/sys/netinet/ip_encap.c:1.75 src/sys/netinet/ip_encap.c:1.76
--- src/sys/netinet/ip_encap.c:1.75 Wed Dec 7 08:27:03 2022
+++ src/sys/netinet/ip_encap.c Wed Dec 7 08:28:46 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_encap.c,v 1.75 2022/12/07 08:27:03 knakahara Exp $ */
+/* $NetBSD: ip_encap.c,v 1.76 2022/12/07 08:28:46 knakahara Exp $ */
/* $KAME: ip_encap.c,v 1.73 2001/10/02 08:30:58 itojun Exp $ */
/*
@@ -68,7 +68,7 @@
#define USE_RADIX
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.75 2022/12/07 08:27:03 knakahara Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v 1.76 2022/12/07 08:28:46 knakahara Exp $");
#ifdef _KERNEL_OPT
#include "opt_mrouting.h"
@@ -89,6 +89,7 @@ __KERNEL_RCSID(0, "$NetBSD: ip_encap.c,v
#include <sys/condvar.h>
#include <sys/psref.h>
#include <sys/pslist.h>
+#include <sys/thmap.h>
#include <net/if.h>
@@ -134,6 +135,9 @@ static int mask_matchlen(const struct so
static int mask_match(const struct encaptab *, const struct sockaddr *,
const struct sockaddr *);
#endif
+static void encap_key_init(struct encap_key *, const struct sockaddr *,
+ const struct sockaddr *);
+static void encap_key_inc(struct encap_key *);
/*
* In encap[46]_lookup(), ep->func can sleep(e.g. rtalloc1) while walking
@@ -159,6 +163,8 @@ struct radix_node_head *encap_head[2]; /
static bool encap_head_updating = false;
#endif
+static thmap_t *encap_map[2]; /* 0 for AF_INET, 1 for AF_INET6 */
+
static bool encap_initialized = false;
/*
* must be done before other encap interfaces initialization.
@@ -210,6 +216,11 @@ encap_init(void)
sizeof(struct sockaddr_pack) << 3);
#endif
#endif
+
+ encap_map[0] = thmap_create(0, NULL, THMAP_NOCOPY);
+#ifdef INET6
+ encap_map[1] = thmap_create(0, NULL, THMAP_NOCOPY);
+#endif
}
#ifdef INET
@@ -226,6 +237,8 @@ encap4_lookup(struct mbuf *m, int off, i
struct radix_node_head *rnh = encap_rnh(AF_INET);
struct radix_node *rn;
#endif
+ thmap_t *emap = encap_map[0];
+ struct encap_key key;
KASSERT(m->m_len >= sizeof(*ip));
@@ -267,6 +280,51 @@ encap4_lookup(struct mbuf *m, int off, i
mask_matchlen(match->dstmask);
}
#endif
+
+ encap_key_init(&key, sintosa(&pack.mine), sintosa(&pack.yours));
+ while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) {
+ struct psref elem_psref;
+
+ KASSERT(ep->af == AF_INET);
+
+ if (ep->proto >= 0 && ep->proto != proto) {
+ encap_key_inc(&key);
+ continue;
+ }
+
+ psref_acquire(&elem_psref, &ep->psref,
+ encaptab.elem_class);
+ if (ep->func) {
+ pserialize_read_exit(s);
+ prio = (*ep->func)(m, off, proto, ep->arg);
+ s = pserialize_read_enter();
+ } else {
+ prio = pack.mine.sin_len + pack.yours.sin_len;
+ }
+
+ if (prio <= 0) {
+ psref_release(&elem_psref, &ep->psref,
+ encaptab.elem_class);
+ encap_key_inc(&key);
+ continue;
+ }
+ if (prio > matchprio) {
+ /* release last matched ep */
+ if (match != NULL)
+ psref_release(match_psref, &match->psref,
+ encaptab.elem_class);
+
+ psref_copy(match_psref, &elem_psref,
+ encaptab.elem_class);
+ matchprio = prio;
+ match = ep;
+ }
+
+ psref_release(&elem_psref, &ep->psref,
+ encaptab.elem_class);
+ encap_key_inc(&key);
+ }
+
PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
struct psref elem_psref;
@@ -386,6 +444,8 @@ encap6_lookup(struct mbuf *m, int off, i
struct radix_node_head *rnh = encap_rnh(AF_INET6);
struct radix_node *rn;
#endif
+ thmap_t *emap = encap_map[1];
+ struct encap_key key;
KASSERT(m->m_len >= sizeof(*ip6));
@@ -427,6 +487,50 @@ encap6_lookup(struct mbuf *m, int off, i
mask_matchlen(match->dstmask);
}
#endif
+
+ encap_key_init(&key, sin6tosa(&pack.mine), sin6tosa(&pack.yours));
+ while ((ep = thmap_get(emap, &key, sizeof(key))) != NULL) {
+ struct psref elem_psref;
+
+ KASSERT(ep->af == AF_INET6);
+
+ if (ep->proto >= 0 && ep->proto != proto) {
+ encap_key_inc(&key);
+ continue;
+ }
+
+ psref_acquire(&elem_psref, &ep->psref,
+ encaptab.elem_class);
+ if (ep->func) {
+ pserialize_read_exit(s);
+ prio = (*ep->func)(m, off, proto, ep->arg);
+ s = pserialize_read_enter();
+ } else {
+ prio = pack.mine.sin6_len + pack.yours.sin6_len;
+ }
+
+ if (prio <= 0) {
+ psref_release(&elem_psref, &ep->psref,
+ encaptab.elem_class);
+ encap_key_inc(&key);
+ continue;
+ }
+ if (prio > matchprio) {
+ /* release last matched ep */
+ if (match != NULL)
+ psref_release(match_psref, &match->psref,
+ encaptab.elem_class);
+
+ psref_copy(match_psref, &elem_psref,
+ encaptab.elem_class);
+ matchprio = prio;
+ match = ep;
+ }
+ psref_release(&elem_psref, &ep->psref,
+ encaptab.elem_class);
+ encap_key_inc(&key);
+ }
+
PSLIST_READER_FOREACH(ep, &encap_table, struct encaptab, chain) {
struct psref elem_psref;
@@ -799,6 +903,111 @@ gc:
return NULL;
}
+static void
+encap_key_init(struct encap_key *key,
+ const struct sockaddr *local, const struct sockaddr *remote)
+{
+
+ memset(key, 0, sizeof(*key));
+
+ sockaddr_copy(&key->local_sa, sizeof(key->local_u), local);
+ sockaddr_copy(&key->remote_sa, sizeof(key->remote_u), remote);
+}
+
+static void
+encap_key_inc(struct encap_key *key)
+{
+
+ (key->seq)++;
+}
+
+static void
+encap_key_dec(struct encap_key *key)
+{
+
+ (key->seq)--;
+}
+
+static void
+encap_key_copy(struct encap_key *dst, const struct encap_key *src)
+{
+
+ memset(dst, 0, sizeof(*dst));
+ *dst = *src;
+}
+
+/*
+ * src is always my side, and dst is always remote side.
+ * Return value will be necessary as input (cookie) for encap_detach().
+ */
+const struct encaptab *
+encap_attach_addr(int af, int proto,
+ const struct sockaddr *src, const struct sockaddr *dst,
+ encap_priofunc_t *func,
+ const struct encapsw *esw, void *arg)
+{
+ struct encaptab *ep;
+ size_t l;
+ thmap_t *emap;
+ void *retep;
+ struct ip_pack4 *pack4;
+#ifdef INET6
+ struct ip_pack6 *pack6;
+#endif
+
+ ASSERT_SLEEPABLE();
+
+ encap_afcheck(af, src, dst);
+
+ switch (af) {
+ case AF_INET:
+ l = sizeof(*pack4);
+ emap = encap_map[0];
+ break;
+#ifdef INET6
+ case AF_INET6:
+ l = sizeof(*pack6);
+ emap = encap_map[1];
+ break;
+#endif
+ default:
+ return NULL;
+ }
+
+ ep = kmem_zalloc(sizeof(*ep), KM_SLEEP);
+ ep->addrpack = kmem_zalloc(l, KM_SLEEP);
+ ep->addrpack->sa_len = l & 0xff;
+ ep->af = af;
+ ep->proto = proto;
+ ep->flag = IP_ENCAP_ADDR_ENABLE;
+ switch (af) {
+ case AF_INET:
+ pack4 = (struct ip_pack4 *)ep->addrpack;
+ ep->src = (struct sockaddr *)&pack4->mine;
+ ep->dst = (struct sockaddr *)&pack4->yours;
+ break;
+#ifdef INET6
+ case AF_INET6:
+ pack6 = (struct ip_pack6 *)ep->addrpack;
+ ep->src = (struct sockaddr *)&pack6->mine;
+ ep->dst = (struct sockaddr *)&pack6->yours;
+ break;
+#endif
+ }
+ memcpy(ep->src, src, src->sa_len);
+ memcpy(ep->dst, dst, dst->sa_len);
+ ep->esw = esw;
+ ep->arg = arg;
+ ep->func = func;
+ psref_target_init(&ep->psref, encaptab.elem_class);
+
+ encap_key_init(&ep->key, src, dst);
+ while ((retep = thmap_put(emap, &ep->key, sizeof(ep->key), ep)) != ep)
+ encap_key_inc(&ep->key);
+ return ep;
+}
+
+
/* XXX encap4_ctlinput() is necessary if we set DF=1 on outer IPv4 header */
#ifdef INET6
@@ -900,6 +1109,62 @@ encap6_ctlinput(int cmd, const struct so
}
#endif
+static int
+encap_detach_addr(const struct encaptab *ep)
+{
+ thmap_t *emap;
+ struct encaptab *retep;
+ struct encaptab *target;
+ void *thgc;
+ struct encap_key key;
+
+ KASSERT(encap_lock_held());
+ KASSERT(ep->flag & IP_ENCAP_ADDR_ENABLE);
+
+ switch (ep->af) {
+ case AF_INET:
+ emap = encap_map[0];
+ break;
+#ifdef INET6
+ case AF_INET6:
+ emap = encap_map[1];
+ break;
+#endif
+ default:
+ return EINVAL;
+ }
+
+ retep = thmap_del(emap, &ep->key, sizeof(ep->key));
+ if (retep != ep) {
+ return ENOENT;
+ }
+ target = retep;
+
+ /*
+ * To keep continuity, decrement seq after detached encaptab.
+ */
+ encap_key_copy(&key, &ep->key);
+ encap_key_inc(&key);
+ while ((retep = thmap_del(emap, &key, sizeof(key))) != NULL) {
+ void *pp;
+
+ encap_key_dec(&retep->key);
+ pp = thmap_put(emap, &retep->key, sizeof(retep->key), retep);
+ KASSERT(retep == pp);
+
+ encap_key_inc(&key);
+ }
+
+ thgc = thmap_stage_gc(emap);
+ pserialize_perform(encaptab.psz);
+ thmap_gc(emap, thgc);
+ psref_target_destroy(&target->psref, encaptab.elem_class);
+ kmem_free(target->addrpack, target->addrpack->sa_len);
+ kmem_free(target, sizeof(*target));
+
+ return 0;
+}
+
int
encap_detach(const struct encaptab *cookie)
{
@@ -909,6 +1174,9 @@ encap_detach(const struct encaptab *cook
KASSERT(encap_lock_held());
+ if (ep->flag & IP_ENCAP_ADDR_ENABLE)
+ return encap_detach_addr(ep);
+
PSLIST_WRITER_FOREACH(p, &encap_table, struct encaptab, chain) {
if (p == ep) {
error = encap_remove(p);
Index: src/sys/netinet/ip_encap.h
diff -u src/sys/netinet/ip_encap.h:1.26 src/sys/netinet/ip_encap.h:1.27
--- src/sys/netinet/ip_encap.h:1.26 Wed Dec 7 08:27:03 2022
+++ src/sys/netinet/ip_encap.h Wed Dec 7 08:28:46 2022
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_encap.h,v 1.26 2022/12/07 08:27:03 knakahara Exp $ */
+/* $NetBSD: ip_encap.h,v 1.27 2022/12/07 08:28:46 knakahara Exp $ */
/* $KAME: ip_encap.h,v 1.7 2000/03/25 07:23:37 sumikawa Exp $ */
/*
@@ -64,6 +64,28 @@ struct encapsw {
typedef int encap_priofunc_t(struct mbuf *, int, int, void *);
+struct encap_key {
+ union {
+ struct sockaddr local_u_sa;
+ struct sockaddr_in local_u_sin;
+ struct sockaddr_in6 local_u_sin6;
+ } local_u;
+#define local_sa local_u.local_u_sa
+#define local_sin local_u.local_u_sin
+#define local_sin6 local_u.local_u_sin6
+
+ union {
+ struct sockaddr remote_u_sa;
+ struct sockaddr_in remote_u_sin;
+ struct sockaddr_in6 remote_u_sin6;
+ } remote_u;
+#define remote_sa remote_u.remote_u_sa
+#define remote_sin remote_u.remote_u_sin
+#define remote_sin6 remote_u.remote_u_sin6
+
+ u_int seq;
+};
+
struct encaptab {
struct radix_node nodes[2];
struct pslist_entry chain;
@@ -78,9 +100,13 @@ struct encaptab {
encap_priofunc_t *func;
const struct encapsw *esw;
void *arg;
+ struct encap_key key;
+ u_int flag;
struct psref_target psref;
};
+#define IP_ENCAP_ADDR_ENABLE __BIT(0)
+
/* to lookup a pair of address using radix tree */
struct sockaddr_pack {
u_int8_t sp_len;
@@ -110,6 +136,9 @@ const struct encaptab *encap_attach(int,
const struct encaptab *encap_attach_func(int, int,
encap_priofunc_t *,
const struct encapsw *, void *);
+const struct encaptab *encap_attach_addr(int, int,
+ const struct sockaddr *, const struct sockaddr *,
+ encap_priofunc_t *, const struct encapsw *, void *);
void *encap6_ctlinput(int, const struct sockaddr *, void *);
int encap_detach(const struct encaptab *);