Module Name: src
Committed By: knakahara
Date: Wed Jan 10 10:56:31 UTC 2018
Modified Files:
src/distrib/sets/lists/comp: mi
src/sys/conf: files
src/sys/net: Makefile files.net if_types.h
src/sys/netinet: in.c in.h ip_var.h
src/sys/netinet6: in6.c in6.h ip6_var.h
src/sys/netipsec: Makefile files.netipsec ipsec.h key.c key.h
Added Files:
src/sys/net: if_ipsec.c if_ipsec.h
src/sys/netipsec: ipsecif.c ipsecif.h
Log Message:
add ipsec(4) interface, which is used for route-based VPN.
The man page and ATF tests will be added later; please see the man page for details.
reviewed by [email protected], [email protected] and [email protected], thanks.
https://mail-index.netbsd.org/tech-net/2017/12/18/msg006557.html
To generate a diff of this commit:
cvs rdiff -u -r1.2167 -r1.2168 src/distrib/sets/lists/comp/mi
cvs rdiff -u -r1.1190 -r1.1191 src/sys/conf/files
cvs rdiff -u -r1.33 -r1.34 src/sys/net/Makefile
cvs rdiff -u -r1.13 -r1.14 src/sys/net/files.net
cvs rdiff -u -r0 -r1.1 src/sys/net/if_ipsec.c src/sys/net/if_ipsec.h
cvs rdiff -u -r1.27 -r1.28 src/sys/net/if_types.h
cvs rdiff -u -r1.213 -r1.214 src/sys/netinet/in.c
cvs rdiff -u -r1.102 -r1.103 src/sys/netinet/in.h
cvs rdiff -u -r1.121 -r1.122 src/sys/netinet/ip_var.h
cvs rdiff -u -r1.256 -r1.257 src/sys/netinet6/in6.c
cvs rdiff -u -r1.87 -r1.88 src/sys/netinet6/in6.h
cvs rdiff -u -r1.74 -r1.75 src/sys/netinet6/ip6_var.h
cvs rdiff -u -r1.5 -r1.6 src/sys/netipsec/Makefile
cvs rdiff -u -r1.12 -r1.13 src/sys/netipsec/files.netipsec
cvs rdiff -u -r1.61 -r1.62 src/sys/netipsec/ipsec.h
cvs rdiff -u -r0 -r1.1 src/sys/netipsec/ipsecif.c src/sys/netipsec/ipsecif.h
cvs rdiff -u -r1.246 -r1.247 src/sys/netipsec/key.c
cvs rdiff -u -r1.33 -r1.34 src/sys/netipsec/key.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/distrib/sets/lists/comp/mi
diff -u src/distrib/sets/lists/comp/mi:1.2167 src/distrib/sets/lists/comp/mi:1.2168
--- src/distrib/sets/lists/comp/mi:1.2167 Tue Jan 9 03:31:13 2018
+++ src/distrib/sets/lists/comp/mi Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-# $NetBSD: mi,v 1.2167 2018/01/09 03:31:13 christos Exp $
+# $NetBSD: mi,v 1.2168 2018/01/10 10:56:30 knakahara Exp $
#
# Note: don't delete entries from here - mark them as "obsolete" instead.
./etc/mtree/set.comp comp-sys-root
@@ -2224,6 +2224,7 @@
./usr/include/net/if_hippi.h comp-c-include
./usr/include/net/if_ieee1394.h comp-c-include
./usr/include/net/if_ieee80211.h comp-obsolete obsolete
+./usr/include/net/if_ipsec.h comp-c-include
./usr/include/net/if_l2tp.h comp-c-include
./usr/include/net/if_llc.h comp-c-include
./usr/include/net/if_media.h comp-c-include
@@ -2382,6 +2383,7 @@
./usr/include/netipsec/ipcomp_var.h comp-c-include
./usr/include/netipsec/ipip_var.h comp-c-include
./usr/include/netipsec/ipsec.h comp-c-include
+./usr/include/netipsec/ipsecif.h comp-c-include
./usr/include/netipsec/ipsec_var.h comp-c-include
./usr/include/netipsec/keydb.h comp-obsolete obsolete
./usr/include/netipsec/keysock.h comp-c-include
Index: src/sys/conf/files
diff -u src/sys/conf/files:1.1190 src/sys/conf/files:1.1191
--- src/sys/conf/files:1.1190 Tue Jan 9 03:31:12 2018
+++ src/sys/conf/files Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-# $NetBSD: files,v 1.1190 2018/01/09 03:31:12 christos Exp $
+# $NetBSD: files,v 1.1191 2018/01/10 10:56:30 knakahara Exp $
# @(#)files.newconf 7.5 (Berkeley) 5/10/93
version 20171118
@@ -1463,6 +1463,7 @@ defpseudo carp: ifnet, ether, arp
defpseudodev etherip: ifnet, ether, arp
defpseudodev l2tp: ifnet, ether, arp
defpseudo canloop: ifnet
+defpseudo ipsecif: ifnet # named "ipsecif" to avoid confusion with the ipsec option itself
defpseudo sequencer
defpseudo clockctl
Index: src/sys/net/Makefile
diff -u src/sys/net/Makefile:1.33 src/sys/net/Makefile:1.34
--- src/sys/net/Makefile:1.33 Thu Feb 16 08:12:44 2017
+++ src/sys/net/Makefile Wed Jan 10 10:56:30 2018
@@ -1,10 +1,10 @@
-# $NetBSD: Makefile,v 1.33 2017/02/16 08:12:44 knakahara Exp $
+# $NetBSD: Makefile,v 1.34 2018/01/10 10:56:30 knakahara Exp $
INCSDIR= /usr/include/net
INCS= bpf.h bpfjit.h bpfdesc.h dlt.h ethertypes.h if.h if_arc.h if_arp.h \
if_atm.h if_bridgevar.h if_dl.h if_ether.h if_etherip.h if_fddi.h if_gif.h \
- if_gre.h if_hippi.h if_ieee1394.h if_llc.h if_media.h if_mpls.h \
+ if_gre.h if_hippi.h if_ieee1394.h if_ipsec.h if_llc.h if_media.h if_mpls.h \
if_pflog.h if_ppp.h if_pppoe.h if_l2tp.h if_sppp.h if_srt.h if_stf.h \
if_tap.h if_token.h if_tun.h if_types.h if_vlanvar.h net_stats.h \
netisr.h pfil.h pfkeyv2.h pfvar.h ppp-comp.h ppp_defs.h radix.h \
Index: src/sys/net/files.net
diff -u src/sys/net/files.net:1.13 src/sys/net/files.net:1.14
--- src/sys/net/files.net:1.13 Thu Feb 16 08:12:44 2017
+++ src/sys/net/files.net Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-# $NetBSD: files.net,v 1.13 2017/02/16 08:12:44 knakahara Exp $
+# $NetBSD: files.net,v 1.14 2018/01/10 10:56:30 knakahara Exp $
# XXX CLEANUP
define net
@@ -22,6 +22,7 @@ file net/if_gif.c gif needs-flag
file net/if_gre.c gre needs-flag
file net/if_hippisubr.c hippi needs-flag
file net/if_ieee1394subr.c ieee1394
+file net/if_ipsec.c ipsec & ipsecif
file net/if_llatbl.c inet | inet6
file net/if_l2tp.c l2tp needs-flag
file net/if_loop.c loop
Index: src/sys/net/if_types.h
diff -u src/sys/net/if_types.h:1.27 src/sys/net/if_types.h:1.28
--- src/sys/net/if_types.h:1.27 Thu Feb 16 08:12:44 2017
+++ src/sys/net/if_types.h Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: if_types.h,v 1.27 2017/02/16 08:12:44 knakahara Exp $ */
+/* $NetBSD: if_types.h,v 1.28 2018/01/10 10:56:30 knakahara Exp $ */
/*
* Copyright (c) 1989, 1993, 1994
@@ -265,5 +265,6 @@
#define IFT_PFSYNC 0xf6 /* Packet filter state syncing */
#define IFT_L2TP 0xf7 /* L2TPv3 I/F */
#define IFT_CARP 0xf8 /* Common Address Redundancy Protocol */
+#define IFT_IPSEC 0xf9 /* IPsec I/F */
#endif /* !_NET_IF_TYPES_H_ */
Index: src/sys/netinet/in.c
diff -u src/sys/netinet/in.c:1.213 src/sys/netinet/in.c:1.214
--- src/sys/netinet/in.c:1.213 Wed Dec 27 08:35:20 2017
+++ src/sys/netinet/in.c Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: in.c,v 1.213 2017/12/27 08:35:20 ozaki-r Exp $ */
+/* $NetBSD: in.c,v 1.214 2018/01/10 10:56:30 knakahara Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
@@ -91,7 +91,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.213 2017/12/27 08:35:20 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in.c,v 1.214 2018/01/10 10:56:30 knakahara Exp $");
#include "arp.h"
@@ -1877,6 +1877,44 @@ out:
return ia;
}
+int
+in_tunnel_validate(const struct ip *ip, struct in_addr src, struct in_addr dst)
+{
+ struct in_ifaddr *ia4;
+ int s;
+
+ /* check for address match */
+ if (src.s_addr != ip->ip_dst.s_addr ||
+ dst.s_addr != ip->ip_src.s_addr)
+ return 0;
+
+ /* martian filters on outer source - NOT done in ip_input! */
+ if (IN_MULTICAST(ip->ip_src.s_addr))
+ return 0;
+ switch ((ntohl(ip->ip_src.s_addr) & 0xff000000) >> 24) {
+ case 0:
+ case 127:
+ case 255:
+ return 0;
+ }
+ /* reject packets with broadcast on source */
+ s = pserialize_read_enter();
+ IN_ADDRLIST_READER_FOREACH(ia4) {
+ if ((ia4->ia_ifa.ifa_ifp->if_flags & IFF_BROADCAST) == 0)
+ continue;
+ if (ip->ip_src.s_addr == ia4->ia_broadaddr.sin_addr.s_addr) {
+ pserialize_read_exit(s);
+ return 0;
+ }
+ }
+ pserialize_read_exit(s);
+
+ /* NOTE: packet may be dropped by uRPF */
+
+ /* return valid bytes length */
+ return sizeof(src) + sizeof(dst);
+}
+
#if NARP > 0
struct in_llentry {
Index: src/sys/netinet/in.h
diff -u src/sys/netinet/in.h:1.102 src/sys/netinet/in.h:1.103
--- src/sys/netinet/in.h:1.102 Mon Jan 1 00:51:36 2018
+++ src/sys/netinet/in.h Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: in.h,v 1.102 2018/01/01 00:51:36 christos Exp $ */
+/* $NetBSD: in.h,v 1.103 2018/01/10 10:56:30 knakahara Exp $ */
/*
* Copyright (c) 1982, 1986, 1990, 1993
@@ -587,6 +587,9 @@ struct ip_moptions;
struct in_ifaddr *in_selectsrc(struct sockaddr_in *,
struct route *, int, struct ip_moptions *, int *, struct psref *);
+struct ip;
+int in_tunnel_validate(const struct ip *, struct in_addr, struct in_addr);
+
#define in_hosteq(s,t) ((s).s_addr == (t).s_addr)
#define in_nullhost(x) ((x).s_addr == INADDR_ANY)
Index: src/sys/netinet/ip_var.h
diff -u src/sys/netinet/ip_var.h:1.121 src/sys/netinet/ip_var.h:1.122
--- src/sys/netinet/ip_var.h:1.121 Mon Dec 11 05:47:18 2017
+++ src/sys/netinet/ip_var.h Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ip_var.h,v 1.121 2017/12/11 05:47:18 ryo Exp $ */
+/* $NetBSD: ip_var.h,v 1.122 2018/01/10 10:56:30 knakahara Exp $ */
/*
* Copyright (c) 1982, 1986, 1993
@@ -164,8 +164,9 @@ struct ip_pktopts {
#define IP_STAT_NOGIF 28 /* no match gif found */
#define IP_STAT_BADADDR 29 /* invalid address on header */
#define IP_STAT_NOL2TP 30 /* no match l2tp found */
+#define IP_STAT_NOIPSEC 31 /* no match ipsec(4) found */
-#define IP_NSTATS 31
+#define IP_NSTATS 32
#ifdef _KERNEL
Index: src/sys/netinet6/in6.c
diff -u src/sys/netinet6/in6.c:1.256 src/sys/netinet6/in6.c:1.257
--- src/sys/netinet6/in6.c:1.256 Mon Dec 25 04:41:49 2017
+++ src/sys/netinet6/in6.c Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: in6.c,v 1.256 2017/12/25 04:41:49 ozaki-r Exp $ */
+/* $NetBSD: in6.c,v 1.257 2018/01/10 10:56:30 knakahara Exp $ */
/* $KAME: in6.c,v 1.198 2001/07/18 09:12:38 itojun Exp $ */
/*
@@ -62,7 +62,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: in6.c,v 1.256 2017/12/25 04:41:49 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: in6.c,v 1.257 2018/01/10 10:56:30 knakahara Exp $");
#ifdef _KERNEL_OPT
#include "opt_inet.h"
@@ -2324,6 +2324,24 @@ in6_setmaxmtu(void)
in6_maxmtu = maxmtu;
}
+int
+in6_tunnel_validate(const struct ip6_hdr *ip6, const struct in6_addr *src,
+ const struct in6_addr *dst)
+{
+
+ /* check for address match */
+ if (!IN6_ARE_ADDR_EQUAL(src, &ip6->ip6_dst) ||
+ !IN6_ARE_ADDR_EQUAL(dst, &ip6->ip6_src))
+ return 0;
+
+ /* martian filters on outer source - done in ip6_input */
+
+ /* NOTE: the packet may be dropped by uRPF. */
+
+ /* return valid bytes length */
+ return sizeof(*src) + sizeof(*dst);
+}
+
/*
* Provide the length of interface identifiers to be used for the link attached
* to the given interface. The length should be defined in "IPv6 over
Index: src/sys/netinet6/in6.h
diff -u src/sys/netinet6/in6.h:1.87 src/sys/netinet6/in6.h:1.88
--- src/sys/netinet6/in6.h:1.87 Mon Feb 15 14:59:03 2016
+++ src/sys/netinet6/in6.h Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: in6.h,v 1.87 2016/02/15 14:59:03 rtr Exp $ */
+/* $NetBSD: in6.h,v 1.88 2018/01/10 10:56:30 knakahara Exp $ */
/* $KAME: in6.h,v 1.83 2001/03/29 02:55:07 jinmei Exp $ */
/*
@@ -712,6 +712,10 @@ extern void in6_if_down(struct ifnet *);
extern void addrsel_policy_init(void);
extern u_char ip6_protox[];
+struct ip6_hdr;
+int in6_tunnel_validate(const struct ip6_hdr *, const struct in6_addr *,
+ const struct in6_addr *);
+
#define satosin6(sa) ((struct sockaddr_in6 *)(sa))
#define satocsin6(sa) ((const struct sockaddr_in6 *)(sa))
#define sin6tosa(sin6) ((struct sockaddr *)(sin6))
Index: src/sys/netinet6/ip6_var.h
diff -u src/sys/netinet6/ip6_var.h:1.74 src/sys/netinet6/ip6_var.h:1.75
--- src/sys/netinet6/ip6_var.h:1.74 Fri Mar 3 07:13:06 2017
+++ src/sys/netinet6/ip6_var.h Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ip6_var.h,v 1.74 2017/03/03 07:13:06 ozaki-r Exp $ */
+/* $NetBSD: ip6_var.h,v 1.75 2018/01/10 10:56:30 knakahara Exp $ */
/* $KAME: ip6_var.h,v 1.33 2000/06/11 14:59:20 jinmei Exp $ */
/*
@@ -226,8 +226,9 @@ struct ip6_pktopts {
#define IP6_STAT_FORWARD_CACHEMISS 399
#define IP6_STAT_FASTFORWARD 400 /* packets fast forwarded */
#define IP6_STAT_FASTFORWARDFLOWS 401 /* number of fast forward flows */
+#define IP6_STAT_NOIPSEC 402 /* no match ipsec(4) found */
-#define IP6_NSTATS 402
+#define IP6_NSTATS 403
#define IP6FLOW_HASHBITS 6 /* should not be a multiple of 8 */
Index: src/sys/netipsec/Makefile
diff -u src/sys/netipsec/Makefile:1.5 src/sys/netipsec/Makefile:1.6
--- src/sys/netipsec/Makefile:1.5 Fri Jan 6 14:17:11 2012
+++ src/sys/netipsec/Makefile Wed Jan 10 10:56:30 2018
@@ -1,9 +1,10 @@
-# $NetBSD: Makefile,v 1.5 2012/01/06 14:17:11 drochner Exp $
+# $NetBSD: Makefile,v 1.6 2018/01/10 10:56:30 knakahara Exp $
INCSDIR= /usr/include/netipsec
INCS= ah_var.h esp_var.h ipcomp_var.h ipip_var.h ipsec_var.h \
keysock.h
INCS+= ipsec.h
+INCS+= ipsecif.h
.include <bsd.kinc.mk>
Index: src/sys/netipsec/files.netipsec
diff -u src/sys/netipsec/files.netipsec:1.12 src/sys/netipsec/files.netipsec:1.13
--- src/sys/netipsec/files.netipsec:1.12 Wed Jun 5 19:01:26 2013
+++ src/sys/netipsec/files.netipsec Wed Jan 10 10:56:30 2018
@@ -1,8 +1,9 @@
-# $Id: files.netipsec,v 1.12 2013/06/05 19:01:26 christos Exp $
+# $Id: files.netipsec,v 1.13 2018/01/10 10:56:30 knakahara Exp $
#
#
defflag opt_ipsec.h IPSEC: opencrypto
defflag opt_ipsec.h IPSEC_DEBUG
+defflag opt_ipsec.h IPSEC_TX_TOS_CLEAR
file netipsec/ipsec.c ipsec needs-flag
file netipsec/ipsec_input.c ipsec
@@ -19,3 +20,5 @@ file netipsec/key_debug.c ipsec
file netipsec/keysock.c ipsec
file netipsec/xform_tcp.c ipsec & tcp_signature
+
+file netipsec/ipsecif.c ipsec & ipsecif
Index: src/sys/netipsec/ipsec.h
diff -u src/sys/netipsec/ipsec.h:1.61 src/sys/netipsec/ipsec.h:1.62
--- src/sys/netipsec/ipsec.h:1.61 Tue Oct 3 08:56:52 2017
+++ src/sys/netipsec/ipsec.h Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: ipsec.h,v 1.61 2017/10/03 08:56:52 ozaki-r Exp $ */
+/* $NetBSD: ipsec.h,v 1.62 2018/01/10 10:56:30 knakahara Exp $ */
/* $FreeBSD: /usr/local/www/cvsroot/FreeBSD/src/sys/netipsec/ipsec.h,v 1.2.4.2 2004/02/14 22:23:23 bms Exp $ */
/* $KAME: ipsec.h,v 1.53 2001/11/20 08:32:38 itojun Exp $ */
@@ -84,6 +84,10 @@ struct secpolicy {
#define IPSEC_SPSTATE_DEAD 0
#define IPSEC_SPSTATE_ALIVE 1
+ u_int origin; /* who generated this SP. */
+#define IPSEC_SPORIGIN_USER 0
+#define IPSEC_SPORIGIN_KERNEL 1
+
u_int policy; /* DISCARD, NONE or IPSEC, see keyv2.h */
struct ipsecrequest *req;
/* pointer to the ipsec request tree, */
Index: src/sys/netipsec/key.c
diff -u src/sys/netipsec/key.c:1.246 src/sys/netipsec/key.c:1.247
--- src/sys/netipsec/key.c:1.246 Fri Dec 1 06:34:14 2017
+++ src/sys/netipsec/key.c Wed Jan 10 10:56:30 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: key.c,v 1.246 2017/12/01 06:34:14 ozaki-r Exp $ */
+/* $NetBSD: key.c,v 1.247 2018/01/10 10:56:30 knakahara Exp $ */
/* $FreeBSD: src/sys/netipsec/key.c,v 1.3.2.3 2004/02/14 22:23:23 bms Exp $ */
/* $KAME: key.c,v 1.191 2001/06/27 10:46:49 sakane Exp $ */
@@ -32,7 +32,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.246 2017/12/01 06:34:14 ozaki-r Exp $");
+__KERNEL_RCSID(0, "$NetBSD: key.c,v 1.247 2018/01/10 10:56:30 knakahara Exp $");
/*
* This code is referred to RFC 2367
@@ -196,6 +196,10 @@ static u_int32_t acq_seq = 0;
* however, a socket can be destroyed in softint so we cannot destroy
* it directly instead we just mark it DEAD and delay the destruction
* until GC by the timer
+ * - SP origin
+ * - SPs can be created by both userland programs and kernel components.
+ * The SPs created in kernel must not be removed by userland programs,
+ * although the SPs can be read by userland programs.
*/
/*
* Locking notes on SAD:
@@ -584,13 +588,6 @@ struct _keystat {
u_long getspi_count; /* the avarage of count to try to get new SPI */
} keystat;
-struct sadb_msghdr {
- struct sadb_msg *msg;
- void *ext[SADB_EXT_MAX + 1];
- int extoff[SADB_EXT_MAX + 1];
- int extlen[SADB_EXT_MAX + 1];
-};
-
static void
key_init_spidx_bymsghdr(struct secpolicyindex *, const struct sadb_msghdr *);
@@ -621,10 +618,9 @@ static void key_freesp_so(struct secpoli
#endif
static struct secpolicy *key_getsp (const struct secpolicyindex *);
static struct secpolicy *key_getspbyid (u_int32_t);
-static struct secpolicy *key_lookup_and_remove_sp(const struct secpolicyindex *);
-static struct secpolicy *key_lookupbyid_and_remove_sp(u_int32_t);
+static struct secpolicy *key_lookup_and_remove_sp(const struct secpolicyindex *, bool);
+static struct secpolicy *key_lookupbyid_and_remove_sp(u_int32_t, bool);
static void key_destroy_sp(struct secpolicy *);
-static u_int16_t key_newreqid (void);
static struct mbuf *key_gather_mbuf (struct mbuf *,
const struct sadb_msghdr *, int, int, ...);
static int key_api_spdadd(struct socket *, struct mbuf *,
@@ -1642,14 +1638,19 @@ key_getsp(const struct secpolicyindex *s
* others : found, pointer to a SP.
*/
static struct secpolicy *
-key_lookup_and_remove_sp(const struct secpolicyindex *spidx)
+key_lookup_and_remove_sp(const struct secpolicyindex *spidx, bool from_kernel)
{
struct secpolicy *sp = NULL;
mutex_enter(&key_spd.lock);
SPLIST_WRITER_FOREACH(sp, spidx->dir) {
KASSERT(sp->state != IPSEC_SPSTATE_DEAD);
-
+ /*
+ * SPs created in kernel(e.g. ipsec(4) I/F) must not be
+ * removed by userland programs.
+ */
+ if (!from_kernel && sp->origin == IPSEC_SPORIGIN_KERNEL)
+ continue;
if (key_spidx_match_exactly(spidx, &sp->spidx)) {
key_unlink_sp(sp);
goto out;
@@ -1702,19 +1703,31 @@ out:
* others : found, pointer to a SP.
*/
static struct secpolicy *
-key_lookupbyid_and_remove_sp(u_int32_t id)
+key_lookupbyid_and_remove_sp(u_int32_t id, bool from_kernel)
{
struct secpolicy *sp;
mutex_enter(&key_spd.lock);
SPLIST_READER_FOREACH(sp, IPSEC_DIR_INBOUND) {
KASSERT(sp->state != IPSEC_SPSTATE_DEAD);
+ /*
+ * SPs created in kernel(e.g. ipsec(4) I/F) must not be
+ * removed by userland programs.
+ */
+ if (!from_kernel && sp->origin == IPSEC_SPORIGIN_KERNEL)
+ continue;
if (sp->id == id)
goto out;
}
SPLIST_READER_FOREACH(sp, IPSEC_DIR_OUTBOUND) {
KASSERT(sp->state != IPSEC_SPSTATE_DEAD);
+ /*
+ * SPs created in kernel(e.g. ipsec(4) I/F) must not be
+ * removed by userland programs.
+ */
+ if (!from_kernel && sp->origin == IPSEC_SPORIGIN_KERNEL)
+ continue;
if (sp->id == id)
goto out;
}
@@ -1742,8 +1755,9 @@ key_newsp(const char* where, int tag)
* NOTE: `state', `secpolicyindex' in secpolicy structure are not set,
* so must be set properly later.
*/
-struct secpolicy *
-key_msg2sp(const struct sadb_x_policy *xpl0, size_t len, int *error)
+static struct secpolicy *
+_key_msg2sp(const struct sadb_x_policy *xpl0, size_t len, int *error,
+ bool from_kernel)
{
struct secpolicy *newsp;
@@ -1852,10 +1866,21 @@ key_msg2sp(const struct sadb_x_policy *x
xisr_reqid = xisr->sadb_x_ipsecrequest_reqid;
/* validity check */
/*
+ * case 1) from_kernel == false
+ * That means the request comes from userland.
* If range violation of reqid, kernel will
* update it, don't refuse it.
+ *
+ * case 2) from_kernel == true
+ * That means the request comes from kernel
+ * (e.g. ipsec(4) I/F).
+ * Use the requested reqid to avoid inconsistency
+ * between kernel's reqid and the reqid in pf_key
+ * message sent to userland. The pf_key message is
+ * built by diverting request mbuf.
*/
- if (xisr_reqid > IPSEC_MANUAL_REQID_MAX) {
+ if (!from_kernel &&
+ xisr_reqid > IPSEC_MANUAL_REQID_MAX) {
IPSECLOG(LOG_DEBUG,
"reqid=%d range "
"violation, updated by kernel.\n",
@@ -1939,7 +1964,14 @@ free_exit:
return NULL;
}
-static u_int16_t
+struct secpolicy *
+key_msg2sp(const struct sadb_x_policy *xpl0, size_t len, int *error)
+{
+
+ return _key_msg2sp(xpl0, len, error, false);
+}
+
+u_int16_t
key_newreqid(void)
{
static u_int16_t auto_reqid = IPSEC_MANUAL_REQID_MAX + 1;
@@ -2086,24 +2118,13 @@ key_gather_mbuf(struct mbuf *m, const st
}
/*
- * SADB_X_SPDADD, SADB_X_SPDSETIDX or SADB_X_SPDUPDATE processing
- * add an entry to SP database, when received
- * <base, address(SD), (lifetime(H),) policy>
- * from the user(?).
- * Adding to SP database,
- * and send
- * <base, address(SD), (lifetime(H),) policy>
- * to the socket which was send.
- *
- * SPDADD set a unique policy entry.
- * SPDSETIDX like SPDADD without a part of policy requests.
- * SPDUPDATE replace a unique policy entry.
- *
- * m will always be freed.
+ * The argument _sp must not be overwritten until the SP is created and
+ * registered successfully.
*/
static int
-key_api_spdadd(struct socket *so, struct mbuf *m,
- const struct sadb_msghdr *mhp)
+key_spdadd(struct socket *so, struct mbuf *m,
+ const struct sadb_msghdr *mhp, struct secpolicy **_sp,
+ bool from_kernel)
{
const struct sockaddr *src, *dst;
const struct sadb_x_policy *xpl0;
@@ -2184,7 +2205,7 @@ key_api_spdadd(struct socket *so, struct
struct secpolicy *sp;
if (mhp->msg->sadb_msg_type == SADB_X_SPDUPDATE) {
- sp = key_lookup_and_remove_sp(&spidx);
+ sp = key_lookup_and_remove_sp(&spidx, from_kernel);
if (sp != NULL)
key_destroy_sp(sp);
} else {
@@ -2198,7 +2219,7 @@ key_api_spdadd(struct socket *so, struct
}
/* allocation new SP entry */
- newsp = key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error);
+ newsp = _key_msg2sp(xpl0, PFKEY_EXTLEN(xpl0), &error, from_kernel);
if (newsp == NULL) {
return key_senderror(so, m, error);
}
@@ -2214,11 +2235,20 @@ key_api_spdadd(struct socket *so, struct
newsp->lastused = newsp->created;
newsp->lifetime = lft ? lft->sadb_lifetime_addtime : 0;
newsp->validtime = lft ? lft->sadb_lifetime_usetime : 0;
+ if (from_kernel)
+ newsp->origin = IPSEC_SPORIGIN_KERNEL;
+ else
+ newsp->origin = IPSEC_SPORIGIN_USER;
key_init_sp(newsp);
+ if (from_kernel)
+ KEY_SP_REF(newsp);
sadb_x_policy_id = newsp->id;
+ if (_sp != NULL)
+ *_sp = newsp;
+
mutex_enter(&key_spd.lock);
SPLIST_WRITER_INSERT_TAIL(newsp->spidx.dir, newsp);
mutex_exit(&key_spd.lock);
@@ -2275,13 +2305,19 @@ key_api_spdadd(struct socket *so, struct
sizeof(*xpl), &off);
if (mpolicy == NULL) {
/* n is already freed */
+ /*
+ * A valid SP has been created, so we do not overwrite _sp with
+ * NULL here. Let the caller decide whether to use the SP or not.
+ */
return key_senderror(so, m, ENOBUFS);
}
xpl = (struct sadb_x_policy *)(mtod(mpolicy, char *) + off);
if (xpl->sadb_x_policy_exttype != SADB_X_EXT_POLICY) {
m_freem(n);
+ /* ditto */
return key_senderror(so, m, EINVAL);
}
+
xpl->sadb_x_policy_id = sadb_x_policy_id;
m_freem(m);
@@ -2290,6 +2326,55 @@ key_api_spdadd(struct socket *so, struct
}
/*
+ * SADB_X_SPDADD, SADB_X_SPDSETIDX or SADB_X_SPDUPDATE processing
+ * add an entry to SP database, when received
+ * <base, address(SD), (lifetime(H),) policy>
+ * from the user(?).
+ * Adding to SP database,
+ * and send
+ * <base, address(SD), (lifetime(H),) policy>
+ * to the socket which was send.
+ *
+ * SPDADD set a unique policy entry.
+ * SPDSETIDX like SPDADD without a part of policy requests.
+ * SPDUPDATE replace a unique policy entry.
+ *
+ * m will always be freed.
+ */
+static int
+key_api_spdadd(struct socket *so, struct mbuf *m,
+ const struct sadb_msghdr *mhp)
+{
+
+ return key_spdadd(so, m, mhp, NULL, false);
+}
+
+struct secpolicy *
+key_kpi_spdadd(struct mbuf *m)
+{
+ struct sadb_msghdr mh;
+ int error;
+ struct secpolicy *sp = NULL;
+
+ error = key_align(m, &mh);
+ if (error)
+ return NULL;
+
+ error = key_spdadd(NULL, m, &mh, &sp, true);
+ if (error) {
+ /*
+ * Currently, when key_spdadd() fails to send the PFKEY message
+ * after the SP has been created, it returns an error
+ * although the SP itself was created successfully.
+ * Kernel components do not care about PFKEY messages, so return
+ * the "sp" regardless of the error code. key_spdadd() overwrites
+ * the argument only if the SP is created successfully.
+ */
+ }
+ return sp;
+}
+
+/*
* get new policy id.
* OUT:
* 0: failure.
@@ -2370,7 +2455,7 @@ key_api_spddelete(struct socket *so, str
key_init_spidx_bymsghdr(&spidx, mhp);
/* Is there SP in SPD ? */
- sp = key_lookup_and_remove_sp(&spidx);
+ sp = key_lookup_and_remove_sp(&spidx, false);
if (sp == NULL) {
IPSECLOG(LOG_DEBUG, "no SP found.\n");
return key_senderror(so, m, EINVAL);
@@ -2426,8 +2511,8 @@ key_alloc_mbuf_simple(int len, int mflag
* m will always be freed.
*/
static int
-key_api_spddelete2(struct socket *so, struct mbuf *m,
- const struct sadb_msghdr *mhp)
+key_spddelete2(struct socket *so, struct mbuf *m,
+ const struct sadb_msghdr *mhp, bool from_kernel)
{
u_int32_t id;
struct secpolicy *sp;
@@ -2443,7 +2528,7 @@ key_api_spddelete2(struct socket *so, st
id = xpl->sadb_x_policy_id;
/* Is there SP in SPD ? */
- sp = key_lookupbyid_and_remove_sp(id);
+ sp = key_lookupbyid_and_remove_sp(id, from_kernel);
if (sp == NULL) {
IPSECLOG(LOG_DEBUG, "no SP found id:%u.\n", id);
return key_senderror(so, m, EINVAL);
@@ -2486,6 +2571,39 @@ key_api_spddelete2(struct socket *so, st
}
/*
+ * SADB_SPDDELETE2 processing
+ * receive
+ * <base, policy(*)>
+ * from the user(?), and set SADB_SASTATE_DEAD,
+ * and send,
+ * <base, policy(*)>
+ * to the ikmpd.
+ * policy(*) including direction of policy.
+ *
+ * m will always be freed.
+ */
+static int
+key_api_spddelete2(struct socket *so, struct mbuf *m,
+ const struct sadb_msghdr *mhp)
+{
+
+ return key_spddelete2(so, m, mhp, false);
+}
+
+int
+key_kpi_spddelete2(struct mbuf *m)
+{
+ struct sadb_msghdr mh;
+ int error;
+
+ error = key_align(m, &mh);
+ if (error)
+ return EINVAL;
+
+ return key_spddelete2(NULL, m, &mh, true);
+}
+
+/*
* SADB_X_GET processing
* receive
* <base, policy(*)>
@@ -2630,10 +2748,17 @@ key_api_spdflush(struct socket *so, stru
mutex_enter(&key_spd.lock);
SPLIST_WRITER_FOREACH(sp, dir) {
KASSERT(sp->state != IPSEC_SPSTATE_DEAD);
- key_unlink_sp(sp);
- mutex_exit(&key_spd.lock);
- key_destroy_sp(sp);
- goto retry;
+ /*
+ * Userland programs can remove only SPs created by userland
+ * programs; that is, they cannot remove SPs
+ * created in kernel (e.g. ipsec(4) I/F).
+ */
+ if (sp->origin == IPSEC_SPORIGIN_USER) {
+ key_unlink_sp(sp);
+ mutex_exit(&key_spd.lock);
+ key_destroy_sp(sp);
+ goto retry;
+ }
}
mutex_exit(&key_spd.lock);
}
@@ -7696,6 +7821,16 @@ key_senderror(struct socket *so, struct
KASSERT(m->m_len >= sizeof(struct sadb_msg));
+ if (so == NULL) {
+ /*
+ * This means the request comes from the kernel.
+ * In that case there is no need to send a message
+ * to userland. Just return the error code directly.
+ */
+ m_freem(m);
+ return code;
+ }
+
msg = mtod(m, struct sadb_msg *);
msg->sadb_msg_errno = code;
return key_sendup_mbuf(so, m, KEY_SENDUP_ONE);
Index: src/sys/netipsec/key.h
diff -u src/sys/netipsec/key.h:1.33 src/sys/netipsec/key.h:1.34
--- src/sys/netipsec/key.h:1.33 Tue Nov 21 07:03:08 2017
+++ src/sys/netipsec/key.h Wed Jan 10 10:56:31 2018
@@ -1,4 +1,4 @@
-/* $NetBSD: key.h,v 1.33 2017/11/21 07:03:08 ozaki-r Exp $ */
+/* $NetBSD: key.h,v 1.34 2018/01/10 10:56:31 knakahara Exp $ */
/* $FreeBSD: src/sys/netipsec/key.h,v 1.1.4.1 2003/01/24 05:11:36 sam Exp $ */
/* $KAME: key.h,v 1.21 2001/07/27 03:51:30 itojun Exp $ */
@@ -42,11 +42,18 @@ struct ipsecrequest;
struct secasvar;
struct sockaddr;
struct socket;
-struct sadb_msg;
-struct sadb_x_policy;
struct secasindex;
union sockaddr_union;
+#include <net/pfkeyv2.h>
+
+struct sadb_msghdr {
+ struct sadb_msg *msg;
+ void *ext[SADB_EXT_MAX + 1];
+ int extoff[SADB_EXT_MAX + 1];
+ int extlen[SADB_EXT_MAX + 1];
+};
+
int key_havesp(u_int dir);
struct secpolicy *key_lookup_sp_byspidx(const struct secpolicyindex *, u_int,
const char*, int);
@@ -116,7 +123,10 @@ int key_get_used(void);
u_int16_t key_portfromsaddr (const union sockaddr_union *);
-
+/* for ipsec(4) */
+struct secpolicy *key_kpi_spdadd(struct mbuf *);
+int key_kpi_spddelete2(struct mbuf *);
+u_int16_t key_newreqid(void);
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_SECA);
Added files:
Index: src/sys/net/if_ipsec.c
diff -u /dev/null src/sys/net/if_ipsec.c:1.1
--- /dev/null Wed Jan 10 10:56:31 2018
+++ src/sys/net/if_ipsec.c Wed Jan 10 10:56:30 2018
@@ -0,0 +1,1736 @@
+/* $NetBSD: if_ipsec.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $ */
+
+/*
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: if_ipsec.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $");
+
+#ifdef _KERNEL_OPT
+#include "opt_inet.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/syslog.h>
+#include <sys/cpu.h>
+#include <sys/kmem.h>
+#include <sys/mutex.h>
+#include <sys/pserialize.h>
+#include <sys/psref.h>
+
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+#include <net/bpf.h>
+#include <net/pfkeyv2.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#ifdef INET
+#include <netinet/in_var.h>
+#endif /* INET */
+
+#ifdef INET6
+#include <netinet6/in6_var.h>
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif /* INET6 */
+
+#include <netinet/ip_encap.h>
+
+#include <net/if_ipsec.h>
+
+#include <net/raw_cb.h>
+#include <net/pfkeyv2.h>
+
+#include <netipsec/key.h>
+#include <netipsec/ipsec.h>
+#include <netipsec/ipsecif.h>
+
+static void if_ipsec_ro_init_pc(void *, void *, struct cpu_info *);
+static void if_ipsec_ro_fini_pc(void *, void *, struct cpu_info *);
+
+static int if_ipsec_clone_create(struct if_clone *, int);
+static int if_ipsec_clone_destroy(struct ifnet *);
+
+static inline int if_ipsec_out_direct(struct ipsec_variant *, struct mbuf *, int);
+static inline void if_ipsec_in_enqueue(struct mbuf *, int, struct ifnet *);
+
+static int if_ipsec_encap_attach(struct ipsec_variant *);
+static int if_ipsec_encap_detach(struct ipsec_variant *);
+static int if_ipsec_set_tunnel(struct ifnet *,
+ struct sockaddr *, struct sockaddr *);
+static void if_ipsec_delete_tunnel(struct ifnet *);
+static int if_ipsec_ensure_flags(struct ifnet *, short);
+static void if_ipsec_attach0(struct ipsec_softc *);
+
+static int if_ipsec_update_variant(struct ipsec_softc *,
+ struct ipsec_variant *, struct ipsec_variant *);
+
+/* sadb_msg */
+static inline void if_ipsec_add_mbuf(struct mbuf *, void *, size_t);
+static inline void if_ipsec_add_pad(struct mbuf *, size_t);
+static inline size_t if_ipsec_set_sadb_addr(struct sadb_address *,
+ struct sockaddr *, int, uint16_t);
+static inline size_t if_ipsec_set_sadb_src(struct sadb_address *,
+ struct sockaddr *, int);
+static inline size_t if_ipsec_set_sadb_dst(struct sadb_address *,
+ struct sockaddr *, int);
+static inline size_t if_ipsec_set_sadb_x_policy(struct sadb_x_policy *,
+ struct sadb_x_ipsecrequest *, uint16_t, uint8_t, uint32_t, uint8_t);
+static inline void if_ipsec_set_sadb_msg(struct sadb_msg *, uint16_t, uint8_t);
+static inline void if_ipsec_set_sadb_msg_add(struct sadb_msg *, uint16_t);
+static inline void if_ipsec_set_sadb_msg_del(struct sadb_msg *, uint16_t);
+/* SPD */
+static int if_ipsec_share_sp(struct ipsec_variant *);
+static int if_ipsec_unshare_sp(struct ipsec_variant *);
+static inline struct secpolicy *if_ipsec_add_sp0(struct sockaddr *,
+ in_port_t, struct sockaddr *, in_port_t, int, int, int, u_int);
+static inline int if_ipsec_del_sp0(struct secpolicy *);
+static int if_ipsec_add_sp(struct ipsec_variant *,
+ struct sockaddr *, in_port_t, struct sockaddr *, in_port_t);
+static void if_ipsec_del_sp(struct ipsec_variant *);
+static int if_ipsec_replace_sp(struct ipsec_softc *, struct ipsec_variant *,
+ struct ipsec_variant *);
+
+static int if_ipsec_set_addr_port(struct sockaddr *, struct sockaddr *,
+ in_port_t);
+#define IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, target) \
+ if_ipsec_set_addr_port(target, (var)->iv_psrc, (var)->iv_sport)
+#define IF_IPSEC_GATHER_PDST_ADDR_PORT(var, target) \
+ if_ipsec_set_addr_port(target, (var)->iv_pdst, (var)->iv_dport)
+
+/*
+ * ipsec global variable definitions
+ */
+
+/* This list is used in ioctl context only. */
+LIST_HEAD(ipsec_sclist, ipsec_softc);
+static struct {
+ struct ipsec_sclist list;
+ kmutex_t lock;
+} ipsec_softcs __cacheline_aligned;
+
+pserialize_t ipsec_psz __read_mostly;
+struct psref_class *iv_psref_class __read_mostly;
+
+struct if_clone ipsec_cloner =
+ IF_CLONE_INITIALIZER("ipsec", if_ipsec_clone_create, if_ipsec_clone_destroy);
+static int max_ipsec_nesting = MAX_IPSEC_NEST;
+
+/*
+ * Attach routine for ipsec(4): initialise the module-wide softc list and
+ * its mutex, create the pserialize instance and the psref class used for
+ * variants, then register the interface cloner.  "count" is not used.
+ */
+/* ARGSUSED */
+void
+ipsecifattach(int count)
+{
+
+	/* Global softc list and the mutex protecting it. */
+	LIST_INIT(&ipsec_softcs.list);
+	mutex_init(&ipsec_softcs.lock, MUTEX_DEFAULT, IPL_NONE);
+
+	/* Passive-reference machinery for struct ipsec_variant. */
+	iv_psref_class = psref_class_create("ipsecvar", IPL_SOFTNET);
+	ipsec_psz = pserialize_create();
+
+	/* Register the cloner once everything above is in place. */
+	if_clone_attach(&ipsec_cloner);
+}
+
+/*
+ * Cloner "create" callback: allocate a softc for the given unit, attach
+ * its ifnet, give it an initial zero-filled variant (no tunnel addresses),
+ * set up the per-softc lock and per-CPU ipsec_ro, and insert the softc
+ * into the global list.  Always returns 0.
+ */
+static int
+if_ipsec_clone_create(struct if_clone *ifc, int unit)
+{
+	struct ipsec_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP);
+	struct ipsec_variant *iv;
+
+	if_initname(&sc->ipsec_if, ifc->ifc_name, unit);
+	if_ipsec_attach0(sc);
+
+	/* Initial variant: zeroed, pointing back at its softc. */
+	iv = kmem_zalloc(sizeof(*iv), KM_SLEEP);
+	iv->iv_softc = sc;
+	psref_target_init(&iv->iv_psref, iv_psref_class);
+	sc->ipsec_var = iv;
+
+	mutex_init(&sc->ipsec_lock, MUTEX_DEFAULT, IPL_NONE);
+
+	/* One struct ipsec_ro per CPU, each initialised by the callback. */
+	sc->ipsec_ro_percpu = percpu_alloc(sizeof(struct ipsec_ro));
+	percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_init_pc, NULL);
+
+	mutex_enter(&ipsec_softcs.lock);
+	LIST_INSERT_HEAD(&ipsec_softcs.list, sc, ipsec_list);
+	mutex_exit(&ipsec_softcs.lock);
+
+	return 0;
+}
+
+/*
+ * Fill in the ifnet embedded in "sc" with the ipsec(4) defaults and make
+ * it known to the network stack (if_initialize, link-level address, BPF
+ * attachment, if_register).
+ */
+static void
+if_ipsec_attach0(struct ipsec_softc *sc)
+{
+	struct ifnet * const ifp = &sc->ipsec_if;
+
+	ifp->if_addrlen = 0;
+	ifp->if_mtu = IPSEC_MTU;
+	/* point-to-point + multicast, plus the ipsec(4) specific default */
+	ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST | IFF_FWD_IPV6;
+	ifp->if_extflags = IFEF_NO_LINK_STATE_CHANGE | IFEF_MPSAFE;
+	ifp->if_ioctl = if_ipsec_ioctl;
+	ifp->if_output = if_ipsec_output;
+	ifp->if_type = IFT_IPSEC;
+	ifp->if_dlt = DLT_NULL;
+	ifp->if_softc = sc;
+	IFQ_SET_READY(&ifp->if_snd);
+
+	if_initialize(ifp);
+	if_alloc_sadl(ifp);
+	bpf_attach(ifp, DLT_NULL, sizeof(u_int));
+	if_register(ifp);
+}
+
+/*
+ * percpu_foreach() callback: initialise the lock of one per-CPU
+ * struct ipsec_ro.  "arg" and "ci" are unused.
+ */
+static void
+if_ipsec_ro_init_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
+{
+	struct ipsec_ro * const ro = p;
+
+	mutex_init(&ro->ir_lock, MUTEX_DEFAULT, IPL_NONE);
+}
+
+/*
+ * percpu_foreach() callback: release the cached route of one per-CPU
+ * struct ipsec_ro and destroy its lock.  "arg" and "ci" are unused.
+ */
+static void
+if_ipsec_ro_fini_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
+{
+	struct ipsec_ro * const ro = p;
+
+	rtcache_free(&ro->ir_ro);
+	mutex_destroy(&ro->ir_lock);
+}
+
+/*
+ * Cloner "destroy" callback: unlink the softc from the global list, tear
+ * down any configured tunnel, detach the ifnet, and release the per-CPU
+ * state, the lock, the current variant and the softc.  Always returns 0.
+ */
+static int
+if_ipsec_clone_destroy(struct ifnet *ifp)
+{
+	struct ipsec_softc *sc = ifp->if_softc;
+	int bound;
+
+	mutex_enter(&ipsec_softcs.lock);
+	LIST_REMOVE(sc, ipsec_list);
+	mutex_exit(&ipsec_softcs.lock);
+
+	/* if_ipsec_delete_tunnel() is called with the LWP bound. */
+	bound = curlwp_bind();
+	if_ipsec_delete_tunnel(&sc->ipsec_if);
+	curlwp_bindx(bound);
+
+	bpf_detach(ifp);
+	if_detach(ifp);
+
+	percpu_foreach(sc->ipsec_ro_percpu, if_ipsec_ro_fini_pc, NULL);
+	percpu_free(sc->ipsec_ro_percpu, sizeof(struct ipsec_ro));
+
+	mutex_destroy(&sc->ipsec_lock);
+
+	kmem_free(sc->ipsec_var, sizeof(*sc->ipsec_var));
+	kmem_free(sc, sizeof(*sc));
+
+	return 0;
+}
+
+/* True when IFF_NAT_T is set in the interface flags of "sc". */
+static inline bool
+if_ipsec_nat_t(struct ipsec_softc *sc)
+{
+	const struct ifnet *ifp = &sc->ipsec_if;
+
+	return (ifp->if_flags & IFF_NAT_T) ? true : false;
+}
+
+/* True when IFF_FWD_IPV6 is set in the interface flags of "sc". */
+static inline bool
+if_ipsec_fwd_ipv6(struct ipsec_softc *sc)
+{
+	const struct ifnet *ifp = &sc->ipsec_if;
+
+	return (ifp->if_flags & IFF_FWD_IPV6) ? true : false;
+}
+
+/*
+ * Encapsulation match function.  "arg" is the ipsec_softc.  Returns the
+ * value of ipsecif4_encap_func() when the interface is up and configured,
+ * "proto" is IPPROTO_IPV4 or IPPROTO_IPV6, the packet is at least
+ * sizeof(struct ip) long, its leading header is IPv4 and both tunnel
+ * endpoints are AF_INET.  Returns 0 in every other case.
+ */
+int
+if_ipsec_encap_func(struct mbuf *m, int off, int proto, void *arg)
+{
+	struct ipsec_softc *sc = arg;
+	struct ipsec_variant *var = NULL;
+	struct psref psref;
+	struct ip ip;
+	int ret = 0;
+
+	KASSERT(sc != NULL);
+
+	if ((sc->ipsec_if.if_flags & IFF_UP) == 0)
+		goto out;
+
+	var = if_ipsec_getref_variant(sc, &psref);
+	if (if_ipsec_variant_is_unconfigured(var))
+		goto out;
+
+	/* Only IP-in-IP style payloads are candidates. */
+	if (proto != IPPROTO_IPV4 && proto != IPPROTO_IPV6)
+		goto out;
+
+	if (m->m_pkthdr.len < sizeof(ip))
+		goto out;
+
+	/* Copy the header out so the mbuf chain layout does not matter. */
+	m_copydata(m, 0, sizeof(ip), &ip);
+#ifdef INET
+	if (ip.ip_v == IPVERSION &&
+	    var->iv_psrc->sa_family == AF_INET &&
+	    var->iv_pdst->sa_family == AF_INET)
+		ret = ipsecif4_encap_func(m, &ip, var);
+#endif
+
+out:
+	if (var != NULL)
+		if_ipsec_putref_variant(var, &psref);
+	return ret;
+}
+
+/*
+ * A misconfigured ipsec(4) I/F can send packets back into itself and
+ * recurse without bound.  Guard against that by enforcing an upper limit
+ * (max_ipsec_nesting) through if_tunnel_check_nesting(), whose result is
+ * returned unchanged.
+ */
+static int
+if_ipsec_check_nesting(struct ifnet *ifp, struct mbuf *m)
+{
+	const int limit = max_ipsec_nesting;
+
+	return if_tunnel_check_nesting(ifp, m, limit);
+}
+
+/*
+ * if_output handler for ipsec(4).
+ *
+ * Rejects the packet when the nesting limit is exceeded (error from
+ * if_ipsec_check_nesting()), when the interface is down or has no tunnel
+ * configured (ENETDOWN), or when the DLT_NULL header cannot be prepended
+ * (ENOBUFS).  IPv6 packets are silently dropped (return 0) when
+ * IFF_FWD_IPV6 is not set.  Otherwise the packet is handed to
+ * if_ipsec_out_direct().  Any non-zero result bumps if_oerrors.
+ *
+ * The mbuf is always consumed on the paths handled here.
+ */
+int
+if_ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
+    const struct rtentry *rt)
+{
+	struct ipsec_softc *sc = ifp->if_softc;
+	struct ipsec_variant *var;
+	struct psref psref;
+	int error;
+	int bound;
+
+	IFQ_CLASSIFY(&ifp->if_snd, m, dst->sa_family);
+
+	error = if_ipsec_check_nesting(ifp, m);
+	if (error) {
+		m_freem(m);
+		goto noref_end;
+	}
+
+	if ((ifp->if_flags & IFF_UP) == 0) {
+		m_freem(m);
+		error = ENETDOWN;
+		goto noref_end;
+	}
+
+	/* Hold a passive reference to the variant while the LWP is bound. */
+	bound = curlwp_bind();
+	var = if_ipsec_getref_variant(sc, &psref);
+	if (if_ipsec_variant_is_unconfigured(var)) {
+		m_freem(m);
+		error = ENETDOWN;
+		goto end;
+	}
+
+	m->m_flags &= ~(M_BCAST|M_MCAST);
+
+	/* use DLT_NULL encapsulation here to pass inner af type */
+	M_PREPEND(m, sizeof(int), M_DONTWAIT);
+	if (!m) {
+		error = ENOBUFS;
+		goto end;
+	}
+	*mtod(m, int *) = dst->sa_family;
+
+	/*
+	 * Use #ifdef like the rest of this file; "#if INET6" fails to
+	 * preprocess when INET6 is defined without a value.
+	 */
+#ifdef INET6
+	/* drop IPv6 packet if IFF_FWD_IPV6 is not set */
+	if (dst->sa_family == AF_INET6 &&
+	    !if_ipsec_fwd_ipv6(sc)) {
+		/*
+		 * IPv6 packet is not allowed to forward, that is not error.
+		 */
+		error = 0;
+		IF_DROP(&ifp->if_snd);
+		m_freem(m);
+		goto end;
+	}
+#endif
+
+	error = if_ipsec_out_direct(var, m, dst->sa_family);
+
+end:
+	if_ipsec_putref_variant(var, &psref);
+	curlwp_bindx(bound);
+noref_end:
+	if (error)
+		ifp->if_oerrors++;
+
+	return error;
+}
+
+/*
+ * Tap the DLT_NULL framed packet to BPF, strip the leading address-family
+ * word and pass the packet to the variant's output routine.  The caller
+ * must hold a reference on "var".  On success the output packet and byte
+ * counters are updated (the byte count includes the DLT_NULL word) and 0
+ * is returned; otherwise the error from iv_output() is returned.
+ */
+static inline int
+if_ipsec_out_direct(struct ipsec_variant *var, struct mbuf *m, int family)
+{
+	struct ifnet *ifp = &var->iv_softc->ipsec_if;
+	int rv;
+
+	KASSERT(if_ipsec_heldref_variant(var));
+	KASSERT(var->iv_output != NULL);
+
+	/* Remember the length now; the mbuf is handed off below. */
+	const int framelen = m->m_pkthdr.len;
+
+	/* input DLT_NULL frame to BPF */
+	bpf_mtap(ifp, m);
+
+	/* grab and chop off inner af type */
+	/* XXX need pullup? */
+	m_adj(m, sizeof(int));
+
+	rv = var->iv_output(var, family, m);
+	if (rv == 0) {
+		ifp->if_opackets++;
+		ifp->if_obytes += framelen;
+	}
+
+	return rv;
+}
+
+/*
+ * Input path: mark "ifp" as the receiving interface of "m", tap the
+ * packet to BPF with its address family, and queue it to the network
+ * layer matching "af".
+ */
+void
+if_ipsec_input(struct mbuf *m, int af, struct ifnet *ifp)
+{
+
+	KASSERT(ifp != NULL);
+
+	m_set_rcvif(m, ifp);
+	bpf_mtap_af(ifp, af, m);
+	if_ipsec_in_enqueue(m, af, ifp);
+}
+
+/*
+ * Queue "m" on the packet queue of the network layer selected by "af"
+ * (ip_pktq or ip6_pktq).  An unsupported family counts as an input error
+ * and the packet is freed.  When the enqueue itself fails the packet is
+ * freed without touching the counters; on success if_ibytes and
+ * if_ipackets are updated.
+ */
+static inline void
+if_ipsec_in_enqueue(struct mbuf *m, int af, struct ifnet *ifp)
+{
+	pktqueue_t *q;
+	int nbytes;
+
+	/* Select the input queue from the address family. */
+	switch (af) {
+#ifdef INET
+	case AF_INET:
+		q = ip_pktq;
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		q = ip6_pktq;
+		break;
+#endif
+	default:
+		ifp->if_ierrors++;
+		m_freem(m);
+		return;
+	}
+
+#if 1
+	const u_int h = curcpu()->ci_index;
+#else
+	const uint32_t h = pktq_rps_hash(m);
+#endif
+	/* Read the length before the mbuf is handed to the queue. */
+	nbytes = m->m_pkthdr.len;
+	if (__predict_false(!pktq_enqueue(q, m, h))) {
+		m_freem(m);
+		return;
+	}
+
+	ifp->if_ibytes += nbytes;
+	ifp->if_ipackets++;
+}
+
+/*
+ * Check that sa_len matches the sockaddr size of the address family.
+ * Returns 0 when it does, EINVAL when the length is wrong, and
+ * EAFNOSUPPORT for a family other than AF_INET/AF_INET6.
+ */
+static inline int
+if_ipsec_check_salen(struct sockaddr *addr)
+{
+	size_t want;
+
+	switch (addr->sa_family) {
+#ifdef INET
+	case AF_INET:
+		want = sizeof(struct sockaddr_in);
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		want = sizeof(struct sockaddr_in6);
+		break;
+#endif /* INET6 */
+	default:
+		return EAFNOSUPPORT;
+	}
+
+	return (addr->sa_len == want) ? 0 : EINVAL;
+}
+
+/*
+ * ioctl handler for ipsec(4).
+ *
+ * Handles address initialisation, multicast add/delete, MTU changes
+ * (bounded by IPSEC_MTU_MIN/IPSEC_MTU_MAX), and setting, deleting and
+ * reading the tunnel endpoint addresses.  Everything else goes to
+ * ifioctl_common(), after which if_ipsec_ensure_flags() is called with
+ * the flags the interface had on entry.
+ *
+ * Returns 0 on success or an errno value.
+ *
+ * Every curlwp_bind() in this function is paired with curlwp_bindx() on
+ * both the success path and the "bad" path, so the LWP is never left
+ * bound to its CPU on return.
+ */
+/* XXX how should we handle IPv6 scope on SIOC[GS]IFPHYADDR? */
+int
+if_ipsec_ioctl(struct ifnet *ifp, u_long cmd, void *data)
+{
+	struct ipsec_softc *sc = ifp->if_softc;
+	struct ipsec_variant *var = NULL;
+	struct ifreq *ifr = (struct ifreq*)data;
+	struct ifaddr *ifa = (struct ifaddr*)data;
+	int error = 0, size;
+	struct sockaddr *dst, *src;
+	u_long mtu;
+	short oflags = ifp->if_flags;
+	int bound;
+	struct psref psref;
+
+	switch (cmd) {
+	case SIOCINITIFADDR:
+		ifp->if_flags |= IFF_UP;
+		ifa->ifa_rtrequest = p2p_rtrequest;
+		break;
+
+	case SIOCSIFDSTADDR:
+		break;
+
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		switch (ifr->ifr_addr.sa_family) {
+#ifdef INET
+		case AF_INET:	/* IP supports Multicast */
+			break;
+#endif /* INET */
+#ifdef INET6
+		case AF_INET6:	/* IP6 supports Multicast */
+			break;
+#endif /* INET6 */
+		default:  /* Other protocols do not support Multicast */
+			error = EAFNOSUPPORT;
+			break;
+		}
+		break;
+
+	case SIOCSIFMTU:
+		mtu = ifr->ifr_mtu;
+		if (mtu < IPSEC_MTU_MIN || mtu > IPSEC_MTU_MAX)
+			return EINVAL;
+		else if ((error = ifioctl_common(ifp, cmd, data)) == ENETRESET)
+			error = 0;
+		break;
+
+#ifdef INET
+	case SIOCSIFPHYADDR:
+#endif
+#ifdef INET6
+	case SIOCSIFPHYADDR_IN6:
+#endif /* INET6 */
+	case SIOCSLIFPHYADDR:
+		/* Locate src/dst inside the request for this command. */
+		switch (cmd) {
+#ifdef INET
+		case SIOCSIFPHYADDR:
+			src = (struct sockaddr *)
+				&(((struct in_aliasreq *)data)->ifra_addr);
+			dst = (struct sockaddr *)
+				&(((struct in_aliasreq *)data)->ifra_dstaddr);
+			break;
+#endif /* INET */
+#ifdef INET6
+		case SIOCSIFPHYADDR_IN6:
+			src = (struct sockaddr *)
+				&(((struct in6_aliasreq *)data)->ifra_addr);
+			dst = (struct sockaddr *)
+				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
+			break;
+#endif /* INET6 */
+		case SIOCSLIFPHYADDR:
+			src = (struct sockaddr *)
+				&(((struct if_laddrreq *)data)->addr);
+			dst = (struct sockaddr *)
+				&(((struct if_laddrreq *)data)->dstaddr);
+			break;
+		default:
+			return EINVAL;
+		}
+
+		/* sa_family must be equal */
+		if (src->sa_family != dst->sa_family)
+			return EINVAL;
+
+		error = if_ipsec_check_salen(src);
+		if (error)
+			return error;
+		error = if_ipsec_check_salen(dst);
+		if (error)
+			return error;
+
+		/* check sa_family looks sane for the cmd */
+		switch (cmd) {
+#ifdef INET
+		case SIOCSIFPHYADDR:
+			if (src->sa_family == AF_INET)
+				break;
+			return EAFNOSUPPORT;
+#endif /* INET */
+#ifdef INET6
+		case SIOCSIFPHYADDR_IN6:
+			if (src->sa_family == AF_INET6)
+				break;
+			return EAFNOSUPPORT;
+#endif /* INET6 */
+		case SIOCSLIFPHYADDR:
+			/* checks done in the above */
+			break;
+		}
+		/*
+		 * calls if_ipsec_getref_variant() for other softcs to check
+		 * address pair duplication
+		 */
+		bound = curlwp_bind();
+		error = if_ipsec_set_tunnel(&sc->ipsec_if, src, dst);
+		if (error)
+			goto bad;
+		/* Success path must unbind too, not only the "bad" path. */
+		curlwp_bindx(bound);
+		break;
+
+	case SIOCDIFPHYADDR:
+		bound = curlwp_bind();
+		if_ipsec_delete_tunnel(&sc->ipsec_if);
+		curlwp_bindx(bound);
+		break;
+
+	case SIOCGIFPSRCADDR:
+#ifdef INET6
+	case SIOCGIFPSRCADDR_IN6:
+#endif /* INET6 */
+		bound = curlwp_bind();
+		var = if_ipsec_getref_variant(sc, &psref);
+		if (var->iv_psrc == NULL) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		src = var->iv_psrc;
+		switch (cmd) {
+#ifdef INET
+		case SIOCGIFPSRCADDR:
+			dst = &ifr->ifr_addr;
+			size = sizeof(ifr->ifr_addr);
+			break;
+#endif /* INET */
+#ifdef INET6
+		case SIOCGIFPSRCADDR_IN6:
+			dst = (struct sockaddr *)
+				&(((struct in6_ifreq *)data)->ifr_addr);
+			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
+			break;
+#endif /* INET6 */
+		default:
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		/* The stored address must fit in the caller's buffer. */
+		if (src->sa_len > size) {
+			error = EINVAL;
+			goto bad;
+		}
+		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
+		if (error)
+			goto bad;
+		if_ipsec_putref_variant(var, &psref);
+		curlwp_bindx(bound);
+		break;
+
+	case SIOCGIFPDSTADDR:
+#ifdef INET6
+	case SIOCGIFPDSTADDR_IN6:
+#endif /* INET6 */
+		bound = curlwp_bind();
+		var = if_ipsec_getref_variant(sc, &psref);
+		if (var->iv_pdst == NULL) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		src = var->iv_pdst;
+		switch (cmd) {
+#ifdef INET
+		case SIOCGIFPDSTADDR:
+			dst = &ifr->ifr_addr;
+			size = sizeof(ifr->ifr_addr);
+			break;
+#endif /* INET */
+#ifdef INET6
+		case SIOCGIFPDSTADDR_IN6:
+			dst = (struct sockaddr *)
+				&(((struct in6_ifreq *)data)->ifr_addr);
+			size = sizeof(((struct in6_ifreq *)data)->ifr_addr);
+			break;
+#endif /* INET6 */
+		default:
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+		if (src->sa_len > size) {
+			error = EINVAL;
+			goto bad;
+		}
+		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
+		if (error)
+			goto bad;
+		if_ipsec_putref_variant(var, &psref);
+		curlwp_bindx(bound);
+		break;
+
+	case SIOCGLIFPHYADDR:
+		bound = curlwp_bind();
+		var = if_ipsec_getref_variant(sc, &psref);
+		if (if_ipsec_variant_is_unconfigured(var)) {
+			error = EADDRNOTAVAIL;
+			goto bad;
+		}
+
+		/* copy src */
+		src = var->iv_psrc;
+		dst = (struct sockaddr *)
+			&(((struct if_laddrreq *)data)->addr);
+		size = sizeof(((struct if_laddrreq *)data)->addr);
+		if (src->sa_len > size) {
+			error = EINVAL;
+			goto bad;
+		}
+		error = IF_IPSEC_GATHER_PSRC_ADDR_PORT(var, dst);
+		if (error)
+			goto bad;
+
+		/* copy dst */
+		src = var->iv_pdst;
+		dst = (struct sockaddr *)
+			&(((struct if_laddrreq *)data)->dstaddr);
+		size = sizeof(((struct if_laddrreq *)data)->dstaddr);
+		if (src->sa_len > size) {
+			error = EINVAL;
+			goto bad;
+		}
+		error = IF_IPSEC_GATHER_PDST_ADDR_PORT(var, dst);
+		if (error)
+			goto bad;
+		if_ipsec_putref_variant(var, &psref);
+		curlwp_bindx(bound);
+		break;
+
+	default:
+		error = ifioctl_common(ifp, cmd, data);
+		if (!error) {
+			bound = curlwp_bind();
+			error = if_ipsec_ensure_flags(&sc->ipsec_if, oflags);
+			if (error)
+				goto bad;
+			/* Success path must unbind too. */
+			curlwp_bindx(bound);
+		}
+		break;
+	}
+	return error;
+
+bad:
+	/* Only reached after curlwp_bind(); "var" is set if a ref is held. */
+	if (var != NULL)
+		if_ipsec_putref_variant(var, &psref);
+	curlwp_bindx(bound);
+
+	return error;
+}
+
+/*
+ * Pair of per-address-family callbacks operating on a variant; one
+ * instance exists for attaching and one for detaching.
+ */
+struct encap_funcs {
+	int (*ef_inet)(struct ipsec_variant *);		/* AF_INET handler */
+	int (*ef_inet6)(struct ipsec_variant *);	/* AF_INET6 handler */
+};
+
+/* Callbacks used by if_ipsec_encap_attach(). */
+static struct encap_funcs ipsec_encap_attach = {
+	.ef_inet = ipsecif4_attach,
+	.ef_inet6 = ipsecif6_attach,
+};
+
+/* Callbacks used by if_ipsec_encap_detach(). */
+static struct encap_funcs ipsec_encap_detach = {
+	.ef_inet = ipsecif4_detach,
+	.ef_inet6 = ipsecif6_detach,
+};
+
+/*
+ * Invoke the callback in "funcs" that matches the address family of the
+ * variant's source address.  "var" must be configured.  Returns the
+ * callback's result, or EINVAL for any other address family.
+ */
+static int
+if_ipsec_encap_common(struct ipsec_variant *var, struct encap_funcs *funcs)
+{
+	int rv = EINVAL;
+
+	KASSERT(var != NULL);
+	KASSERT(if_ipsec_variant_is_configured(var));
+
+	switch (var->iv_psrc->sa_family) {
+#ifdef INET
+	case AF_INET:
+		rv = funcs->ef_inet(var);
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		rv = funcs->ef_inet6(var);
+		break;
+#endif /* INET6 */
+	default:
+		/* rv stays EINVAL */
+		break;
+	}
+
+	return rv;
+}
+
+/* Run the per-family attach callback for "var"; see if_ipsec_encap_common(). */
+static int
+if_ipsec_encap_attach(struct ipsec_variant *var)
+{
+	struct encap_funcs * const funcs = &ipsec_encap_attach;
+
+	return if_ipsec_encap_common(var, funcs);
+}
+
+/* Run the per-family detach callback for "var"; see if_ipsec_encap_common(). */
+static int
+if_ipsec_encap_detach(struct ipsec_variant *var)
+{
+	struct encap_funcs * const funcs = &ipsec_encap_detach;
+
+	return if_ipsec_encap_common(var, funcs);
+}
+
+/*
+ * Validate and set ipsec(4) I/F configurations.
+ * (1) validate
+ * (1-1) Check the argument src and dst address pair will change
+ * configuration from current src and dst address pair.
+ * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
+ * with argument src and dst address pair, except for NAT-T shared
+ * tunnels.
+ * (2) set
+ * (2-1) Create variant for new configuration.
+ * (2-2) Create temporary "null" variant used to avoid to access
+ * dangling variant while SPs are deleted and added.
+ * (2-3) Swap variant include its SPs.
+ * (2-4) Cleanup last configurations.
+ *
+ * Returns 0 on success, and also when the requested address/port pair
+ * equals the current one (nothing is changed in that case). Fails with
+ * the error from encap_lock_enter(), EINVAL for an address family other
+ * than AF_INET/AF_INET6, EADDRNOTAVAIL when another ipsec(4) I/F already
+ * uses the same pair, or the error from if_ipsec_encap_attach() or
+ * if_ipsec_update_variant().
+ *
+ * src and dst are duplicated; the copies have their port fields zeroed
+ * and the ports are kept separately in iv_sport/iv_dport. On every path
+ * that reaches "out" the duplicates and both newly allocated variants
+ * are freed. On success the old variant and its addresses are freed
+ * instead, together with the temporary "null" variant.
+ */
+static int
+if_ipsec_set_tunnel(struct ifnet *ifp,
+ struct sockaddr *src, struct sockaddr *dst)
+{
+ struct ipsec_softc *sc = ifp->if_softc;
+ struct ipsec_softc *sc2;
+ struct ipsec_variant *ovar, *nvar, *nullvar;
+ struct sockaddr *osrc, *odst;
+ struct sockaddr *nsrc, *ndst;
+ in_port_t nsport = 0, ndport = 0;
+ int error;
+
+ error = encap_lock_enter();
+ if (error)
+ return error;
+
+ nsrc = sockaddr_dup(src, M_WAITOK);
+ ndst = sockaddr_dup(dst, M_WAITOK);
+ nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
+ nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
+
+ mutex_enter(&sc->ipsec_lock);
+
+ ovar = sc->ipsec_var;
+
+ switch(nsrc->sa_family) {
+#ifdef INET
+ case AF_INET:
+ nsport = ntohs(satosin(src)->sin_port);
+ /*
+ * avoid confuse SP when NAT-T disabled,
+ * e.g.
+ * expected: 10.0.1.2[any] 10.0.1.1[any] 4(ipv4)
+ * confuse : 10.0.1.2[600] 10.0.1.1[600] 4(ipv4)
+ */
+ satosin(nsrc)->sin_port = 0;
+ ndport = ntohs(satosin(dst)->sin_port);
+ satosin(ndst)->sin_port = 0;
+ break;
+#endif /* INET */
+#ifdef INET6
+ case AF_INET6:
+ nsport = ntohs(satosin6(src)->sin6_port);
+ satosin6(nsrc)->sin6_port = 0;
+ ndport = ntohs(satosin6(dst)->sin6_port);
+ satosin6(ndst)->sin6_port = 0;
+ break;
+#endif /* INET6 */
+ default:
+ log(LOG_DEBUG,
+ "%s: Invalid address family: %d.\n",
+ __func__, src->sa_family);
+ error = EINVAL;
+ goto out;
+ }
+
+ /*
+ * (1-1) Check the argument src and dst address pair will change
+ * configuration from current src and dst address pair.
+ */
+ if ((ovar->iv_pdst && sockaddr_cmp(ovar->iv_pdst, dst) == 0) &&
+ (ovar->iv_psrc && sockaddr_cmp(ovar->iv_psrc, src) == 0) &&
+ (ovar->iv_sport == nsport && ovar->iv_dport == ndport)) {
+ /* address and port pair not changed. */
+ error = 0;
+ goto out;
+ }
+
+ /*
+ * (1-2) Check any ipsec(4) I/F uses duplicated src and dst address pair
+ * with argument src and dst address pair, except for NAT-T shared
+ * tunnels.
+ */
+ mutex_enter(&ipsec_softcs.lock);
+ LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
+ struct ipsec_variant *var2;
+ struct psref psref;
+
+ if (sc2 == sc)
+ continue;
+ var2 = if_ipsec_getref_variant(sc2, &psref);
+ if (if_ipsec_variant_is_unconfigured(var2)) {
+ if_ipsec_putref_variant(var2, &psref);
+ continue;
+ }
+ if (if_ipsec_nat_t(sc) || if_ipsec_nat_t(sc2)) {
+ if_ipsec_putref_variant(var2, &psref);
+ continue; /* NAT-T shared tunnel */
+ }
+ if (sockaddr_cmp(var2->iv_pdst, dst) == 0 &&
+ sockaddr_cmp(var2->iv_psrc, src) == 0) {
+ if_ipsec_putref_variant(var2, &psref);
+ mutex_exit(&ipsec_softcs.lock);
+ error = EADDRNOTAVAIL;
+ goto out;
+ }
+
+ if_ipsec_putref_variant(var2, &psref);
+ /* XXX both end must be valid? (I mean, not 0.0.0.0) */
+ }
+ mutex_exit(&ipsec_softcs.lock);
+
+
+ osrc = ovar->iv_psrc;
+ odst = ovar->iv_pdst;
+
+ /*
+ * (2-1) Create ipsec_variant for new configuration.
+ */
+ if_ipsec_copy_variant(nvar, ovar);
+ nvar->iv_psrc = nsrc;
+ nvar->iv_pdst = ndst;
+ nvar->iv_sport = nsport;
+ nvar->iv_dport = ndport;
+ nvar->iv_encap_cookie4 = NULL;
+ nvar->iv_encap_cookie6 = NULL;
+ psref_target_init(&nvar->iv_psref, iv_psref_class);
+ error = if_ipsec_encap_attach(nvar);
+ if (error)
+ goto out;
+
+ /*
+ * (2-2) Create temporary "null" variant.
+ */
+ if_ipsec_copy_variant(nullvar, ovar);
+ if_ipsec_clear_config(nullvar);
+ psref_target_init(&nullvar->iv_psref, iv_psref_class);
+ membar_producer();
+ /*
+ * (2-3) Swap variant include its SPs.
+ */
+ error = if_ipsec_update_variant(sc, nvar, nullvar);
+ if (error) {
+ if_ipsec_encap_detach(nvar);
+ goto out;
+ }
+
+ mutex_exit(&sc->ipsec_lock);
+
+ /*
+ * (2-4) Cleanup last configurations.
+ */
+ if (if_ipsec_variant_is_configured(ovar))
+ if_ipsec_encap_detach(ovar);
+ encap_lock_exit();
+
+ if (osrc != NULL)
+ sockaddr_free(osrc);
+ if (odst != NULL)
+ sockaddr_free(odst);
+ kmem_free(ovar, sizeof(*ovar));
+ kmem_free(nullvar, sizeof(*nullvar));
+
+ return 0;
+
+out:
+ mutex_exit(&sc->ipsec_lock);
+ encap_lock_exit();
+
+ sockaddr_free(nsrc);
+ sockaddr_free(ndst);
+ kmem_free(nvar, sizeof(*nvar));
+ kmem_free(nullvar, sizeof(*nullvar));
+
+ return error;
+}
+
+/*
+ * Validate and delete ipsec(4) I/F configurations.
+ * (1) validate
+ *     (1-1) Check current src and dst address pair are null,
+ *           which means the ipsec(4) I/F is already done deletetunnel.
+ * (2) delete
+ *     (2-1) Create variant for deleted status.
+ *     (2-2) Create temporary "null" variant used to avoid to access
+ *           dangling variant while SPs are deleted and added.
+ *           NOTE:
+ *           The contents of temporary "null" variant equal to the variant
+ *           of (2-1), however two psref_target_destroy() synchronization
+ *           points are necessary to avoid to access dangling variant
+ *           while SPs are deleted and added. To implement that simply,
+ *           we use the same manner as if_ipsec_set_tunnel(), that is,
+ *           create extra "null" variant and use it temporarily.
+ *     (2-3) Swap variant include its SPs.
+ *     (2-4) Cleanup last configurations.
+ *
+ * Returns nothing; when encap_lock_enter() fails the function gives up
+ * silently.  Both variants allocated here are released on every path:
+ * the "already deleted" early return frees nvar and nullvar, the normal
+ * path installs nvar and frees the old variant and nullvar.
+ */
+static void
+if_ipsec_delete_tunnel(struct ifnet *ifp)
+{
+	struct ipsec_softc *sc = ifp->if_softc;
+	struct ipsec_variant *ovar, *nvar, *nullvar;
+	struct sockaddr *osrc, *odst;
+	int error;
+
+	error = encap_lock_enter();
+	if (error)
+		return;
+
+	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
+	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
+
+	mutex_enter(&sc->ipsec_lock);
+
+	ovar = sc->ipsec_var;
+	osrc = ovar->iv_psrc;
+	odst = ovar->iv_pdst;
+	/*
+	 * (1-1) Check current src and dst address pair are null,
+	 *       which means the ipsec(4) I/F is already done deletetunnel.
+	 */
+	if (osrc == NULL || odst == NULL) {
+		/* address pair not changed. */
+		mutex_exit(&sc->ipsec_lock);
+		encap_lock_exit();
+		kmem_free(nvar, sizeof(*nvar));
+		/* nullvar was allocated above as well; do not leak it. */
+		kmem_free(nullvar, sizeof(*nullvar));
+		return;
+	}
+
+	/*
+	 * (2-1) Create variant for deleted status.
+	 */
+	if_ipsec_copy_variant(nvar, ovar);
+	if_ipsec_clear_config(nvar);
+	psref_target_init(&nvar->iv_psref, iv_psref_class);
+
+	/*
+	 * (2-2) Create temporary "null" variant used to avoid to access
+	 *       dangling variant while SPs are deleted and added.
+	 */
+	if_ipsec_copy_variant(nullvar, ovar);
+	if_ipsec_clear_config(nullvar);
+	psref_target_init(&nullvar->iv_psref, iv_psref_class);
+	membar_producer();
+	/*
+	 * (2-3) Swap variant include its SPs.
+	 */
+	/* if_ipsec_update_variant() does not fail when delete SP only. */
+	(void)if_ipsec_update_variant(sc, nvar, nullvar);
+
+	mutex_exit(&sc->ipsec_lock);
+
+	/*
+	 * (2-4) Cleanup last configurations.
+	 */
+	if (if_ipsec_variant_is_configured(ovar))
+		if_ipsec_encap_detach(ovar);
+	encap_lock_exit();
+
+	sockaddr_free(osrc);
+	sockaddr_free(odst);
+	kmem_free(ovar, sizeof(*ovar));
+	kmem_free(nullvar, sizeof(*nullvar));
+}
+
+/*
+ * Check IFF_NAT_T and IFF_FWD_IPV6 flags, therefore update SPs if needed.
+ * (1) check
+ *     (1-1) Check flags are changed.
+ *     (1-2) Check current src and dst address pair. If they are null,
+ *           that means the ipsec(4) I/F is deletetunnel'ed, so it is
+ *           not needed to update.
+ * (2) update
+ *     (2-1) Create variant for new SPs.
+ *     (2-2) Create temporary "null" variant used to avoid to access
+ *           dangling variant while SPs are deleted and added.
+ *           NOTE:
+ *           There is the same problem as if_ipsec_delete_tunnel().
+ *     (2-3) Swap variant include its SPs.
+ *     (2-4) Cleanup unused configurations.
+ *           NOTE: use the same encap_cookies.
+ *
+ * "oflags" are the interface flags before the change being checked.
+ * Returns 0 when nothing had to be done or the update succeeded,
+ * otherwise the error from encap_lock_enter() or
+ * if_ipsec_update_variant().  The encap lock and both variants allocated
+ * here are released on every return path.
+ */
+static int
+if_ipsec_ensure_flags(struct ifnet *ifp, short oflags)
+{
+	struct ipsec_softc *sc = ifp->if_softc;
+	struct ipsec_variant *ovar, *nvar, *nullvar;
+	int error;
+
+	/*
+	 * (1-1) Check flags are changed.
+	 */
+	if ((oflags & (IFF_NAT_T|IFF_FWD_IPV6)) ==
+	    (ifp->if_flags & (IFF_NAT_T|IFF_FWD_IPV6)))
+		return 0; /* flags not changed. */
+
+	error = encap_lock_enter();
+	if (error)
+		return error;
+
+	nvar = kmem_zalloc(sizeof(*nvar), KM_SLEEP);
+	nullvar = kmem_zalloc(sizeof(*nullvar), KM_SLEEP);
+
+	mutex_enter(&sc->ipsec_lock);
+
+	ovar = sc->ipsec_var;
+	/*
+	 * (1-2) Check current src and dst address pair.
+	 */
+	if (if_ipsec_variant_is_unconfigured(ovar)) {
+		/*
+		 * nothing to do, but the encap lock and the two variants
+		 * allocated above must still be released.
+		 */
+		mutex_exit(&sc->ipsec_lock);
+		encap_lock_exit();
+		kmem_free(nvar, sizeof(*nvar));
+		kmem_free(nullvar, sizeof(*nullvar));
+		return 0;
+	}
+
+	/*
+	 * (2-1) Create variant for new SPs.
+	 */
+	if_ipsec_copy_variant(nvar, ovar);
+	psref_target_init(&nvar->iv_psref, iv_psref_class);
+	/*
+	 * (2-2) Create temporary "null" variant used to avoid to access
+	 *       dangling variant while SPs are deleted and added.
+	 */
+	if_ipsec_copy_variant(nullvar, ovar);
+	if_ipsec_clear_config(nullvar);
+	psref_target_init(&nullvar->iv_psref, iv_psref_class);
+	membar_producer();
+	/*
+	 * (2-3) Swap variant include its SPs.
+	 */
+	error = if_ipsec_update_variant(sc, nvar, nullvar);
+
+	mutex_exit(&sc->ipsec_lock);
+	encap_lock_exit();
+
+	/*
+	 * (2-4) Cleanup unused configurations.
+	 */
+	if (!error)
+		kmem_free(ovar, sizeof(*ovar));
+	else
+		kmem_free(nvar, sizeof(*nvar));
+	kmem_free(nullvar, sizeof(*nullvar));
+
+	return error;
+}
+
+/*
+ * SPD management
+ */
+
+/*
+ * Share SP set with other NAT-T ipsec(4) I/F(s).
+ * Return 1, when "var" shares SP set.
+ * Return 0, when "var" cannot share SP set.
+ *
+ * The softc list is searched for another configured interface whose
+ * tunnel source and destination both compare equal to those of "var";
+ * the SP pointers of the first match are copied into "var".
+ *
+ * NOTE:
+ * if_ipsec_share_sp() and if_ipsec_unshare_sp() would require global lock
+ * to exclude other ipsec(4) I/Fs set_tunnel/delete_tunnel. E.g. when ipsec0
+ * and ipsec1 can share SP set, running ipsec0's set_tunnel and ipsec1's
+ * set_tunnel causes race.
+ * Currently, (fortunately) encap_lock works as this global lock.
+ */
+static int
+if_ipsec_share_sp(struct ipsec_variant *var)
+{
+	struct ipsec_softc *sc = var->iv_softc;
+	struct ipsec_softc *sc2;
+	struct ipsec_variant *var2;
+	struct psref psref;
+
+	KASSERT(encap_lock_held());
+	/* Both endpoints are dereferenced by sockaddr_cmp() below. */
+	KASSERT(var->iv_psrc != NULL && var->iv_pdst != NULL);
+
+	mutex_enter(&ipsec_softcs.lock);
+	LIST_FOREACH(sc2, &ipsec_softcs.list, ipsec_list) {
+		if (sc2 == sc)
+			continue;
+		var2 = if_ipsec_getref_variant(sc2, &psref);
+		if (if_ipsec_variant_is_unconfigured(var2)) {
+			if_ipsec_putref_variant(var2, &psref);
+			continue;
+		}
+		if (sockaddr_cmp(var2->iv_pdst, var->iv_pdst) != 0 ||
+		    sockaddr_cmp(var2->iv_psrc, var->iv_psrc) != 0) {
+			if_ipsec_putref_variant(var2, &psref);
+			continue;
+		}
+
+		/* Match: leave the loop still holding the ref on var2. */
+		break;
+	}
+	mutex_exit(&ipsec_softcs.lock);
+	if (sc2 == NULL)
+		return 0; /* not shared */
+
+	IV_SP_IN(var) = IV_SP_IN(var2);
+	IV_SP_IN6(var) = IV_SP_IN6(var2);
+	IV_SP_OUT(var) = IV_SP_OUT(var2);
+	IV_SP_OUT6(var) = IV_SP_OUT6(var2);
+
+	if_ipsec_putref_variant(var2, &psref);
+	return 1; /* shared */
+}
+
+/*
+ * Unshare SP set with other NAT-T ipsec(4) I/F(s).
+ * Return 1, when "var" shared SP set, and then unshare them.
+ * Return 0, when "var" did not share SP set.
+ *
+ * "Shared" means another interface on the softc list has both tunnel
+ * endpoints set and equal to those of "var"; in that case the SP pointers
+ * of "var" are cleared.
+ *
+ * NOTE:
+ * See if_ipsec_share_sp()'s note.
+ */
+static int
+if_ipsec_unshare_sp(struct ipsec_variant *var)
+{
+	struct ipsec_softc *self = var->iv_softc;
+	struct ipsec_softc *peer;
+	struct ipsec_variant *pvar;
+	struct psref psref;
+
+	KASSERT(encap_lock_held());
+
+	if (var->iv_pdst == NULL || var->iv_psrc == NULL)
+		return 0;
+
+	mutex_enter(&ipsec_softcs.lock);
+	LIST_FOREACH(peer, &ipsec_softcs.list, ipsec_list) {
+		if (peer == self)
+			continue;
+
+		pvar = if_ipsec_getref_variant(peer, &psref);
+		if (pvar->iv_pdst != NULL && pvar->iv_psrc != NULL &&
+		    sockaddr_cmp(pvar->iv_pdst, var->iv_pdst) == 0 &&
+		    sockaddr_cmp(pvar->iv_psrc, var->iv_psrc) == 0) {
+			/* Match: keep the reference and stop searching. */
+			break;
+		}
+		if_ipsec_putref_variant(pvar, &psref);
+	}
+	mutex_exit(&ipsec_softcs.lock);
+
+	if (peer == NULL)
+		return 0; /* not shared */
+
+	IV_SP_IN(var) = NULL;
+	IV_SP_IN6(var) = NULL;
+	IV_SP_OUT(var) = NULL;
+	IV_SP_OUT6(var) = NULL;
+
+	if_ipsec_putref_variant(pvar, &psref);
+	return 1; /* shared */
+}
+
+/*
+ * Append a copy of "len" bytes at "data" to the mbuf chain "m0" as one new
+ * mbuf whose length is "len" rounded up by PFKEY_ALIGN8().
+ *
+ * NOTE(review): the rounded length is assumed to fit into a single plain
+ * mbuf; nothing here checks it.
+ */
+static inline void
+if_ipsec_add_mbuf(struct mbuf *m0, void *data, size_t len)
+{
+	struct mbuf *seg;
+
+	MGET(seg, M_WAITOK | M_ZERO, MT_DATA);
+	seg->m_len = PFKEY_ALIGN8(len);
+	m_copyback(seg, 0, len, data);
+	m_cat(m0, seg);
+}
+
+/*
+ * Append "len" bytes of padding to the mbuf chain "m0" as one new mbuf.
+ * Does nothing when "len" is 0.
+ */
+static inline void
+if_ipsec_add_pad(struct mbuf *m0, size_t len)
+{
+	struct mbuf *pad;
+
+	if (len != 0) {
+		MGET(pad, M_WAITOK | M_ZERO, MT_DATA);
+		pad->m_len = len;
+		m_cat(m0, pad);
+	}
+}
+
+/*
+ * Initialize the address extension header "saaddr" describing "addr".
+ *
+ * The prefix length is set to the full address width for AF_INET and
+ * AF_INET6.  For any other family it is left untouched and a debug message
+ * is logged.
+ *
+ * Returns the extension size in bytes: the header plus the
+ * PFKEY_ALIGN8()-rounded sockaddr length.
+ */
+static inline size_t
+if_ipsec_set_sadb_addr(struct sadb_address *saaddr, struct sockaddr *addr,
+    int proto, uint16_t exttype)
+{
+	size_t extlen;
+
+	KASSERT(saaddr != NULL);
+	KASSERT(addr != NULL);
+
+	extlen = sizeof(*saaddr) + PFKEY_ALIGN8(addr->sa_len);
+
+	saaddr->sadb_address_exttype = exttype;
+	saaddr->sadb_address_len = PFKEY_UNIT64(extlen);
+	saaddr->sadb_address_proto = proto;
+	saaddr->sadb_address_reserved = 0;
+
+	switch (addr->sa_family) {
+#ifdef INET
+	case AF_INET:
+		saaddr->sadb_address_prefixlen = sizeof(struct in_addr) << 3;
+		break;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		saaddr->sadb_address_prefixlen = sizeof(struct in6_addr) << 3;
+		break;
+#endif /* INET6 */
+	default:
+		log(LOG_DEBUG,
+		    "%s: Invalid address family: %d.\n",
+		    __func__, addr->sa_family);
+		break;
+	}
+
+	return extlen;
+}
+
+/*
+ * Fill "sasrc" as a SADB_EXT_ADDRESS_SRC address extension header for "src".
+ * Thin wrapper around if_ipsec_set_sadb_addr(); returns its result.
+ */
+static inline size_t
+if_ipsec_set_sadb_src(struct sadb_address *sasrc, struct sockaddr *src,
+ int proto)
+{
+
+ return if_ipsec_set_sadb_addr(sasrc, src, proto,
+ SADB_EXT_ADDRESS_SRC);
+}
+
+/*
+ * Fill "sadst" as a SADB_EXT_ADDRESS_DST address extension header for "dst".
+ * Thin wrapper around if_ipsec_set_sadb_addr(); returns its result.
+ */
+static inline size_t
+if_ipsec_set_sadb_dst(struct sadb_address *sadst, struct sockaddr *dst,
+ int proto)
+{
+
+ return if_ipsec_set_sadb_addr(sadst, dst, proto,
+ SADB_EXT_ADDRESS_DST);
+}
+
+/*
+ * Initialize the policy extension "xpl" and, for IPSEC_POLICY_IPSEC only,
+ * the accompanying request "xisr".
+ *
+ * The request is always ESP in transport mode with the given "level" and a
+ * fresh reqid from key_newreqid().  "xisr" may be NULL for any other policy.
+ *
+ * Returns the extension size in bytes, including the request when present.
+ */
+static inline size_t
+if_ipsec_set_sadb_x_policy(struct sadb_x_policy *xpl,
+    struct sadb_x_ipsecrequest *xisr, uint16_t policy, uint8_t dir, uint32_t id,
+    uint8_t level)
+{
+	size_t extlen = sizeof(*xpl);
+
+	KASSERT(policy != IPSEC_POLICY_IPSEC || xisr != NULL);
+
+	if (policy == IPSEC_POLICY_IPSEC) {
+		const size_t reqlen = PFKEY_ALIGN8(sizeof(*xisr));
+
+		extlen += reqlen;
+
+		xisr->sadb_x_ipsecrequest_len = reqlen;
+		xisr->sadb_x_ipsecrequest_proto = IPPROTO_ESP;
+		xisr->sadb_x_ipsecrequest_mode = IPSEC_MODE_TRANSPORT;
+		xisr->sadb_x_ipsecrequest_level = level;
+		xisr->sadb_x_ipsecrequest_reqid = key_newreqid();
+	}
+
+	xpl->sadb_x_policy_len = PFKEY_UNIT64(extlen);
+	xpl->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
+	xpl->sadb_x_policy_type = policy;
+	xpl->sadb_x_policy_dir = dir;
+	xpl->sadb_x_policy_id = id;
+	xpl->sadb_x_policy_reserved = 0;
+	xpl->sadb_x_policy_reserved2 = 0;
+
+	return extlen;
+}
+
+/*
+ * Initialize the PF_KEY base message header "msg" for a request of type
+ * "msgtype".  "extlen" is the total length of the extensions that follow,
+ * already expressed in 64-bit units; the header's own length is added here.
+ */
+static inline void
+if_ipsec_set_sadb_msg(struct sadb_msg *msg, uint16_t extlen, uint8_t msgtype)
+{
+
+	KASSERT(msg != NULL);
+
+	/* what kind of message this is */
+	msg->sadb_msg_version = PF_KEY_V2;
+	msg->sadb_msg_type = msgtype;
+	msg->sadb_msg_satype = SADB_SATYPE_UNSPEC;
+
+	/* header plus extensions, in 64-bit units */
+	msg->sadb_msg_len = PFKEY_UNIT64(sizeof(*msg)) + extlen;
+
+	/* fields not used for this in-kernel request */
+	msg->sadb_msg_errno = 0;
+	msg->sadb_msg_reserved = 0;
+	msg->sadb_msg_seq = 0; /* XXXX */
+	msg->sadb_msg_pid = 0; /* XXXX */
+}
+
+/*
+ * Initialize "msg" as a SADB_X_SPDADD request header.
+ * "extlen" is passed through to if_ipsec_set_sadb_msg() unchanged.
+ */
+static inline void
+if_ipsec_set_sadb_msg_add(struct sadb_msg *msg, uint16_t extlen)
+{
+
+ if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDADD);
+}
+
+/*
+ * Initialize "msg" as a SADB_X_SPDDELETE2 request header.
+ * "extlen" is passed through to if_ipsec_set_sadb_msg() unchanged.
+ */
+static inline void
+if_ipsec_set_sadb_msg_del(struct sadb_msg *msg, uint16_t extlen)
+{
+
+ if_ipsec_set_sadb_msg(msg, extlen, SADB_X_SPDDELETE2);
+}
+
+/*
+ * Copy "addr" into "addrport" and store "port" (converted with htons()) in
+ * the copy's port field.
+ *
+ * The caller must provide at least addr->sa_len bytes at "addrport":
+ * sockaddr_copy() is told the destination is exactly that large, so no
+ * real bounds check happens here.
+ *
+ * Returns 0 on success, or EINVAL for an address family other than
+ * AF_INET/AF_INET6 (the address has still been copied in that case).
+ *
+ * NOTE(review): htons() implies "port" is in host byte order, while
+ * ipsecif4_encap_func() prints iv_sport/iv_dport through ntohs(); confirm
+ * which byte order callers really pass.
+ */
+static int
+if_ipsec_set_addr_port(struct sockaddr *addrport, struct sockaddr *addr,
+    in_port_t port)
+{
+
+	sockaddr_copy(addrport, addr->sa_len, addr);
+
+	switch (addr->sa_family) {
+#ifdef INET
+	case AF_INET:
+		satosin(addrport)->sin_port = htons(port);
+		return 0;
+#endif /* INET */
+#ifdef INET6
+	case AF_INET6:
+		satosin6(addrport)->sin6_port = htons(port);
+		return 0;
+#endif /* INET6 */
+	default:
+		break;
+	}
+
+	log(LOG_DEBUG,
+	    "%s: Invalid address family: %d.\n",
+	    __func__, addr->sa_family);
+	return EINVAL;
+}
+
+/*
+ * Append one address extension to the PF_KEY message "m": the header
+ * "xaddr", then the sockaddr "addr" (with "port" filled in when it is not
+ * 0), then any padding needed to reach the length recorded in "xaddr".
+ *
+ * The port-carrying copy lives in a sockaddr_storage so that it is large
+ * enough for every supported family; a plain struct sockaddr is too small
+ * for sockaddr_in6.
+ *
+ * Returns 0 on success or an errno value; "m" is not freed on failure.
+ */
+static int
+if_ipsec_append_sadb_addr(struct mbuf *m, struct sadb_address *xaddr,
+    struct sockaddr *addr, in_port_t port)
+{
+	struct sockaddr_storage ss;
+	struct sockaddr *addrport = (struct sockaddr *)&ss;
+	size_t padlen;
+	int error;
+
+	if_ipsec_add_mbuf(m, xaddr, sizeof(*xaddr));
+	if (port == 0) {
+		if_ipsec_add_mbuf(m, addr, addr->sa_len);
+	} else {
+		if (addr->sa_len > sizeof(ss))
+			return EINVAL;
+		error = if_ipsec_set_addr_port(addrport, addr, port);
+		if (error != 0)
+			return error;
+		if_ipsec_add_mbuf(m, addrport, addrport->sa_len);
+	}
+	padlen = PFKEY_UNUNIT64(xaddr->sadb_address_len)
+	    - (sizeof(*xaddr) + PFKEY_ALIGN8(addr->sa_len));
+	if_ipsec_add_pad(m, padlen);
+
+	return 0;
+}
+
+/*
+ * Build a SADB_X_SPDADD message for one SP and hand it to key_kpi_spdadd().
+ *
+ * src/sport, dst/dport: selector addresses; a port of 0 means "no port".
+ * dir:    policy direction (IPSEC_DIR_*).
+ * proto:  upper layer protocol of the selector.
+ * level:  request level, used only when policy is IPSEC_POLICY_IPSEC.
+ * policy: policy type (IPSEC_POLICY_*).
+ *
+ * Returns what key_kpi_spdadd() returns, or NULL when the message could
+ * not be built.  Callers treat NULL as failure.
+ */
+static struct secpolicy *
+if_ipsec_add_sp0(struct sockaddr *src, in_port_t sport,
+    struct sockaddr *dst, in_port_t dport,
+    int dir, int proto, int level, u_int policy)
+{
+	struct sadb_msg msg;
+	struct sadb_address xsrc, xdst;
+	struct sadb_x_policy xpl;
+	struct sadb_x_ipsecrequest xisr;
+	size_t size;
+	uint16_t ext_msg_len = 0;
+	struct mbuf *m;
+
+	memset(&msg, 0, sizeof(msg));
+	memset(&xsrc, 0, sizeof(xsrc));
+	memset(&xdst, 0, sizeof(xdst));
+	memset(&xpl, 0, sizeof(xpl));
+	memset(&xisr, 0, sizeof(xisr));
+
+	MGETHDR(m, M_WAITOK, MT_DATA);
+
+	/* Fill in all headers first so the total length is known. */
+	size = if_ipsec_set_sadb_src(&xsrc, src, proto);
+	ext_msg_len += PFKEY_UNIT64(size);
+	size = if_ipsec_set_sadb_dst(&xdst, dst, proto);
+	ext_msg_len += PFKEY_UNIT64(size);
+	size = if_ipsec_set_sadb_x_policy(&xpl, &xisr, policy, dir, 0, level);
+	ext_msg_len += PFKEY_UNIT64(size);
+	if_ipsec_set_sadb_msg_add(&msg, ext_msg_len);
+
+	/* build PF_KEY message */
+
+	m->m_len = sizeof(msg);
+	m_copyback(m, 0, sizeof(msg), &msg);
+
+	if (if_ipsec_append_sadb_addr(m, &xsrc, src, sport) != 0 ||
+	    if_ipsec_append_sadb_addr(m, &xdst, dst, dport) != 0) {
+		m_freem(m);
+		return NULL;
+	}
+
+	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
+	if (policy == IPSEC_POLICY_IPSEC)
+		if_ipsec_add_mbuf(m, &xisr, sizeof(xisr));
+
+	/* key_kpi_spdadd() has already done KEY_SP_REF(). */
+	return key_kpi_spdadd(m);
+}
+
+/*
+ * Install the four SPs (IPv4/IPv6 payload, inbound/outbound) of "var", or
+ * share the ones of another ipsec(4) I/F when if_ipsec_share_sp() finds
+ * one.  All SP slots of "var" must be empty on entry.
+ *
+ * Returns 0 on success.  If any SP cannot be added, the SPs added so far
+ * are deleted again and EEXIST is returned.
+ */
+static int
+if_ipsec_add_sp(struct ipsec_variant *var,
+    struct sockaddr *src, in_port_t sport,
+    struct sockaddr *dst, in_port_t dport)
+{
+	struct ipsec_softc *sc = var->iv_softc;
+	u_int policy6;
+	int reqlevel;
+
+	/* must delete sp before add it. */
+	KASSERT(IV_SP_IN(var) == NULL);
+	KASSERT(IV_SP_OUT(var) == NULL);
+	KASSERT(IV_SP_IN6(var) == NULL);
+	KASSERT(IV_SP_OUT6(var) == NULL);
+
+	/* can be shared? */
+	if (if_ipsec_share_sp(var))
+		return 0;
+
+	reqlevel = if_ipsec_nat_t(sc) ?
+	    IPSEC_LEVEL_REQUIRE : IPSEC_LEVEL_UNIQUE;
+	policy6 = if_ipsec_fwd_ipv6(sc) ?
+	    IPSEC_POLICY_IPSEC : IPSEC_POLICY_DISCARD;
+
+	/* Inbound selectors are the mirror image of the outbound ones. */
+	IV_SP_IN(var) = if_ipsec_add_sp0(dst, dport, src, sport,
+	    IPSEC_DIR_INBOUND, IPPROTO_IPIP, reqlevel, IPSEC_POLICY_IPSEC);
+	if (IV_SP_IN(var) == NULL)
+		goto fail;
+
+	IV_SP_OUT(var) = if_ipsec_add_sp0(src, sport, dst, dport,
+	    IPSEC_DIR_OUTBOUND, IPPROTO_IPIP, reqlevel, IPSEC_POLICY_IPSEC);
+	if (IV_SP_OUT(var) == NULL)
+		goto fail;
+
+	IV_SP_IN6(var) = if_ipsec_add_sp0(dst, dport, src, sport,
+	    IPSEC_DIR_INBOUND, IPPROTO_IPV6, reqlevel, policy6);
+	if (IV_SP_IN6(var) == NULL)
+		goto fail;
+
+	IV_SP_OUT6(var) = if_ipsec_add_sp0(src, sport, dst, dport,
+	    IPSEC_DIR_OUTBOUND, IPPROTO_IPV6, reqlevel, policy6);
+	if (IV_SP_OUT6(var) == NULL)
+		goto fail;
+
+	return 0;
+
+fail:
+	/*
+	 * Undo in reverse order.  IV_SP_OUT6 is the last one added, so it
+	 * can never be set when we get here.
+	 */
+	if (IV_SP_IN6(var) != NULL) {
+		if_ipsec_del_sp0(IV_SP_IN6(var));
+		IV_SP_IN6(var) = NULL;
+	}
+	if (IV_SP_OUT(var) != NULL) {
+		if_ipsec_del_sp0(IV_SP_OUT(var));
+		IV_SP_OUT(var) = NULL;
+	}
+	if (IV_SP_IN(var) != NULL) {
+		if_ipsec_del_sp0(IV_SP_IN(var));
+		IV_SP_IN(var) = NULL;
+	}
+
+	return EEXIST;
+}
+
+/*
+ * Delete the SP "sp" from the SPD with a SADB_X_SPDDELETE2 request keyed
+ * by its ID, after dropping the reference taken by key_kpi_spdadd().
+ *
+ * A NULL "sp" is accepted and treated as success.
+ * Returns 0 on success or the error from key_kpi_spddelete2().
+ */
+static int
+if_ipsec_del_sp0(struct secpolicy *sp)
+{
+	struct sadb_msg msg;
+	struct sadb_x_policy xpl;
+	size_t size;
+	uint16_t ext_msg_len = 0;
+	uint32_t spid;
+	int error;
+	struct mbuf *m;
+
+	if (sp == NULL)
+		return 0;
+
+	/*
+	 * Remember the ID now: "sp" must not be touched any more once our
+	 * reference has been dropped and the delete request was issued.
+	 */
+	spid = sp->id;
+
+	memset(&msg, 0, sizeof(msg));
+	memset(&xpl, 0, sizeof(xpl));
+
+	MGETHDR(m, M_WAITOK, MT_DATA);
+
+	size = if_ipsec_set_sadb_x_policy(&xpl, NULL, 0, 0, spid, 0);
+	ext_msg_len += PFKEY_UNIT64(size);
+
+	if_ipsec_set_sadb_msg_del(&msg, ext_msg_len);
+
+	m->m_len = sizeof(msg);
+	m_copyback(m, 0, sizeof(msg), &msg);
+
+	if_ipsec_add_mbuf(m, &xpl, sizeof(xpl));
+
+	/* unreference correspond to key_kpi_spdadd(). */
+	KEY_SP_UNREF(&sp);
+	error = key_kpi_spddelete2(m);
+	if (error != 0) {
+		log(LOG_ERR, "%s: cannot delete SP(ID=%u) (error=%d).\n",
+		    __func__, spid, error);
+	}
+	return error;
+}
+
+/*
+ * Detach "var" from its SPs.  When the SPs are shared with another
+ * ipsec(4) I/F, if_ipsec_unshare_sp() has already cleared the pointers and
+ * nothing is deleted.  Otherwise each SP is deleted and its slot cleared.
+ * Deletion errors are ignored.
+ */
+static void
+if_ipsec_del_sp(struct ipsec_variant *var)
+{
+
+	/* are the SPs shared? */
+	if (if_ipsec_unshare_sp(var))
+		return;
+
+	(void)if_ipsec_del_sp0(IV_SP_OUT(var));
+	IV_SP_OUT(var) = NULL;
+
+	(void)if_ipsec_del_sp0(IV_SP_IN(var));
+	IV_SP_IN(var) = NULL;
+
+	(void)if_ipsec_del_sp0(IV_SP_OUT6(var));
+	IV_SP_OUT6(var) = NULL;
+
+	(void)if_ipsec_del_sp0(IV_SP_IN6(var));
+	IV_SP_IN6(var) = NULL;
+}
+
+/*
+ * Drop the SPs of "ovar" and, when "nvar" has both tunnel endpoints set,
+ * install SPs for "nvar".  The NAT-T ports of "nvar" are used only when
+ * NAT-T is enabled on the interface; otherwise port 0 is passed.
+ *
+ * Must be called with sc->ipsec_lock held.
+ * Returns 0 on success (including "nothing to add") or the error from
+ * if_ipsec_add_sp().  The old SPs are gone in either case.
+ */
+static int
+if_ipsec_replace_sp(struct ipsec_softc *sc, struct ipsec_variant *ovar,
+    struct ipsec_variant *nvar)
+{
+	struct sockaddr *src, *dst;
+	in_port_t sport, dport;
+
+	KASSERT(mutex_owned(&sc->ipsec_lock));
+
+	if_ipsec_del_sp(ovar);
+
+	src = nvar->iv_psrc;
+	dst = nvar->iv_pdst;
+
+	if (if_ipsec_nat_t(sc)) {
+		/* NAT-T enabled */
+		sport = nvar->iv_sport;
+		dport = nvar->iv_dport;
+	} else {
+		sport = 0;
+		dport = 0;
+	}
+
+	if (src == NULL || dst == NULL)
+		return 0;
+
+	return if_ipsec_add_sp(nvar, src, sport, dst, dport);
+}
+
+/*
+ * ipsec_variant and its SPs update API.
+ *
+ * Switch sc->ipsec_var from the current variant to "nvar" and replace the
+ * SPs accordingly.  While the SPs are being replaced, "nullvar" is
+ * published so that readers see a consistent (null) configuration.
+ * On return the psref target of "nullvar" has been destroyed, and
+ * IFF_RUNNING reflects whether the published variant is configured.
+ *
+ * Must be called with sc->ipsec_lock held.
+ * Returns 0 on success, or the error from if_ipsec_replace_sp(), in which
+ * case sc->ipsec_var is rolled back to the old variant.
+ *
+ * Assumption:
+ * reader side dereferences sc->ipsec_var in reader critical section only,
+ * that is, all of reader sides do not read the sc->ipsec_var after
+ * pserialize_perform().
+ */
+static int
+if_ipsec_update_variant(struct ipsec_softc *sc, struct ipsec_variant *nvar,
+    struct ipsec_variant *nullvar)
+{
+	struct ifnet *ifp = &sc->ipsec_if;
+	struct ipsec_variant *ovar = sc->ipsec_var;
+	int error;
+
+	KASSERT(mutex_owned(&sc->ipsec_lock));
+
+	/*
+	 * To keep consistency between ipsec(4) I/F settings and SPs,
+	 * we stop packet processing while replacing SPs, that is, we set
+	 * "null" config variant to sc->ipsec_var.
+	 */
+	sc->ipsec_var = nullvar;
+	pserialize_perform(ipsec_psz);
+	psref_target_destroy(&ovar->iv_psref, iv_psref_class);
+
+	error = if_ipsec_replace_sp(sc, ovar, nvar);
+	if (!error)
+		sc->ipsec_var = nvar;
+	else {
+		/*
+		 * Rollback.  The psref target of the old variant was
+		 * destroyed above, so it must be re-initialized before the
+		 * variant becomes visible to readers again.
+		 *
+		 * NOTE(review): if_ipsec_replace_sp() deletes the old
+		 * variant's SPs before trying to add the new ones, so the
+		 * variant restored here no longer has its SPs.
+		 */
+		psref_target_init(&ovar->iv_psref, iv_psref_class);
+		sc->ipsec_var = ovar;
+	}
+
+	pserialize_perform(ipsec_psz);
+	psref_target_destroy(&nullvar->iv_psref, iv_psref_class);
+
+	if (if_ipsec_variant_is_configured(sc->ipsec_var))
+		ifp->if_flags |= IFF_RUNNING;
+	else
+		ifp->if_flags &= ~IFF_RUNNING;
+
+	return error;
+}
Index: src/sys/net/if_ipsec.h
diff -u /dev/null src/sys/net/if_ipsec.h:1.1
--- /dev/null Wed Jan 10 10:56:31 2018
+++ src/sys/net/if_ipsec.h Wed Jan 10 10:56:30 2018
@@ -0,0 +1,231 @@
+/* $NetBSD: if_ipsec.h,v 1.1 2018/01/10 10:56:30 knakahara Exp $ */
+
+/*
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * if_ipsec.h
+ */
+
+#ifndef _NET_IF_IPSEC_H_
+#define _NET_IF_IPSEC_H_
+
+#include <sys/queue.h>
+#ifdef _KERNEL
+#include <sys/psref.h>
+#endif
+
+#ifdef _KERNEL_OPT
+#include "opt_inet.h"
+#endif
+
+#include <netinet/in.h>
+#include <netipsec/ipsec.h>
+
+#ifdef _KERNEL
+/*
+ * This macro controls the upper limitation on nesting of ipsec tunnels.
+ * Since, setting a large value to this macro with a careless configuration
+ * may introduce system crash, we don't allow any nestings by default.
+ * If you need to configure nested ipsec tunnels, you can define this macro
+ * in your kernel configuration file. However, if you do so, please be
+ * careful to configure the tunnels so that it won't make a loop.
+ */
+#ifndef MAX_IPSEC_NEST
+#define MAX_IPSEC_NEST 1
+#endif
+
+#define IFF_NAT_T IFF_LINK0 /* enable NAT-T */
+#define IFF_ECN IFF_LINK1 /* enable ECN */
+#define IFF_FWD_IPV6 IFF_LINK2 /* forward IPv6 packet */
+
+extern struct psref_class *iv_psref_class;
+
+/*
+ * One configuration snapshot of an ipsec(4) I/F.  Readers obtain it from
+ * ipsec_softc via if_ipsec_getref_variant() and hold it through iv_psref.
+ */
+struct ipsec_variant {
+ struct ipsec_softc *iv_softc; /* owning interface */
+
+ struct sockaddr *iv_psrc; /* Physical src addr */
+ struct sockaddr *iv_pdst; /* Physical dst addr */
+ const struct encaptab *iv_encap_cookie4;
+ const struct encaptab *iv_encap_cookie6;
+ int (*iv_output)(struct ipsec_variant *, int, struct mbuf *);
+ in_port_t iv_sport; /* NAT-T src port, 0 if unused */
+ in_port_t iv_dport; /* NAT-T dst port, 0 if unused */
+
+ /*
+ * IPsec SPs
+ * Don't change directly, use if_ipsec_replace_sp().
+ */
+ struct secpolicy *iv_sp[IPSEC_DIR_MAX]; /* see IV_SP_IN/IV_SP_OUT */
+ struct secpolicy *iv_sp6[IPSEC_DIR_MAX]; /* see IV_SP_IN6/IV_SP_OUT6 */
+
+ struct psref_target iv_psref; /* keeps the variant alive for readers */
+};
+
+/* Per-CPU route cache (see ipsec_softc.ipsec_ro_percpu). */
+struct ipsec_ro {
+ struct route ir_ro;
+ kmutex_t ir_lock; /* protects ir_ro */
+};
+
+/* Per-interface state of an ipsec(4) I/F. */
+struct ipsec_softc {
+ struct ifnet ipsec_if; /* common area - must be at the top */
+ percpu_t *ipsec_ro_percpu; /* struct ipsec_ro */
+ struct ipsec_variant *ipsec_var; /*
+ * reader must use if_ipsec_getref_variant()
+ * instead of direct dereference.
+ */
+ kmutex_t ipsec_lock; /* writer lock for ipsec_var */
+
+ LIST_ENTRY(ipsec_softc) ipsec_list; /* list of all ipsec(4) I/Fs */
+};
+
+#define IPSEC_MTU (1280) /* Default MTU */
+#define IPSEC_MTU_MIN (1280) /* Minimum MTU */
+#define IPSEC_MTU_MAX (8192) /* Maximum MTU */
+
+/* Accessors for the per-direction SP slots of a struct ipsec_variant. */
+#define IV_SP_IN(x) ((x)->iv_sp[IPSEC_DIR_INBOUND])
+#define IV_SP_IN6(x) ((x)->iv_sp6[IPSEC_DIR_INBOUND])
+#define IV_SP_OUT(x) ((x)->iv_sp[IPSEC_DIR_OUTBOUND])
+#define IV_SP_OUT6(x) ((x)->iv_sp6[IPSEC_DIR_OUTBOUND])
+
+/* True when both the physical source and destination addresses are set. */
+static inline bool
+if_ipsec_variant_is_configured(struct ipsec_variant *var)
+{
+
+	if (var->iv_psrc == NULL)
+		return false;
+	return var->iv_pdst != NULL;
+}
+
+/* True when the physical source or destination address is missing. */
+static inline bool
+if_ipsec_variant_is_unconfigured(struct ipsec_variant *var)
+{
+
+	if (var->iv_psrc == NULL)
+		return true;
+	return var->iv_pdst == NULL;
+}
+
+/*
+ * Copy the configuration of "src" into "dst".
+ * The SP slots (iv_sp, iv_sp6) and iv_psref are not copied.
+ */
+static inline void
+if_ipsec_copy_variant(struct ipsec_variant *dst, struct ipsec_variant *src)
+{
+
+	dst->iv_softc = src->iv_softc;
+
+	/* tunnel endpoints and NAT-T ports */
+	dst->iv_psrc = src->iv_psrc;
+	dst->iv_sport = src->iv_sport;
+	dst->iv_pdst = src->iv_pdst;
+	dst->iv_dport = src->iv_dport;
+
+	/* encap registrations and output routine */
+	dst->iv_encap_cookie4 = src->iv_encap_cookie4;
+	dst->iv_encap_cookie6 = src->iv_encap_cookie6;
+	dst->iv_output = src->iv_output;
+}
+
+/*
+ * Reset the tunnel configuration of "var" to "unconfigured".
+ * iv_softc, the SP slots and iv_psref are left untouched.
+ */
+static inline void
+if_ipsec_clear_config(struct ipsec_variant *var)
+{
+
+	/* tunnel endpoints and NAT-T ports */
+	var->iv_psrc = NULL;
+	var->iv_sport = 0;
+	var->iv_pdst = NULL;
+	var->iv_dport = 0;
+
+	/* encap registrations and output routine */
+	var->iv_encap_cookie4 = NULL;
+	var->iv_encap_cookie6 = NULL;
+	var->iv_output = NULL;
+}
+
+/*
+ * Get ipsec_variant from ipsec_softc.
+ *
+ * Never return NULL by contract.
+ * ipsec_variant itself is protected not to be freed by iv_psref.
+ * Once a reader dereferences sc->ipsec_var by this API, the reader must not
+ * re-dereference from sc->ipsec_var.
+ *
+ * The reference recorded in "psref" must be released with
+ * if_ipsec_putref_variant().
+ */
+static inline struct ipsec_variant *
+if_ipsec_getref_variant(struct ipsec_softc *sc, struct psref *psref)
+{
+ struct ipsec_variant *var;
+ int s;
+
+ /* The pointer is only read inside the pserialize read section. */
+ s = pserialize_read_enter();
+ var = sc->ipsec_var;
+ KASSERT(var != NULL);
+ membar_datadep_consumer();
+ /* Take the psref before leaving the read section. */
+ psref_acquire(psref, &var->iv_psref, iv_psref_class);
+ pserialize_read_exit(s);
+
+ return var;
+}
+
+/*
+ * Release the reference on "var" taken by if_ipsec_getref_variant().
+ * "psref" must be the one passed to the matching getref call.
+ */
+static inline void
+if_ipsec_putref_variant(struct ipsec_variant *var, struct psref *psref)
+{
+
+ KASSERT(var != NULL);
+ psref_release(psref, &var->iv_psref, iv_psref_class);
+}
+
+/*
+ * Return the result of psref_held() for the psref target of "var".
+ * Used in assertions that the caller holds a variant reference.
+ */
+static inline bool
+if_ipsec_heldref_variant(struct ipsec_variant *var)
+{
+
+ return psref_held(&var->iv_psref, iv_psref_class);
+}
+
+void ipsecifattach(int);
+int if_ipsec_encap_func(struct mbuf *, int, int, void *);
+void if_ipsec_input(struct mbuf *, int, struct ifnet *);
+int if_ipsec_output(struct ifnet *, struct mbuf *,
+ const struct sockaddr *, const struct rtentry *);
+int if_ipsec_ioctl(struct ifnet *, u_long, void *);
+#endif /* _KERNEL */
+
+/*
+ * sharing SP note:
+ * When ipsec(4) I/Fs use NAT-T, they can use the same src and dst address pair
+ * as long as they use different ports. However, the SPD cannot have SPs which
+ * use the same src and dst address pair and the same policy. So, such ipsec(4)
+ * I/Fs share the same SPs.
+ * To avoid a race between ipsec0 set_tunnel/delete_tunnel and ipsec1
+ * set_tunnel/delete_tunnel, some global lock is needed. See also the following
+ * locking notes.
+ *
+ * Locking notes:
+ * + ipsec_softcs.list is protected by ipsec_softcs.lock (an adaptive mutex)
+ * ipsec_softcs.list is the list of all ipsec_softcs. It is used in ioctl
+ * context only.
+ * + ipsec_softc->ipsec_var is protected by
+ * - ipsec_softc->ipsec_lock (an adaptive mutex) for writer
+ * - ipsec_var->iv_psref for reader
+ * ipsec_softc->ipsec_var is used for variant values while the ipsec tunnel
+ * exists.
+ * + struct ipsec_ro->ir_ro is protected by struct ipsec_ro->ir_lock.
+ * This lock is required to exclude softnet/0 lwp(such as output
+ * processing softint) and processing lwp(such as DAD timer processing).
+ * + if_ipsec_share_sp() and if_ipsec_unshare_sp() operations are serialized by
+ * encap_lock
+ * This only needs to be some global lock; it need not be encap_lock.
+ *
+ * Locking order:
+ * - encap_lock => ipsec_softc->ipsec_lock => ipsec_softcs.lock
+ */
+#endif /* _NET_IF_IPSEC_H_ */
Index: src/sys/netipsec/ipsecif.c
diff -u /dev/null src/sys/netipsec/ipsecif.c:1.1
--- /dev/null Wed Jan 10 10:56:31 2018
+++ src/sys/netipsec/ipsecif.c Wed Jan 10 10:56:30 2018
@@ -0,0 +1,933 @@
+/* $NetBSD: ipsecif.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $ */
+
+/*
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__KERNEL_RCSID(0, "$NetBSD: ipsecif.c,v 1.1 2018/01/10 10:56:30 knakahara Exp $");
+
+#ifdef _KERNEL_OPT
+#include "opt_inet.h"
+#include "opt_ipsec.h"
+#endif
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/mbuf.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/syslog.h>
+#include <sys/kernel.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/ip_var.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_encap.h>
+#include <netinet/ip_ecn.h>
+#include <netinet/ip_private.h>
+#include <netinet/udp.h>
+
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#include <netinet6/ip6_private.h>
+#include <netinet6/in6_var.h>
+#include <netinet6/ip6protosw.h> /* for struct ip6ctlparam */
+#include <netinet/ip_ecn.h>
+#endif
+
+#include <netipsec/key.h>
+#include <netipsec/ipsecif.h>
+
+#include <net/if_ipsec.h>
+
+static void ipsecif4_input(struct mbuf *, int, int, void *);
+static int ipsecif4_output(struct ipsec_variant *, int, struct mbuf *);
+static int ipsecif4_filter4(const struct ip *, struct ipsec_variant *,
+ struct ifnet *);
+
+#ifdef INET6
+static int ipsecif6_input(struct mbuf **, int *, int, void *);
+static int ipsecif6_output(struct ipsec_variant *, int, struct mbuf *);
+static int ipsecif6_filter6(const struct ip6_hdr *, struct ipsec_variant *,
+ struct ifnet *);
+#endif
+
+/* TTL written into the outer IPv4 header. */
+static int ip_ipsec_ttl = IPSEC_TTL;
+/* Non-zero: start the outer IPv4 TOS from the inner packet's TOS. */
+static int ip_ipsec_copy_tos = 0;
+#ifdef INET6
+/* Hop limit written into the outer IPv6 header. */
+static int ip6_ipsec_hlim = IPSEC_HLIM;
+/* Zero: pass IPV6_MINMTU to ip6_output(); non-zero: pass no flags. */
+static int ip6_ipsec_pmtu = 0; /* XXX: per interface configuration?? */
+/* Zero: the outer IPv6 traffic class is forced to 0. */
+static int ip6_ipsec_copy_tos = 0;
+#endif
+
+/*
+ * IPv4 encap switch for ipsec(4): input is handled by ipsecif4_input(),
+ * and no ctlinput handler is installed.
+ */
+struct encapsw ipsecif4_encapsw = {
+ .encapsw4 = {
+ .pr_input = ipsecif4_input,
+ .pr_ctlinput = NULL,
+ }
+};
+
+#ifdef INET6
+static const struct encapsw ipsecif6_encapsw;
+#endif
+
+/*
+ * Prepend the outer IPv4 header for the tunnel described by "var" to "m".
+ *
+ * proto: value for the outer ip_p field.
+ * tos:   TOS of the inner packet.
+ *
+ * The packet is dropped when either tunnel endpoint is the null address,
+ * the broadcast address or a multicast address.
+ *
+ * Returns the mbuf with the header in place, or NULL when the packet was
+ * dropped or no mbuf space was available; "m" is consumed in that case.
+ */
+static struct mbuf *
+ipsecif4_prepend_hdr(struct ipsec_variant *var, struct mbuf *m,
+    uint8_t proto, uint8_t tos)
+{
+	struct ip *ip;
+	struct sockaddr_in *src, *dst;
+
+	src = satosin(var->iv_psrc);
+	dst = satosin(var->iv_pdst);
+
+	/* Both endpoints must be usable unicast addresses. */
+	if (in_nullhost(src->sin_addr) || in_nullhost(dst->sin_addr) ||
+	    src->sin_addr.s_addr == INADDR_BROADCAST ||
+	    dst->sin_addr.s_addr == INADDR_BROADCAST) {
+		m_freem(m);
+		return NULL;
+	}
+	m->m_flags &= ~M_BCAST;
+
+	if (IN_MULTICAST(src->sin_addr.s_addr) ||
+	    IN_MULTICAST(dst->sin_addr.s_addr)) {
+		m_freem(m);
+		return NULL;
+	}
+
+	M_PREPEND(m, sizeof(struct ip), M_DONTWAIT);
+	if (m && M_UNWRITABLE(m, sizeof(struct ip)))
+		m = m_pullup(m, sizeof(struct ip));
+	if (m == NULL)
+		return NULL;
+
+	ip = mtod(m, struct ip *);
+	ip->ip_v = IPVERSION;
+	ip->ip_off = htons(0);
+	ip->ip_id = 0;
+	ip->ip_hl = sizeof(*ip) >> 2;
+	if (ip_ipsec_copy_tos)
+		ip->ip_tos = tos;
+	else
+		ip->ip_tos = 0;
+	ip->ip_sum = 0;
+	ip->ip_src = src->sin_addr;
+	ip->ip_dst = dst->sin_addr;
+	ip->ip_p = proto;
+	ip->ip_ttl = ip_ipsec_ttl;
+	ip->ip_len = htons(m->m_pkthdr.len);
+#ifndef IPSEC_TX_TOS_CLEAR
+	/*
+	 * NOTE(review): ip_ecn_ingress() is handed the outer TOS set just
+	 * above; confirm that ip_ipsec_copy_tos == 0 is still honoured
+	 * afterwards (compare the handling in ipsecif6_output()).
+	 */
+	struct ifnet *ifp = &var->iv_softc->ipsec_if;
+	if (ifp->if_flags & IFF_ECN)
+		ip_ecn_ingress(ECN_ALLOWED, &ip->ip_tos, &tos);
+	else
+		ip_ecn_ingress(ECN_NOCARE, &ip->ip_tos, &tos);
+#endif
+
+	return m;
+}
+
+/*
+ * Decide whether the IPv4 packet "m" needs NAT-T ESP fragmentation.
+ *
+ * Looks up the SA for "isr".  Returns the SA's esp_frag value when the SA
+ * uses UDP encapsulation and the packet's ip_len exceeds it.  Returns 0
+ * otherwise, including when no SA is found.
+ */
+static int
+ipsecif4_needfrag(struct mbuf *m, struct ipsecrequest *isr)
+{
+	struct secasvar *sav;
+	struct ip hdrcopy;
+	struct ip *ip;
+	int mtu = 0;
+
+	sav = key_lookup_sa_bysaidx(&isr->saidx);
+	if (sav == NULL)
+		return 0;
+
+	if ((sav->natt_type &
+	    (UDP_ENCAP_ESPINUDP | UDP_ENCAP_ESPINUDP_NON_IKE)) != 0) {
+		/* Use a local copy when the header is not contiguous. */
+		if (m->m_len < sizeof(struct ip)) {
+			m_copydata(m, 0, sizeof(hdrcopy), &hdrcopy);
+			ip = &hdrcopy;
+		} else {
+			ip = mtod(m, struct ip *);
+		}
+
+		mtu = sav->esp_frag;
+		if (ntohs(ip->ip_len) <= mtu)
+			mtu = 0;
+	}
+
+	KEY_SA_UNREF(&sav);
+	return mtu;
+}
+
+/*
+ * Extract the encapsulation protocol number and the TOS/traffic class of
+ * the inner packet "m" of address family "family".
+ *
+ * On success returns the (possibly pulled-up) mbuf and sets *proto0 and
+ * *tos0.  On failure returns NULL with *proto0 and *tos0 set to 0; the
+ * mbuf has been consumed in that case, for an unsupported family as well
+ * as for an m_pullup() failure, so callers must not touch it again.
+ */
+static struct mbuf *
+ipsecif4_flowinfo(struct mbuf *m, int family, int *proto0, u_int8_t *tos0)
+{
+	const struct ip *ip;
+	int proto;
+	int tos;
+
+	KASSERT(proto0 != NULL);
+	KASSERT(tos0 != NULL);
+
+	switch (family) {
+	case AF_INET:
+		proto = IPPROTO_IPV4;
+		if (m->m_len < sizeof(*ip)) {
+			m = m_pullup(m, sizeof(*ip));
+			if (!m) {
+				*tos0 = 0;
+				*proto0 = 0;
+				return NULL;
+			}
+		}
+		ip = mtod(m, const struct ip *);
+		tos = ip->ip_tos;
+		/* TODO: support ALTQ for inner packet */
+		break;
+#ifdef INET6
+	case AF_INET6: {
+		const struct ip6_hdr *ip6;
+		proto = IPPROTO_IPV6;
+		if (m->m_len < sizeof(*ip6)) {
+			m = m_pullup(m, sizeof(*ip6));
+			if (!m) {
+				*tos0 = 0;
+				*proto0 = 0;
+				return NULL;
+			}
+		}
+		ip6 = mtod(m, const struct ip6_hdr *);
+		tos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+		/* TODO: support ALTQ for inner packet */
+		break;
+	}
+#endif /* INET6 */
+	default:
+		/*
+		 * Unsupported family.  Free the packet here: the caller
+		 * replaces its mbuf pointer with our NULL return value and
+		 * would otherwise leak it.
+		 */
+		m_freem(m);
+		*tos0 = 0;
+		*proto0 = 0;
+		return NULL;
+	}
+
+	*proto0 = proto;
+	*tos0 = tos;
+	return m;
+}
+
+/*
+ * Fragment "m" to "mtu" with ip_fragment() and send every fragment through
+ * ipsecif4_output().
+ *
+ * The caller must hold a reference on "var".  Any NAT-T ports tag is
+ * removed from "m" first.  Once one fragment fails to be sent, the
+ * remaining fragments are freed.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+static int
+ipsecif4_fragout(struct ipsec_variant *var, int family, struct mbuf *m, int mtu)
+{
+	struct ifnet *ifp = &var->iv_softc->ipsec_if;
+	struct mbuf *next;
+	struct m_tag *mtag;
+	int error;
+
+	KASSERT(if_ipsec_heldref_variant(var));
+
+	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
+	if (mtag)
+		m_tag_delete(m, mtag);
+
+	error = ip_fragment(m, ifp, mtu);
+	if (error)
+		return error;
+
+	for (error = 0; m; m = next) {
+		/*
+		 * The fragments form a packet list linked through
+		 * m_nextpkt.  Unlink this packet from the list; m_next
+		 * must stay intact, it links the mbufs of this packet.
+		 */
+		next = m->m_nextpkt;
+		m->m_nextpkt = NULL;
+		if (error) {
+			m_freem(m);
+			continue;
+		}
+
+		error = ipsecif4_output(var, family, m);
+	}
+	if (error == 0)
+		IP_STATINC(IP_STAT_FRAGMENTED);
+
+	return error;
+}
+
+/*
+ * Check whether the received IPv4 packet "m" (header "ip") belongs to the
+ * tunnel described by "var".
+ *
+ * The packet's source/destination must equal the tunnel's
+ * destination/source.  If the packet carries a NAT-T ports tag, or the
+ * tunnel has a port configured, the ports must match as well.
+ *
+ * Returns 0 for no match.  On a match the NAT-T ports tag, if any, is
+ * removed and the combined size of the two matched addresses is returned.
+ */
+int
+ipsecif4_encap_func(struct mbuf *m, struct ip *ip, struct ipsec_variant *var)
+{
+	struct sockaddr_in *src, *dst;
+	struct m_tag *mtag;
+	u_int16_t pkt_sport = 0;
+	u_int16_t pkt_dport = 0;
+
+	KASSERT(var != NULL);
+
+	src = satosin(var->iv_psrc);
+	dst = satosin(var->iv_pdst);
+
+	mtag = m_tag_find(m, PACKET_TAG_IPSEC_NAT_T_PORTS, NULL);
+	if (mtag != NULL) {
+		/* The two ports are stored right behind the tag header. */
+		u_int16_t *ports = (u_int16_t *)(mtag + 1);
+
+		pkt_sport = ports[0];
+		pkt_dport = ports[1];
+	}
+
+	/* address match */
+	if (src->sin_addr.s_addr != ip->ip_dst.s_addr ||
+	    dst->sin_addr.s_addr != ip->ip_src.s_addr)
+		return 0;
+
+	/* port match, needed unless neither side uses UDP encap */
+	if (mtag != NULL || var->iv_sport != 0 || var->iv_dport != 0) {
+		if (pkt_sport != var->iv_dport ||
+		    pkt_dport != var->iv_sport) {
+#ifdef DEBUG
+			printf("%s: port mismatch: pkt(%u, %u), if(%u, %u)\n",
+			    __func__, ntohs(pkt_sport), ntohs(pkt_dport),
+			    ntohs(var->iv_sport), ntohs(var->iv_dport));
+#endif
+			return 0;
+		}
+	}
+
+	/*
+	 * hide NAT-T information from encapsulated traffics.
+	 * they don't know about IPsec.
+	 */
+	if (mtag != NULL)
+		m_tag_delete(m, mtag);
+	return sizeof(src->sin_addr) + sizeof(dst->sin_addr);
+}
+
+/*
+ * Encapsulate the inner packet "m" (address family "family") in an outer
+ * IPv4 header and pass it to IPsec output using the outbound SP of "var".
+ *
+ * The caller must hold a reference on "var", which must be configured
+ * with AF_INET endpoints.  If the SP's policy is not IPSEC_POLICY_IPSEC
+ * the packet is dropped and 0 is returned.  Packets that need NAT-T ESP
+ * fragmentation are handed to ipsecif4_fragout().
+ *
+ * Returns 0 on success or an errno value; "m" is consumed in all cases.
+ */
+static int
+ipsecif4_output(struct ipsec_variant *var, int family, struct mbuf *m)
+{
+	struct secpolicy *sp;
+	u_long sa_mtu = 0;
+	u_int8_t tos;
+	int proto;
+	int mtu;
+	int error;
+
+	KASSERT(if_ipsec_heldref_variant(var));
+	KASSERT(if_ipsec_variant_is_configured(var));
+	KASSERT(var->iv_psrc->sa_family == AF_INET);
+	KASSERT(var->iv_pdst->sa_family == AF_INET);
+
+	/*
+	 * The SPs in ipsec_variant are prevented from freed by
+	 * ipsec_variant->iv_psref. So, KEY_SP_REF() is unnecessary here.
+	 */
+	sp = IV_SP_OUT(var);
+	KASSERT(sp != NULL);
+
+	KASSERT(sp->policy != IPSEC_POLICY_NONE);
+	KASSERT(sp->policy != IPSEC_POLICY_ENTRUST);
+	KASSERT(sp->policy != IPSEC_POLICY_BYPASS);
+	if (sp->policy != IPSEC_POLICY_IPSEC) {
+		struct ifnet *ifp = &var->iv_softc->ipsec_if;
+
+		m_freem(m);
+		IF_DROP(&ifp->if_snd);
+		return 0;
+	}
+
+	/* get flowinfo */
+	m = ipsecif4_flowinfo(m, family, &proto, &tos);
+	if (m == NULL)
+		return ENETUNREACH;
+
+	/* prepend new IP header */
+	m = ipsecif4_prepend_hdr(var, m, proto, tos);
+	if (m == NULL)
+		return ENETUNREACH;
+
+	/*
+	 * Normal netipsec's NAT-T fragmentation is done in ip_output().
+	 * See "natt_frag" processing.
+	 * However, ipsec(4) interface's one is not done in the same way,
+	 * so we must do NAT-T fragmentation by own code.
+	 */
+	mtu = ipsecif4_needfrag(m, sp->req);
+	if (mtu > 0)
+		return ipsecif4_fragout(var, family, m, mtu);
+
+	/* IPsec output */
+	IP_STATINC(IP_STAT_LOCALOUT);
+	error = ipsec4_process_packet(m, sp->req, &sa_mtu);
+	if (error == ENOENT)
+		error = 0;
+
+	/*
+	 * fragmentation is already done in ipsecif4_fragout(),
+	 * so ipsec4_process_packet() must not do fragmentation here.
+	 */
+	KASSERT(error != 0 || sa_mtu == 0);
+
+	return error;
+}
+
+#ifdef INET6
+/*
+ * Encapsulate the inner packet "m" (address family "family") in an outer
+ * IPv6 header built from the tunnel endpoints of "var" and send it with
+ * ip6_output() using this CPU's cached route.
+ *
+ * The caller must hold a reference on "var", which must be configured
+ * with AF_INET6 endpoints.
+ *
+ * Returns 0 on success, ENOBUFS when mbuf space runs out, EAFNOSUPPORT for
+ * an unsupported inner family, ENETUNREACH when the destination is
+ * unspecified, has no route or routes back to this interface, or the
+ * error from ip6_output().
+ */
+static int
+ipsecif6_output(struct ipsec_variant *var, int family, struct mbuf *m)
+{
+ struct ifnet *ifp = &var->iv_softc->ipsec_if;
+ struct ipsec_softc *sc = ifp->if_softc;
+ struct ipsec_ro *iro;
+ struct rtentry *rt;
+ struct sockaddr_in6 *sin6_src;
+ struct sockaddr_in6 *sin6_dst;
+ struct ip6_hdr *ip6;
+ int proto, error;
+ u_int8_t itos, otos;
+ union {
+ struct sockaddr dst;
+ struct sockaddr_in6 dst6;
+ } u;
+
+ KASSERT(if_ipsec_heldref_variant(var));
+ KASSERT(if_ipsec_variant_is_configured(var));
+
+ sin6_src = satosin6(var->iv_psrc);
+ sin6_dst = satosin6(var->iv_pdst);
+
+ KASSERT(sin6_src->sin6_family == AF_INET6);
+ KASSERT(sin6_dst->sin6_family == AF_INET6);
+
+ /* Pick the next-header value and the inner TOS/traffic class. */
+ switch (family) {
+#ifdef INET
+ case AF_INET:
+ {
+ struct ip *ip;
+
+ proto = IPPROTO_IPV4;
+ if (m->m_len < sizeof(*ip)) {
+ m = m_pullup(m, sizeof(*ip));
+ if (!m)
+ return ENOBUFS;
+ }
+ ip = mtod(m, struct ip *);
+ itos = ip->ip_tos;
+ /*
+ * TODO:
+ *support ALTQ for innner packet
+ */
+ break;
+ }
+#endif /* INET */
+ case AF_INET6:
+ {
+ struct ip6_hdr *xip6;
+ proto = IPPROTO_IPV6;
+ if (m->m_len < sizeof(*xip6)) {
+ m = m_pullup(m, sizeof(*xip6));
+ if (!m)
+ return ENOBUFS;
+ }
+ xip6 = mtod(m, struct ip6_hdr *);
+ itos = (ntohl(xip6->ip6_flow) >> 20) & 0xff;
+ /* TODO:
+ * support ALTQ for innner packet
+ */
+ break;
+ }
+ default:
+ m_freem(m);
+ return EAFNOSUPPORT;
+ }
+
+ /* prepend new IP header */
+ M_PREPEND(m, sizeof(struct ip6_hdr), M_DONTWAIT);
+ if (m && M_UNWRITABLE(m, sizeof(struct ip6_hdr)))
+ m = m_pullup(m, sizeof(struct ip6_hdr));
+ if (m == NULL)
+ return ENOBUFS;
+
+ ip6 = mtod(m, struct ip6_hdr *);
+ ip6->ip6_flow = 0;
+ ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
+ ip6->ip6_vfc |= IPV6_VERSION;
+ /* NOTE(review): m_pkthdr.len already includes the outer header here. */
+ ip6->ip6_plen = htons((u_short)m->m_pkthdr.len);
+ ip6->ip6_nxt = proto;
+ ip6->ip6_hlim = ip6_ipsec_hlim;
+ ip6->ip6_src = sin6_src->sin6_addr;
+ /* bidirectional configured tunnel mode */
+ if (!IN6_IS_ADDR_UNSPECIFIED(&sin6_dst->sin6_addr)) {
+ ip6->ip6_dst = sin6_dst->sin6_addr;
+ } else {
+ m_freem(m);
+ return ENETUNREACH;
+ }
+ /* Derive the outer traffic class (otos) from the inner one (itos). */
+#ifndef IPSEC_TX_TOS_CLEAR
+ if (ifp->if_flags & IFF_ECN)
+ ip_ecn_ingress(ECN_ALLOWED, &otos, &itos);
+ else
+ ip_ecn_ingress(ECN_NOCARE, &otos, &itos);
+
+ if (!ip6_ipsec_copy_tos)
+ otos = 0;
+#else
+ if (ip6_ipsec_copy_tos)
+ otos = itos;
+ else
+ otos = 0;
+#endif
+ ip6->ip6_flow &= ~ntohl(0xff00000);
+ ip6->ip6_flow |= htonl((u_int32_t)otos << 20);
+
+ sockaddr_in6_init(&u.dst6, &sin6_dst->sin6_addr, 0, 0, 0);
+
+ /* The per-CPU route cache is used under its own lock, ir_lock. */
+ iro = percpu_getref(sc->ipsec_ro_percpu);
+ mutex_enter(&iro->ir_lock);
+ if ((rt = rtcache_lookup(&iro->ir_ro, &u.dst)) == NULL) {
+ mutex_exit(&iro->ir_lock);
+ percpu_putref(sc->ipsec_ro_percpu);
+ m_freem(m);
+ return ENETUNREACH;
+ }
+
+ /* A route back to this interface is rejected and dropped from the cache. */
+ if (rt->rt_ifp == ifp) {
+ rtcache_unref(rt, &iro->ir_ro);
+ rtcache_free(&iro->ir_ro);
+ mutex_exit(&iro->ir_lock);
+ percpu_putref(sc->ipsec_ro_percpu);
+ m_freem(m);
+ return ENETUNREACH;
+ }
+ rtcache_unref(rt, &iro->ir_ro);
+
+ /*
+ * force fragmentation to minimum MTU, to avoid path MTU discovery.
+ * it is too painful to ask for resend of inner packet, to achieve
+ * path MTU discovery for encapsulated packets.
+ */
+ error = ip6_output(m, 0, &iro->ir_ro,
+ ip6_ipsec_pmtu ? 0 : IPV6_MINMTU, 0, NULL, NULL);
+ /* Drop the cached route after a failed send. */
+ if (error)
+ rtcache_free(&iro->ir_ro);
+
+ mutex_exit(&iro->ir_lock);
+ percpu_putref(sc->ipsec_ro_percpu);
+
+ return error;
+}
+#endif /* INET6 */
+
+/*
+ * Input handler for packets whose outer header is IPv4.
+ *
+ * The packet is freed and IP_STAT_NOIPSEC is incremented when the interface
+ * is not IFF_UP, when the current variant is unconfigured, when
+ * m_get_rcvif_psref() returns NULL, when ipsecif4_filter4() rejects the outer
+ * header, or when "proto" is neither IPPROTO_IPV4 nor (with INET6)
+ * IPPROTO_IPV6.  Otherwise the leading "off" bytes are trimmed with m_adj()
+ * and the inner packet is handed to if_ipsec_input().
+ *
+ * If m_pullup() fails, the function returns immediately.
+ */
+static void
+ipsecif4_input(struct mbuf *m, int off, int proto, void *eparg)
+{
+	struct ipsec_softc *sc = eparg;
+	struct ifnet *ipsecp, *rcvif;
+	struct ipsec_variant *var;
+	struct psref psref_rcvif, psref_var;
+	const struct ip *outer;
+	int af, accepted;
+#ifndef IPSEC_TX_TOS_CLEAR
+	u_int8_t otos;
+#endif
+
+	KASSERT(sc != NULL);
+
+	ipsecp = &sc->ipsec_if;
+	if (!(ipsecp->if_flags & IFF_UP))
+		goto drop;
+
+	var = if_ipsec_getref_variant(sc, &psref_var);
+	if (if_ipsec_variant_is_unconfigured(var)) {
+		if_ipsec_putref_variant(var, &psref_var);
+		goto drop;
+	}
+
+	outer = mtod(m, const struct ip *);
+
+	/*
+	 * Both references are needed only while the filter runs, so they are
+	 * released before the verdict is acted upon.
+	 */
+	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
+	accepted = (rcvif != NULL && ipsecif4_filter4(outer, var, rcvif));
+	m_put_rcvif_psref(rcvif, &psref_rcvif);
+	if_ipsec_putref_variant(var, &psref_var);
+	if (!accepted)
+		goto drop;
+
+#ifndef IPSEC_TX_TOS_CLEAR
+	/* Remember the outer TOS; the leading "off" bytes are trimmed next. */
+	otos = outer->ip_tos;
+#endif
+	m_adj(m, off);
+
+	switch (proto) {
+	case IPPROTO_IPV4: {
+		struct ip *xip;
+
+		af = AF_INET;
+		if (M_UNWRITABLE(m, sizeof(*xip))) {
+			m = m_pullup(m, sizeof(*xip));
+			if (m == NULL)
+				return;
+		}
+		xip = mtod(m, struct ip *);
+#ifndef IPSEC_TX_TOS_CLEAR
+		ip_ecn_egress((ipsecp->if_flags & IFF_ECN) ?
+		    ECN_ALLOWED : ECN_NOCARE, &otos, &xip->ip_tos);
+#endif
+		break;
+	}
+#ifdef INET6
+	case IPPROTO_IPV6: {
+		struct ip6_hdr *ip6;
+		u_int8_t itos;
+
+		af = AF_INET6;
+		if (M_UNWRITABLE(m, sizeof(*ip6))) {
+			m = m_pullup(m, sizeof(*ip6));
+			if (m == NULL)
+				return;
+		}
+		ip6 = mtod(m, struct ip6_hdr *);
+		/* The traffic class is bits 20..27 of the host-order flow word. */
+		itos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
+#ifndef IPSEC_TX_TOS_CLEAR
+		ip_ecn_egress((ipsecp->if_flags & IFF_ECN) ?
+		    ECN_ALLOWED : ECN_NOCARE, &otos, &itos);
+#endif
+		ip6->ip6_flow &= ~htonl(0xff << 20);
+		ip6->ip6_flow |= htonl((u_int32_t)itos << 20);
+		break;
+	}
+#endif /* INET6 */
+	default:
+		ip_statinc(IP_STAT_NOIPSEC);
+		m_freem(m);
+		return;
+	}
+
+	if_ipsec_input(m, af, ipsecp);
+	return;
+
+drop:
+	m_freem(m);
+	ip_statinc(IP_STAT_NOIPSEC);
+}
+
+/*
+ * Validate and filter the packet.
+ *
+ * Passes the outer IPv4 header together with the tunnel source and
+ * destination addresses stored in "var" to in_tunnel_validate() and returns
+ * its result unchanged.  "ifp" is not used here.
+ */
+static int
+ipsecif4_filter4(const struct ip *ip, struct ipsec_variant *var,
+    struct ifnet *ifp)
+{
+	struct sockaddr_in *tun_src = satosin(var->iv_psrc);
+	struct sockaddr_in *tun_dst = satosin(var->iv_pdst);
+
+	return in_tunnel_validate(ip, tun_src->sin_addr, tun_dst->sin_addr);
+}
+
+#ifdef INET6
+/*
+ * Input handler for packets whose outer header is IPv6.
+ *
+ * mp    - in: pointer to the received mbuf chain.  Only a local copy of *mp
+ *         is used; *mp itself is never written back.
+ * offp  - in: *offp bytes are trimmed from the front with m_adj() before the
+ *         inner header is examined.
+ * proto - protocol of the inner packet; IPPROTO_IPV4 (with INET) and
+ *         IPPROTO_IPV6 are accepted, anything else is dropped.
+ * eparg - the struct ipsec_softc of the receiving interface; must not be NULL.
+ *
+ * The packet is freed and IP6_STAT_NOIPSEC is incremented when the interface
+ * is not IFF_UP, when the current variant is unconfigured, when
+ * m_get_rcvif_psref() returns NULL, when ipsecif6_filter6() rejects the outer
+ * header, or when "proto" is unsupported.  Otherwise the inner packet is
+ * handed to if_ipsec_input().
+ *
+ * Always returns IPPROTO_DONE.
+ */
+static int
+ipsecif6_input(struct mbuf **mp, int *offp, int proto, void *eparg)
+{
+	struct mbuf *m = *mp;
+	struct ifnet *ipsecp;
+	struct ipsec_softc *sc = eparg;
+	struct ipsec_variant *var;
+	struct ip6_hdr *ip6;
+	int af = 0;
+#ifndef IPSEC_TX_TOS_CLEAR
+	u_int32_t otos;
+#endif
+	struct psref psref_rcvif;
+	struct psref psref_var;
+	struct ifnet *rcvif;
+
+	KASSERT(eparg != NULL);
+
+	ipsecp = &sc->ipsec_if;
+	if ((ipsecp->if_flags & IFF_UP) == 0) {
+		m_freem(m);
+		IP6_STATINC(IP6_STAT_NOIPSEC);
+		return IPPROTO_DONE;
+	}
+
+	var = if_ipsec_getref_variant(sc, &psref_var);
+	if (if_ipsec_variant_is_unconfigured(var)) {
+		if_ipsec_putref_variant(var, &psref_var);
+		m_freem(m);
+		IP6_STATINC(IP6_STAT_NOIPSEC);
+		return IPPROTO_DONE;
+	}
+
+	ip6 = mtod(m, struct ip6_hdr *);
+
+	rcvif = m_get_rcvif_psref(m, &psref_rcvif);
+	if (rcvif == NULL || !ipsecif6_filter6(ip6, var, rcvif)) {
+		m_put_rcvif_psref(rcvif, &psref_rcvif);
+		if_ipsec_putref_variant(var, &psref_var);
+		m_freem(m);
+		IP6_STATINC(IP6_STAT_NOIPSEC);
+		return IPPROTO_DONE;
+	}
+	/* The filter has run; neither reference is needed any longer. */
+	m_put_rcvif_psref(rcvif, &psref_rcvif);
+	if_ipsec_putref_variant(var, &psref_var);
+
+#ifndef IPSEC_TX_TOS_CLEAR
+	/* Save the outer flow word (network order) before trimming. */
+	otos = ip6->ip6_flow;
+#endif
+	m_adj(m, *offp);
+
+	switch (proto) {
+#ifdef INET
+	case IPPROTO_IPV4:
+	{
+		af = AF_INET;
+#ifndef IPSEC_TX_TOS_CLEAR
+		struct ip *ip;
+		u_int8_t otos8;
+
+		/* The traffic class is bits 20..27 of the host-order flow word. */
+		otos8 = (ntohl(otos) >> 20) & 0xff;
+
+		if (M_UNWRITABLE(m, sizeof(*ip))) {
+			m = m_pullup(m, sizeof(*ip));
+			if (!m)
+				return IPPROTO_DONE;
+		}
+		ip = mtod(m, struct ip *);
+		if (ipsecp->if_flags & IFF_ECN)
+			ip_ecn_egress(ECN_ALLOWED, &otos8, &ip->ip_tos);
+		else
+			ip_ecn_egress(ECN_NOCARE, &otos8, &ip->ip_tos);
+#endif
+		break;
+	}
+#endif /* INET */
+	case IPPROTO_IPV6:
+	{
+		af = AF_INET6;
+#ifndef IPSEC_TX_TOS_CLEAR
+		struct ip6_hdr *xip6;
+
+		if (M_UNWRITABLE(m, sizeof(*xip6))) {
+			m = m_pullup(m, sizeof(*xip6));
+			if (!m)
+				return IPPROTO_DONE;
+		}
+		xip6 = mtod(m, struct ip6_hdr *);
+		if (ipsecp->if_flags & IFF_ECN)
+			ip6_ecn_egress(ECN_ALLOWED, &otos, &xip6->ip6_flow);
+		else
+			ip6_ecn_egress(ECN_NOCARE, &otos, &xip6->ip6_flow);
+#endif
+		/*
+		 * This break must stay outside the #ifndef above.  Otherwise a
+		 * kernel built with IPSEC_TX_TOS_CLEAR would fall through to
+		 * "default" and drop every inner IPv6 packet.
+		 */
+		break;
+	}
+	default:
+		IP6_STATINC(IP6_STAT_NOIPSEC);
+		m_freem(m);
+		return IPPROTO_DONE;
+	}
+
+	if_ipsec_input(m, af, ipsecp);
+	return IPPROTO_DONE;
+}
+
+/*
+ * Validate and filter the packet.
+ *
+ * Passes the outer IPv6 header together with pointers to the tunnel source
+ * and destination addresses stored in "var" to in6_tunnel_validate() and
+ * returns its result unchanged.  "ifp" is not used here.
+ */
+static int
+ipsecif6_filter6(const struct ip6_hdr *ip6, struct ipsec_variant *var,
+    struct ifnet *ifp)
+{
+	struct sockaddr_in6 *tun_src = satosin6(var->iv_psrc);
+	struct sockaddr_in6 *tun_dst = satosin6(var->iv_pdst);
+
+	return in6_tunnel_validate(ip6, &tun_src->sin6_addr,
+	    &tun_dst->sin6_addr);
+}
+#endif /* INET6 */
+
+/*
+ * Register the IPv4 encapsulation handler for a configured variant.
+ *
+ * Returns EALREADY when the variant already holds an IPv4 encap cookie,
+ * EEXIST when encap_attach_func() returns NULL, and 0 on success.  On success
+ * the variant's output routine is set to ipsecif4_output.
+ */
+int
+ipsecif4_attach(struct ipsec_variant *var)
+{
+	struct ipsec_softc *sc = var->iv_softc;
+
+	KASSERT(if_ipsec_variant_is_configured(var));
+
+	if (var->iv_encap_cookie4 != NULL)
+		return EALREADY;
+
+	var->iv_encap_cookie4 = encap_attach_func(AF_INET, -1,
+	    if_ipsec_encap_func, &ipsecif4_encapsw, sc);
+	if (var->iv_encap_cookie4 != NULL) {
+		var->iv_output = ipsecif4_output;
+		return 0;
+	}
+
+	return EEXIST;
+}
+
+/*
+ * Undo ipsecif4_attach().
+ *
+ * Returns 0 right away when no IPv4 encap cookie is held.  Otherwise the
+ * output routine is cleared first and encap_detach() is called; the cookie
+ * is cleared only when that call succeeds, and its error is returned as is.
+ */
+int
+ipsecif4_detach(struct ipsec_variant *var)
+{
+	int error;
+
+	if (var->iv_encap_cookie4 == NULL)
+		return 0;
+
+	var->iv_output = NULL;
+
+	error = encap_detach(var->iv_encap_cookie4);
+	if (error != 0)
+		return error;
+
+	var->iv_encap_cookie4 = NULL;
+	return 0;
+}
+
+#ifdef INET6
+/*
+ * Register the IPv6 encapsulation handler for a configured variant.
+ *
+ * The variant must not already hold an IPv6 encap cookie (asserted).  The
+ * tunnel source and destination are registered with an all-ones mask.
+ * Returns EEXIST when encap_attach() returns NULL and 0 on success; on
+ * success the variant's output routine is set to ipsecif6_output.
+ */
+int
+ipsecif6_attach(struct ipsec_variant *var)
+{
+	struct ipsec_softc *sc = var->iv_softc;
+	struct sockaddr_in6 hostmask;
+
+	KASSERT(if_ipsec_variant_is_configured(var));
+	KASSERT(var->iv_encap_cookie6 == NULL);
+
+	/* Build a mask with every address bit set. */
+	memset(&hostmask, 0, sizeof(hostmask));
+	hostmask.sin6_len = sizeof(struct sockaddr_in6);
+	memset(&hostmask.sin6_addr, 0xff, sizeof(hostmask.sin6_addr));
+
+	var->iv_encap_cookie6 = encap_attach(AF_INET6, -1,
+	    var->iv_psrc, (struct sockaddr *)&hostmask,
+	    var->iv_pdst, (struct sockaddr *)&hostmask,
+	    &ipsecif6_encapsw, sc);
+	if (var->iv_encap_cookie6 != NULL) {
+		var->iv_output = ipsecif6_output;
+		return 0;
+	}
+
+	return EEXIST;
+}
+
+/*
+ * percpu_foreach() callback used by ipsecif6_detach(): frees the route cache
+ * held in one struct ipsec_ro while holding that entry's ir_lock.
+ */
+static void
+ipsecif6_rtcache_free_pc(void *p, void *arg __unused, struct cpu_info *ci __unused)
+{
+	struct ipsec_ro *cached = p;
+
+	mutex_enter(&cached->ir_lock);
+	rtcache_free(&cached->ir_ro);
+	mutex_exit(&cached->ir_lock);
+}
+
+/*
+ * Undo ipsecif6_attach().
+ *
+ * The variant must hold an IPv6 encap cookie (asserted).  Every per-CPU
+ * route cache is freed, the output routine is cleared, and encap_detach() is
+ * called; the cookie is cleared only when that call succeeds, and its error
+ * is returned as is.
+ */
+int
+ipsecif6_detach(struct ipsec_variant *var)
+{
+	struct ipsec_softc *sc = var->iv_softc;
+	int error;
+
+	KASSERT(var->iv_encap_cookie6 != NULL);
+
+	percpu_foreach(sc->ipsec_ro_percpu, ipsecif6_rtcache_free_pc, NULL);
+
+	var->iv_output = NULL;
+
+	error = encap_detach(var->iv_encap_cookie6);
+	if (error != 0)
+		return error;
+
+	var->iv_encap_cookie6 = NULL;
+	return 0;
+}
+
+/*
+ * Control-input hook for the IPv6 side of the tunnel.
+ *
+ * Requests are ignored unless "sa" is an AF_INET6 sockaddr_in6 of the
+ * expected length, "cmd" is below PRC_NCMDS and has a non-zero
+ * inet6ctlerrmap[] entry, and "d" supplies an ip6ctlparam with a non-NULL
+ * ip6c_ip6.  PRC_HOSTDEAD is treated as carrying no parameter and is
+ * therefore ignored as well.
+ *
+ * For an accepted request the current CPU's cached route is freed when its
+ * destination equals the destination address of the reported IPv6 header.
+ *
+ * Always returns NULL.
+ */
+void *
+ipsecif6_ctlinput(int cmd, const struct sockaddr *sa, void *d, void *eparg)
+{
+	struct ipsec_softc *sc = eparg;
+	struct ip6_hdr *ip6;
+	const struct sockaddr_in6 *cached_dst;
+	struct ipsec_ro *iro;
+
+	if (sa->sa_family != AF_INET6)
+		return NULL;
+	if (sa->sa_len != sizeof(struct sockaddr_in6))
+		return NULL;
+
+	if ((unsigned)cmd >= PRC_NCMDS)
+		return NULL;
+	if (cmd == PRC_HOSTDEAD || inet6ctlerrmap[cmd] == 0)
+		return NULL;
+
+	/* if the parameter is from icmp6, decode it. */
+	if (d == NULL)
+		return NULL;
+	ip6 = ((struct ip6ctlparam *)d)->ip6c_ip6;
+	if (ip6 == NULL)
+		return NULL;
+
+	iro = percpu_getref(sc->ipsec_ro_percpu);
+	mutex_enter(&iro->ir_lock);
+
+	cached_dst = satocsin6(rtcache_getdst(&iro->ir_ro));
+	/* XXX scope */
+	if (cached_dst != NULL &&
+	    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &cached_dst->sin6_addr)) {
+		/* flush route cache */
+		rtcache_free(&iro->ir_ro);
+	}
+
+	mutex_exit(&iro->ir_lock);
+	percpu_putref(sc->ipsec_ro_percpu);
+
+	return NULL;
+}
+
+/*
+ * NOTE(review): ENCAP_PR_WRAP_CTLINPUT() is defined outside this file;
+ * presumably it emits ipsecif6_ctlinput_wrapper() around
+ * ipsecif6_ctlinput() -- confirm against the macro definition.
+ */
+ENCAP_PR_WRAP_CTLINPUT(ipsecif6_ctlinput)
+/* From here on the name ipsecif6_ctlinput expands to the wrapper. */
+#define ipsecif6_ctlinput ipsecif6_ctlinput_wrapper
+
+/*
+ * IPv6 handler table that ipsecif6_attach() passes to encap_attach().
+ * Because of the #define above, .pr_ctlinput refers to
+ * ipsecif6_ctlinput_wrapper.
+ */
+static const struct encapsw ipsecif6_encapsw = {
+ .encapsw6 = {
+ .pr_input = ipsecif6_input,
+ .pr_ctlinput = ipsecif6_ctlinput,
+ }
+};
+#endif /* INET6 */
Index: src/sys/netipsec/ipsecif.h
diff -u /dev/null src/sys/netipsec/ipsecif.h:1.1
--- /dev/null Wed Jan 10 10:56:31 2018
+++ src/sys/netipsec/ipsecif.h Wed Jan 10 10:56:30 2018
@@ -0,0 +1,47 @@
+/* $NetBSD: ipsecif.h,v 1.1 2018/01/10 10:56:30 knakahara Exp $ */
+
+/*
+ * Copyright (c) 2017 Internet Initiative Japan Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _NETIPSEC_IPSECIF_H_
+#define _NETIPSEC_IPSECIF_H_
+
+#include <net/if_ipsec.h>
+
+#define IPSEC_TTL 64	/* NOTE(review): presumably default outer IPv4 TTL -- confirm */
+#define IPSEC_HLIM 64	/* NOTE(review): presumably default outer IPv6 hop limit -- confirm */
+
+#ifdef _KERNEL	/* the prototypes below are visible to kernel code only */
+int ipsecif4_encap_func(struct mbuf *, struct ip *, struct ipsec_variant *);
+int ipsecif4_attach(struct ipsec_variant *);	/* 0, EALREADY or EEXIST */
+int ipsecif4_detach(struct ipsec_variant *);	/* 0 or encap_detach() error */
+
+int ipsecif6_attach(struct ipsec_variant *);	/* 0 or EEXIST */
+int ipsecif6_detach(struct ipsec_variant *);	/* 0 or encap_detach() error */
+void *ipsecif6_ctlinput(int, const struct sockaddr *, void *, void *);	/* always NULL */
+#endif /* _KERNEL */
+
+#endif /*_NETIPSEC_IPSECIF_H_*/