tl;dr: this adds sec(4) point-to-point IP interfaces. Traffic in and out of these interfaces is protected by IPsec security associations (SAs), but there are no flows (security policy database (SPD) entries) associated with these SAs. The policy for using the sec(4) interfaces and their SAs is route-based instead.
Longer version: I was going to use "make ipsec great again^W" as the subject line, but thought better of it. The reason I started on this was to better interoperate with "site-to-site" VPNs, in particular AWS Site-to-Site VPNs, and the Auto-Discovery VPN (ADVPN) stuff on Fortinet FortiGate appliances. Both of these negotiate IPsec tunnels that can carry any traffic at the IPsec level, but use BGP and routes to direct traffic into those tunnels. sec(4) is equivalent to a gif(4) interface with its encapsulated packets protected by ESP in transport mode. You route packets into the interface (sec or gif), and they get encrypted and sent to the peer, which decapsulates the traffic. The main difference is in how the SAs for these connections are negotiated. Neither of these wants to negotiate ESP transport mode to protect gif(4) packets; they want to negotiate ESP tunnel mode for 0.0.0.0/0 to 0.0.0.0/0. The fact that IPsec in tunnel mode and gif both use the same IP protocol number also causes a lot of confusion in the kernel in the SPD. After trying a bunch of different configurations out, and then trying to hack up ipsecctl and isakmpd, and then talking to markus@, tobhe@, and sthen@, we came up with sec(4). The idea isn't unique to us though. It has been mooted in RFC3884 section 4.1.1, Cisco has VTI, Juniper has st0, Linux has vti and xfrm interfaces, FreeBSD has ipsec_if, NetBSD has ipsecif... The kernel has been modified so IKE daemons can inject an SA with an iface extension message attached which specifies which sec(4) the SA is for, and which direction it should be processing traffic for. If an SA has this iface config on it, the ipsp code skips the SPD side of things and instead makes these SAs available to sec(4) for it to use. I've tweaked isakmpd and ipsecctl so they support new config options that let you configure SAs for sec(4). 
Most of the changes in isakmpd are so it can continue to negotiate the right stuff with the peer, but then short circuits the kernel config so only the SAs with the iface extension are injected, none of the flows get inserted. tobhe@ has done the same for iked, but he's reused the "iface" config and special cased the handling of sec interfaces. For ipsecctl and isakmpd, config looks like this in ipsec.conf: h_self="130.102.96.46" h_s2s1="52.65.9.248" h_s2s1_key="one" h_s2s2="54.153.175.223" h_s2s2_key="two" ike interface sec0 local $h_self peer $h_s2s1 \ main auth hmac-sha2-256 enc aes-256 group modp3072 lifetime 28800 \ quick auth hmac-sha2-256 enc aes-256 group modp3072 lifetime 3600 \ psk $h_s2s1_key ike interface sec1 local $h_self peer $h_s2s2 \ main auth hmac-sha2-256 enc aes-256 group modp3072 lifetime 28800 \ quick auth hmac-sha2-256 enc aes-256 group modp3072 lifetime 3600 \ psk $h_s2s2_key sec interface config: dlg@ix ~$ sudo cat /etc/hostname.sec0 inet 169.254.64.94 255.255.255.252 169.254.64.93 up dlg@ix ~$ sudo cat /etc/hostname.sec1 inet 169.254.105.134 255.255.255.252 169.254.105.133 up aws s2s says we can then talk bgp: dlg@ix ~$ sudo cat /etc/bgpd.conf AS 65001 router-id 130.102.96.46 group aws { remote-as 64512 neighbor 169.254.64.93 neighbor 169.254.105.133 } with isakmpd running and ipsecctl having injected its config into it, it then sets up SAs: dlg@ix ~$ sudo ipsecctl -sa FLOWS: No flows SAD: esp tunnel from 54.153.175.223 to 130.102.96.46 spi 0x13ca145b auth hmac-sha2-256 enc aes-256 esp tunnel from 52.65.9.248 to 130.102.96.46 spi 0x8e5fec4b auth hmac-sha2-256 enc aes-256 esp tunnel from 130.102.96.46 to 54.153.175.223 spi 0xc9d2adc1 auth hmac-sha2-256 enc aes-256 esp tunnel from 130.102.96.46 to 52.65.9.248 spi 0xca1adc30 auth hmac-sha2-256 enc aes-256 dlg@ix ~$ sudo ipsecctl -sa -v FLOWS: No flows SAD: esp tunnel from 54.153.175.223 to 130.102.96.46 spi 0x13ca145b auth hmac-sha2-256 enc aes-256 sa: spi 0x13ca145b auth hmac-sha2-256 enc 
aes state mature replay 16 flags 0x204<tunnel,udpencap> lifetime_cur: alloc 0 bytes 752 add 1684451878 first 1684451880 lifetime_hard: alloc 0 bytes 0 add 3600 first 0 lifetime_soft: alloc 0 bytes 0 add 3240 first 0 address_src: 54.153.175.223 address_dst: 130.102.96.46 identity_src: type prefix id 0: 54.153.175.223/32 identity_dst: type prefix id 0: 130.102.96.46/32 src_mask: 0.0.0.0 dst_mask: 0.0.0.0 protocol: proto 0 flags 0 flow_type: type use direction in src_flow: 0.0.0.0 dst_flow: 0.0.0.0 udpencap: udpencap port 4500 lifetime_lastuse: alloc 0 bytes 0 add 0 first 1684451888 counter: 9 input packets 2044 input bytes 853 input bytes, decompressed 9 packets dropped on input replay: rpl 9 interface: sec1 direction in esp tunnel from 52.65.9.248 to 130.102.96.46 spi 0x8e5fec4b auth hmac-sha2-256 enc aes-256 sa: spi 0x8e5fec4b auth hmac-sha2-256 enc aes state mature replay 16 flags 0x204<tunnel,udpencap> lifetime_cur: alloc 0 bytes 528 add 1684451878 first 1684451882 lifetime_hard: alloc 0 bytes 0 add 3600 first 0 lifetime_soft: alloc 0 bytes 0 add 3240 first 0 address_src: 52.65.9.248 address_dst: 130.102.96.46 identity_src: type prefix id 0: 52.65.9.248/32 identity_dst: type prefix id 0: 130.102.96.46/32 src_mask: 0.0.0.0 dst_mask: 0.0.0.0 protocol: proto 0 flags 0 flow_type: type use direction in src_flow: 0.0.0.0 dst_flow: 0.0.0.0 udpencap: udpencap port 4500 lifetime_lastuse: alloc 0 bytes 0 add 0 first 1684451887 counter: 6 input packets 1416 input bytes 597 input bytes, decompressed 6 packets dropped on input replay: rpl 6 interface: sec0 direction in esp tunnel from 130.102.96.46 to 54.153.175.223 spi 0xc9d2adc1 auth hmac-sha2-256 enc aes-256 sa: spi 0xc9d2adc1 auth hmac-sha2-256 enc aes state mature replay 16 flags 0x204<tunnel,udpencap> lifetime_cur: alloc 0 bytes 511 add 1684451878 first 1684451880 lifetime_hard: alloc 0 bytes 0 add 3600 first 0 lifetime_soft: alloc 0 bytes 0 add 3240 first 0 address_src: 130.102.96.46 address_dst: 54.153.175.223 
identity_src: type prefix id 0: 130.102.96.46/32 identity_dst: type prefix id 0: 54.153.175.223/32 src_mask: 0.0.0.0 dst_mask: 0.0.0.0 protocol: proto 0 flags 0 flow_type: type use direction out src_flow: 0.0.0.0 dst_flow: 0.0.0.0 udpencap: udpencap port 4500 lifetime_lastuse: alloc 0 bytes 0 add 0 first 1684451888 counter: 8 output packets 1136 output bytes 671 output bytes, uncompressed replay: rpl 9 interface: sec1 direction out esp tunnel from 130.102.96.46 to 52.65.9.248 spi 0xca1adc30 auth hmac-sha2-256 enc aes-256 sa: spi 0xca1adc30 auth hmac-sha2-256 enc aes state mature replay 16 flags 0x204<tunnel,udpencap> lifetime_cur: alloc 0 bytes 452 add 1684451878 first 1684451882 lifetime_hard: alloc 0 bytes 0 add 3600 first 0 lifetime_soft: alloc 0 bytes 0 add 3240 first 0 address_src: 130.102.96.46 address_dst: 52.65.9.248 identity_src: type prefix id 0: 130.102.96.46/32 identity_dst: type prefix id 0: 52.65.9.248/32 src_mask: 0.0.0.0 dst_mask: 0.0.0.0 protocol: proto 0 flags 0 flow_type: type use direction out src_flow: 0.0.0.0 dst_flow: 0.0.0.0 udpencap: udpencap port 4500 lifetime_lastuse: alloc 0 bytes 0 add 0 first 1684451887 counter: 7 output packets 1004 output bytes 592 output bytes, uncompressed replay: rpl 8 interface: sec0 direction out dlg@ix ~$ ifconfig sec sec0: flags=8051<UP,POINTOPOINT,RUNNING,MULTICAST> mtu 1280 index 14 priority 0 llprio 3 groups: sec inet 169.254.64.94 --> 169.254.64.93 netmask 0xfffffffc sec1: flags=8051<UP,POINTOPOINT,RUNNING,MULTICAST> mtu 1280 index 15 priority 0 llprio 3 groups: sec inet 169.254.105.134 --> 169.254.105.133 netmask 0xfffffffc dlg@ix ~$ ping -qc4 169.254.64.93 PING 169.254.64.93 (169.254.64.93): 56 data bytes --- 169.254.64.93 ping statistics --- 4 packets transmitted, 4 packets received, 0.0% packet loss round-trip min/avg/max/std-dev = 16.878/17.062/17.230/0.131 ms dlg@ix ~$ ping -qc4 169.254.105.133 PING 169.254.105.133 (169.254.105.133): 56 data bytes --- 169.254.105.133 ping statistics --- 4 packets 
transmitted, 4 packets received, 0.0% packet loss round-trip min/avg/max/std-dev = 15.110/15.690/16.538/0.524 ms and bgp comes up: dlg@ix ~$ sudo bgpctl sh Neighbor AS MsgRcvd MsgSent OutQ Up/Down State/PrfRcvd 169.254.64.93 64512 2534 2505 0 00:01:43 1 169.254.105.133 64512 4140 4137 0 00:01:38 1 dlg@ix ~$ sudo bgpctl sh rib in flags: * = Valid, > = Selected, I = via IBGP, A = Announced, S = Stale, E = Error origin validation state: N = not-found, V = valid, ! = invalid aspa validation state: ? = unknown, V = valid, ! = invalid origin: i = IGP, e = EGP, ? = Incomplete flags vs destination gateway lpref med aspath origin N-? 100.64.64.0/22 169.254.105.133 100 100 64512 i N-? 100.64.64.0/22 169.254.64.93 100 200 64512 i ive got equivalent config with iked working, but tobhe@ wrote that so i don't think it's fair for me to steal his thunder. thoughts? is it worth continuing with? Index: sbin/iked/iked.h =================================================================== RCS file: /cvs/src/sbin/iked/iked.h,v retrieving revision 1.220 diff -u -p -r1.220 iked.h --- sbin/iked/iked.h 28 Jun 2023 14:10:24 -0000 1.220 +++ sbin/iked/iked.h 29 Jun 2023 03:09:45 -0000 @@ -260,6 +260,7 @@ struct iked_policy { #define IKED_POLICY_SKIP 0x10 #define IKED_POLICY_IPCOMP 0x20 #define IKED_POLICY_TRANSPORT 0x40 +#define IKED_POLICY_ROUTING 0x80 int pol_refcnt; Index: sbin/iked/ikev2.c =================================================================== RCS file: /cvs/src/sbin/iked/ikev2.c,v retrieving revision 1.372 diff -u -p -r1.372 ikev2.c --- sbin/iked/ikev2.c 28 Jun 2023 14:10:24 -0000 1.372 +++ sbin/iked/ikev2.c 29 Jun 2023 03:09:45 -0000 @@ -1782,7 +1782,8 @@ ikev2_init_done(struct iked *env, struct ikev2_enable_timer(env, sa); ikev2_log_established(sa); ikev2_record_dstid(env, sa); - sa_configure_iface(env, sa, 1); + if (!(sa->sa_policy->pol_flags & IKED_POLICY_ROUTING)) + sa_configure_iface(env, sa, 1); } if (ret) Index: sbin/iked/parse.y 
=================================================================== RCS file: /cvs/src/sbin/iked/parse.y,v retrieving revision 1.143 diff -u -p -r1.143 parse.y --- sbin/iked/parse.y 14 Jun 2023 14:09:29 -0000 1.143 +++ sbin/iked/parse.y 29 Jun 2023 03:09:45 -0000 @@ -2519,6 +2519,10 @@ create_ike(char *name, int af, struct ip } if (iface != NULL) { + /* sec(4) */ + if (strncmp("sec", iface, strlen("sec")) == 0) + pol.pol_flags |= IKED_POLICY_ROUTING; + pol.pol_iface = if_nametoindex(iface); if (pol.pol_iface == 0) { yyerror("invalid iface"); Index: sbin/iked/pfkey.c =================================================================== RCS file: /cvs/src/sbin/iked/pfkey.c,v retrieving revision 1.82 diff -u -p -r1.82 pfkey.c --- sbin/iked/pfkey.c 13 Jun 2023 12:34:12 -0000 1.82 +++ sbin/iked/pfkey.c 29 Jun 2023 03:09:45 -0000 @@ -25,6 +25,7 @@ #include <netinet/in.h> #include <netinet/ip_ipsp.h> +#include <net/if.h> #include <net/pfkeyv2.h> #include <err.h> @@ -40,7 +41,7 @@ #include "ikev2.h" #define ROUNDUP(x) (((x) + (PFKEYV2_CHUNK - 1)) & ~(PFKEYV2_CHUNK - 1)) -#define IOV_CNT 27 +#define IOV_CNT 28 #define PFKEYV2_CHUNK sizeof(uint64_t) #define PFKEY_REPLY_TIMEOUT 1000 @@ -453,6 +454,7 @@ pfkey_flow(struct iked *env, uint8_t sat int pfkey_sa(struct iked *env, uint8_t satype, uint8_t action, struct iked_childsa *sa) { + char iface[IF_NAMESIZE]; struct sadb_msg smsg; struct sadb_sa sadb; struct sadb_address sa_src, sa_dst, sa_pxy; @@ -460,6 +462,7 @@ pfkey_sa(struct iked *env, uint8_t satyp struct sadb_lifetime sa_ltime_hard, sa_ltime_soft; struct sadb_x_udpencap udpencap; struct sadb_x_tag sa_tag; + struct sadb_x_iface sa_iface; char *tag = NULL; struct sadb_x_tap sa_tap; struct sadb_x_rdomain sa_rdomain; @@ -469,6 +472,8 @@ pfkey_sa(struct iked *env, uint8_t satyp struct iked_policy *pol; struct iked_addr *dst; struct iovec iov[IOV_CNT]; + const char *errstr = NULL; + uint32_t ifminor; uint32_t jitter; int iov_cnt; int ret, dotap = 0; @@ -688,6 +693,25 @@ 
pfkey_sa(struct iked *env, uint8_t satyp sa_tap.sadb_x_tap_unit = pol->pol_tap; } + if (pol->pol_flags & IKED_POLICY_ROUTING) { + bzero(&sa_iface, sizeof(sa_iface)); + sa_iface.sadb_x_iface_exttype = SADB_X_EXT_IFACE; + sa_iface.sadb_x_iface_len = sizeof(sa_iface) / 8; + if (if_indextoname(pol->pol_iface, iface) == NULL) { + log_warnx("%s: unsupported interface %s", + __func__, iface); + return (-1); + } + ifminor = strtonum(iface + strlen("sec"), 0, UINT_MAX, &errstr); + if (errstr != NULL) { + log_warnx("%s: unsupported interface %s", + __func__, iface); + return (-1); + } + sa_iface.sadb_x_iface_unit = ifminor; + sa_iface.sadb_x_iface_direction = sa->csa_dir; + } + send: #define PAD(len) \ @@ -814,6 +838,13 @@ pfkey_sa(struct iked *env, uint8_t satyp smsg.sadb_msg_len += sa_tag.sadb_x_tag_len; iov_cnt++; PAD(strlen(tag) + 1); + } + + if (sa_iface.sadb_x_iface_len) { + iov[iov_cnt].iov_base = &sa_iface; + iov[iov_cnt].iov_len = sa_iface.sadb_x_iface_len * 8; + smsg.sadb_msg_len += sa_iface.sadb_x_iface_len; + iov_cnt++; } if (dotap != 0) { Index: sbin/ipsecctl/ike.c =================================================================== RCS file: /cvs/src/sbin/ipsecctl/ike.c,v retrieving revision 1.83 diff -u -p -r1.83 ike.c --- sbin/ipsecctl/ike.c 25 Jun 2022 20:33:40 -0000 1.83 +++ sbin/ipsecctl/ike.c 29 Jun 2023 03:09:45 -0000 @@ -148,6 +148,10 @@ ike_section_ipsec(struct ipsec_rule *r, if (r->tag) fprintf(fd, SET "[%s]:PF-Tag=%s force\n", r->p2name, r->tag); + if (r->flags & IPSEC_RULE_F_IFACE) { + fprintf(fd, SET "[%s]:Interface=%u force\n", r->p2name, + r->iface); + } } static int @@ -842,21 +846,30 @@ ike_setup_ids(struct ipsec_rule *r) err(1, "ike_setup_ids"); /* Phase 2 name is from and to network, protocol, port*/ - sproto[0] = ssport[0] = sdport[0] = 0; - if (r->proto) - snprintf(sproto, sizeof sproto, "=%u", r->proto); - if (r->sport) - snprintf(ssport, sizeof ssport, ":%u", ntohs(r->sport)); - if (r->dport) - snprintf(sdport, sizeof sdport, ":%u", 
ntohs(r->dport)); - /* from-network/masklen=proto:port */ - if (asprintf(&r->p2lid, "from-%s%s%s", r->src->name, sproto, ssport) - == -1) - err(1, "ike_setup_ids"); - /* to-network/masklen=proto:port */ - if (asprintf(&r->p2rid, "to-%s%s%s", r->dst->name, sproto, sdport) - == -1) - err(1, "ike_setup_ids"); + if (r->flags & IPSEC_RULE_F_IFACE) { + if (asprintf(&r->p2lid, "from-sec%u", r->iface) == -1) + err(1, "ike_setup_ids"); + if (asprintf(&r->p2rid, "to-sec%u", r->iface) == -1) + err(1, "ike_setup_ids"); + } else { + sproto[0] = ssport[0] = sdport[0] = 0; + if (r->proto) + snprintf(sproto, sizeof sproto, "=%u", r->proto); + if (r->sport) + snprintf(ssport, sizeof ssport, ":%u", ntohs(r->sport)); + if (r->dport) + snprintf(sdport, sizeof sdport, ":%u", ntohs(r->dport)); + + /* from-network/masklen=proto:port */ + if (asprintf(&r->p2lid, "from-%s%s%s", r->src->name, + sproto, ssport) == -1) + err(1, "ike_setup_ids"); + /* to-network/masklen=proto:port */ + if (asprintf(&r->p2rid, "to-%s%s%s", r->dst->name, + sproto, sdport) == -1) + err(1, "ike_setup_ids"); + } + /* from-network/masklen=proto:port-to-network/masklen=proto:port */ if (asprintf(&r->p2name, "%s-%s", r->p2lid , r->p2rid) == -1) err(1, "ike_setup_ids"); Index: sbin/ipsecctl/ipsecctl.h =================================================================== RCS file: /cvs/src/sbin/ipsecctl/ipsecctl.h,v retrieving revision 1.75 diff -u -p -r1.75 ipsecctl.h --- sbin/ipsecctl/ipsecctl.h 22 Oct 2021 12:30:54 -0000 1.75 +++ sbin/ipsecctl/ipsecctl.h 29 Jun 2023 03:09:45 -0000 @@ -178,6 +178,9 @@ TAILQ_HEAD(dst_bundle_queue, ipsec_rule) struct ipsec_rule { u_int8_t type; + unsigned int flags; +#define IPSEC_RULE_F_IFACE (1 << 0) /* iface is valid */ + struct ipsec_addr_wrap *src; struct ipsec_addr_wrap *dst; struct ipsec_addr_wrap *dst2; @@ -215,6 +218,7 @@ struct ipsec_rule { u_int32_t spi; u_int32_t spi2; u_int32_t nr; + unsigned int iface; TAILQ_ENTRY(ipsec_rule) rule_entry; TAILQ_ENTRY(ipsec_rule) bundle_entry; 
Index: sbin/ipsecctl/parse.y =================================================================== RCS file: /cvs/src/sbin/ipsecctl/parse.y,v retrieving revision 1.181 diff -u -p -r1.181 parse.y --- sbin/ipsecctl/parse.y 22 Oct 2021 12:30:54 -0000 1.181 +++ sbin/ipsecctl/parse.y 29 Jun 2023 03:09:45 -0000 @@ -233,6 +233,7 @@ struct ipsec_transforms *ipsec_transform typedef struct { union { int64_t number; + uint32_t unit; u_int8_t ikemode; u_int8_t dir; u_int8_t satype; /* encapsulating prococol */ @@ -285,9 +286,10 @@ typedef struct { %token AUTHKEY ENCKEY FILENAME AUTHXF ENCXF ERROR IKE MAIN QUICK AGGRESSIVE %token PASSIVE ACTIVE ANY IPIP IPCOMP COMPXF TUNNEL TRANSPORT DYNAMIC LIFETIME %token TYPE DENY BYPASS LOCAL PROTO USE ACQUIRE REQUIRE DONTACQ GROUP PORT TAG -%token INCLUDE BUNDLE UDPENCAP +%token INCLUDE BUNDLE UDPENCAP INTERFACE %token <v.string> STRING %token <v.number> NUMBER +%type <v.unit> iface %type <v.string> string %type <v.dir> dir %type <v.satype> satype @@ -402,6 +404,41 @@ ikerule : IKE ikemode satype tmode prot if (expand_rule(r, &$7, 0, 0, NULL, NULL, NULL)) errx(1, "ikerule: expand_rule"); } + + /* ike interface sec0 local $h_self peer $h_s2s1 ... 
*/ + | IKE ikemode iface peers + phase1mode phase2mode ids ikeauth { + uint8_t proto = 0; // IPPROTO_IPIP; + struct ipsec_hosts hosts; + struct ike_mode *phase1mode = $5; + struct ike_mode *phase2mode = $6; + uint8_t satype = IPSEC_ESP; + uint8_t tmode = IPSEC_TUNNEL; + uint8_t mode = $2; + struct ike_auth *authtype = &$8; + char *tag = NULL; + + struct ipsec_rule *r; + + hosts.src = host_v4("0.0.0.0/0", 1); + hosts.sport = htons(0); + hosts.dst = host_v4("0.0.0.0/0", 1); + hosts.dport = htons(0); + + r = create_ike(proto, &hosts, phase1mode, phase2mode, + satype, tmode, mode, $7.srcid, $7.dstid, + authtype, tag); + if (r == NULL) { + YYERROR; + } + + r->flags |= IPSEC_RULE_F_IFACE; + r->iface = $3; + + if (expand_rule(r, &$4, 0, 0, NULL, NULL, NULL)) + errx(1, "ikerule: expand interface rule"); + + } ; satype : /* empty */ { $$ = IPSEC_ESP; } @@ -910,6 +947,30 @@ tag : /* empty */ } ; +iface : INTERFACE STRING { + static const char prefix[] = "sec"; + const char *errstr = NULL; + size_t len, plen; + + plen = strlen(prefix); + len = strlen($2); + + if (len <= plen || memcmp($2, prefix, plen) != 0) { + yyerror("invalid %s interface name", prefix); + free($2); + YYERROR; + } + + $$ = strtonum($2 + plen, 0, UINT_MAX, &errstr); + free($2); + if (errstr != NULL) { + yyerror("invalid %s interface unit: %s", + prefix, errstr); + YYERROR; + } + } + ; + string : string STRING { if (asprintf(&$$, "%s %s", $1, $2) == -1) @@ -1010,6 +1071,7 @@ lookup(char *s) { "ike", IKE }, { "in", IN }, { "include", INCLUDE }, + { "interface", INTERFACE }, { "ipcomp", IPCOMP }, { "ipip", IPIP }, { "lifetime", LIFETIME }, @@ -2217,6 +2279,7 @@ copyrule(struct ipsec_rule *rule) r->enckey = copykey(rule->enckey); r->tag = copytag(rule->tag); + r->flags = rule->flags; r->p1ie = rule->p1ie; r->p2ie = rule->p2ie; r->type = rule->type; @@ -2232,6 +2295,7 @@ copyrule(struct ipsec_rule *rule) r->udpencap = rule->udpencap; r->udpdport = rule->udpdport; r->nr = rule->nr; + r->iface = rule->iface; 
return (r); } Index: sbin/ipsecctl/pfkdump.c =================================================================== RCS file: /cvs/src/sbin/ipsecctl/pfkdump.c,v retrieving revision 1.55 diff -u -p -r1.55 pfkdump.c --- sbin/ipsecctl/pfkdump.c 22 Oct 2021 12:30:54 -0000 1.55 +++ sbin/ipsecctl/pfkdump.c 29 Jun 2023 03:09:45 -0000 @@ -62,6 +62,7 @@ static void print_mtu(struct sadb_ext *, static void print_tap(struct sadb_ext *, struct sadb_msg *, int); static void print_satype(struct sadb_ext *, struct sadb_msg *, int); static void print_counter(struct sadb_ext *, struct sadb_msg *, int); +static void print_iface(struct sadb_ext *, struct sadb_msg *, int); static struct idname *lookup(struct idname *, u_int32_t); static char *lookup_name(struct idname *, u_int32_t); @@ -115,6 +116,7 @@ struct idname ext_types[] = { { SADB_X_EXT_TAP, "tap", print_tap }, { SADB_X_EXT_SATYPE2, "satype2", print_satype }, { SADB_X_EXT_COUNTER, "counter", print_counter }, + { SADB_X_EXT_IFACE, "interface", print_iface }, { 0, NULL, NULL } }; @@ -465,6 +467,24 @@ print_counter(struct sadb_ext *ext, stru p(sadb_x_counter_odrops, "\t\t%llu packet%s dropped on output\n"); #undef p #undef plural +} + +static void +print_iface(struct sadb_ext *ext, struct sadb_msg *msg, int opts) +{ + struct sadb_x_iface *siface = (struct sadb_x_iface *)ext; + const char *dir = "unknown"; + + switch (siface->sadb_x_iface_direction) { + case IPSP_DIRECTION_IN: + dir = "in"; + break; + case IPSP_DIRECTION_OUT: + dir = "out"; + break; + } + + printf("sec%u direction %s", siface->sadb_x_iface_unit, dir); } static char * Index: sbin/isakmpd/ipsec.c =================================================================== RCS file: /cvs/src/sbin/isakmpd/ipsec.c,v retrieving revision 1.152 diff -u -p -r1.152 ipsec.c --- sbin/isakmpd/ipsec.c 16 Jan 2022 14:30:11 -0000 1.152 +++ sbin/isakmpd/ipsec.c 29 Jun 2023 03:09:45 -0000 @@ -38,6 +38,7 @@ #include <arpa/inet.h> #include <stdlib.h> #include <string.h> +#include <limits.h> 
#include <net/if.h> #include <net/pfvar.h> @@ -131,6 +132,7 @@ static int ipsec_validate_transform static int ipsec_sa_check_flow(struct sa *, void *); static int ipsec_sa_check_flow_any(struct sa *, void *); static int ipsec_sa_tag(struct exchange *, struct sa *, struct sa *); +static int ipsec_sa_iface(struct exchange *, struct sa *, struct sa *); static struct doi ipsec_doi = { {0}, IPSEC_DOI_IPSEC, @@ -272,6 +274,12 @@ ipsec_sa_check_flow_any(struct sa *sa, v isa->dport != isa2->dport) return 0; + if ((sa->flags & SA_FLAG_IFACE) != (sa2->flags & SA_FLAG_IFACE)) + return 0; + + if (sa->flags & SA_FLAG_IFACE) + return sa->iface == sa2->iface; + /* * If at least one of the IPsec SAs is incomplete, we're done. */ @@ -379,6 +387,30 @@ ipsec_sa_tag(struct exchange *exchange, return (error); } +static int +ipsec_sa_iface(struct exchange *exchange, struct sa *sa, struct sa *isakmp_sa) +{ + char *section, *value; + const char *errstr = NULL; + + sa->tag = NULL; + + if (exchange->name == NULL || + (section = exchange->name) == NULL || + (value = conf_get_str(section, "Interface")) == NULL) + return (0); /* ignore if not present */ + + sa->iface = strtonum(value, 0, UINT_MAX, &errstr); + if (errstr != NULL) { + log_error("[%s]:Interface %s", section, errstr); + return (-1); + } + + sa->flags |= SA_FLAG_IFACE; + + return (0); +} + /* * Do IPsec DOI specific finalizations task for the exchange where MSG was * the final message. @@ -463,6 +495,9 @@ ipsec_finalize_exchange(struct message * if (ipsec_sa_tag(exchange, sa, isakmp_sa) == -1) return; + if (ipsec_sa_iface(exchange, sa, isakmp_sa) == -1) + return; + for (proto = TAILQ_FIRST(&sa->protos), last_proto = 0; proto; proto = TAILQ_NEXT(proto, link)) { @@ -514,6 +549,7 @@ ipsec_finalize_exchange(struct message * * (a.k.a. flow) set up. 
*/ if (!(sa->flags & SA_FLAG_ONDEMAND || + sa->flags & SA_FLAG_IFACE || conf_get_str("General", "Acquire-Only") || acquire_only) && pf_key_v2_enable_sa(sa, isakmp_sa)) @@ -1596,7 +1632,8 @@ ipsec_delete_spi(struct sa *sa, struct p * We ignore any errors from the disabling of the flow. */ if (sa->flags & SA_FLAG_READY && !(sa->flags & SA_FLAG_ONDEMAND || - sa->flags & SA_FLAG_REPLACED || acquire_only || + sa->flags & SA_FLAG_REPLACED || sa->flags & SA_FLAG_IFACE || + acquire_only || conf_get_str("General", "Acquire-Only"))) pf_key_v2_disable_sa(sa, incoming); Index: sbin/isakmpd/pf_key_v2.c =================================================================== RCS file: /cvs/src/sbin/isakmpd/pf_key_v2.c,v retrieving revision 1.204 diff -u -p -r1.204 pf_key_v2.c --- sbin/isakmpd/pf_key_v2.c 31 Jan 2022 23:51:15 -0000 1.204 +++ sbin/isakmpd/pf_key_v2.c 29 Jun 2023 03:09:45 -0000 @@ -890,6 +890,7 @@ pf_key_v2_set_spi(struct sa *sa, struct struct sadb_protocol flowtype, tprotocol; struct sadb_x_udpencap udpencap; char *addr_str, *s; + char iface_str[32]; msg.sadb_msg_type = incoming ? SADB_UPDATE : SADB_ADD; switch (proto->proto) { @@ -1378,16 +1379,37 @@ nodid: goto cleanup; } + if (sa->flags & SA_FLAG_IFACE) { + struct sadb_x_iface *siface; + + len = sizeof(*siface); + siface = calloc(1, len); + if (siface == NULL) + goto cleanup; + + siface->sadb_x_iface_len = len / PF_KEY_V2_CHUNK; + siface->sadb_x_iface_exttype = SADB_X_EXT_IFACE; + siface->sadb_x_iface_unit = sa->iface; + siface->sadb_x_iface_direction = incoming ? + IPSP_DIRECTION_IN : IPSP_DIRECTION_OUT; + + if (pf_key_v2_msg_add(update, (struct sadb_ext *)siface, + PF_KEY_V2_NODE_MALLOCED) == -1) + goto cleanup; + + snprintf(iface_str, sizeof(iface_str), "iface %u", sa->iface); + } + /* XXX Here can sensitivity extensions be setup. 
*/ if (sockaddr2text(dst, &addr_str, 0)) addr_str = 0; LOG_DBG((LOG_SYSDEP, 10, "pf_key_v2_set_spi: " - "satype %d dst %s SPI 0x%x%s%s", msg.sadb_msg_satype, + "satype %d dst %s SPI 0x%x%s%s%s", msg.sadb_msg_satype, addr_str ? addr_str : "unknown", ntohl(ssa.sadb_sa_spi), sa->tag ? " tag " : "", - sa->tag ? sa->tag : "")); + sa->tag ? sa->tag : "", iface_str)); free(addr_str); Index: sbin/isakmpd/sa.h =================================================================== RCS file: /cvs/src/sbin/isakmpd/sa.h,v retrieving revision 1.54 diff -u -p -r1.54 sa.h --- sbin/isakmpd/sa.h 15 Jan 2018 09:54:48 -0000 1.54 +++ sbin/isakmpd/sa.h 29 Jun 2023 03:09:45 -0000 @@ -211,6 +211,9 @@ struct sa { /* The add a pf tag to packets matching the established SA. */ char *tag; + + /* IPsec with Interface SAs, enabled with SA_FLAG_IFACE */ + unsigned int iface; }; /* This SA is alive. */ @@ -243,6 +246,9 @@ struct sa { /* NAT-T encapsulation state. Kept in isakmp_sa for the new p2 exchange. */ #define SA_FLAG_NAT_T_ENABLE 0x100 #define SA_FLAG_NAT_T_KEEPALIVE 0x200 + +/* Policy is handled by routing/filtering on the specified iface */ +#define SA_FLAG_IFACE 0x400 extern void proto_free(struct proto * proto); extern int sa_add_transform(struct sa *, struct payload *, int, Index: sys/conf/GENERIC =================================================================== RCS file: /cvs/src/sys/conf/GENERIC,v retrieving revision 1.288 diff -u -p -r1.288 GENERIC --- sys/conf/GENERIC 27 Mar 2023 09:39:21 -0000 1.288 +++ sys/conf/GENERIC 29 Jun 2023 03:09:45 -0000 @@ -90,6 +90,7 @@ pseudo-device veb # virtual Ethernet br pseudo-device carp # CARP protocol support pseudo-device etherip # EtherIP (RFC 3378) pseudo-device gif # IPv[46] over IPv[46] tunnel (RFC1933) +pseudo-device sec # route based IPsec VPN interface pseudo-device gre # GRE encapsulation interface pseudo-device loop # network loopback pseudo-device mpe # MPLS PE interface Index: sys/conf/files 
=================================================================== RCS file: /cvs/src/sys/conf/files,v retrieving revision 1.724 diff -u -p -r1.724 files --- sys/conf/files 23 Apr 2023 00:20:26 -0000 1.724 +++ sys/conf/files 29 Jun 2023 03:09:45 -0000 @@ -572,6 +572,7 @@ pseudo-device vlan: ifnet, ether pseudo-device carp: ifnet, ether pseudo-device sppp: ifnet pseudo-device gif: ifnet +pseudo-device sec: ifnet pseudo-device gre: ifnet, ether, etherbridge pseudo-device crypto: ifnet pseudo-device trunk: ifnet, ether, ifmedia @@ -1006,6 +1007,7 @@ file uvm/uvm_vnode.c # IPv6 file net/if_gif.c gif needs-count +file net/if_sec.c sec needs-count file netinet/ip_ecn.c file netinet6/in6_pcb.c inet6 file netinet6/in6.c inet6 Index: sys/net/if_sec.c =================================================================== RCS file: sys/net/if_sec.c diff -N sys/net/if_sec.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/net/if_sec.c 29 Jun 2023 03:09:45 -0000 @@ -0,0 +1,578 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2022 The University of Queensland + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +/* + * This code was written by David Gwynne <d...@uq.edu.au> as part + * of the Information Technology Infrastructure Group (ITIG) in the + * Faculty of Engineering, Architecture and Information Technology + * (EAIT). + */ + +#ifndef IPSEC +#error sec enabled without IPSEC defined +#endif + +#include "bpfilter.h" +#include "pf.h" + +#include <sys/param.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/sockio.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/timeout.h> +#include <sys/queue.h> +#include <sys/tree.h> +#include <sys/pool.h> +#include <sys/smr.h> +#include <sys/refcnt.h> + +#include <net/if.h> +#include <net/if_var.h> +#include <net/if_types.h> +#include <net/if_media.h> +#include <net/route.h> +#include <net/toeplitz.h> + +#include <netinet/in.h> +#include <netinet/in_var.h> +#include <netinet/if_ether.h> +#include <netinet/ip.h> +#include <netinet/ip_var.h> +#include <netinet/ip_ecn.h> +#include <netinet/ip_ipsp.h> + +#ifdef INET6 +#include <netinet/ip6.h> +#include <netinet6/ip6_var.h> +#include <netinet6/in6_var.h> +#endif + +#ifdef MPLS +#include <netmpls/mpls.h> +#endif /* MPLS */ + +#if NBPFILTER > 0 +#include <net/bpf.h> +#endif + +#if NPF > 0 +#include <net/pfvar.h> +#endif + +#define SEC_MTU 1280 +#define SEC_MTU_MIN 1280 +#define SEC_MTU_MAX 32768 /* could get closer to 64k... 
*/ + +struct sec_softc { + struct ifnet sc_if; + + struct task sc_send; + + unsigned int sc_unit; + SMR_SLIST_ENTRY(sec_softc) sc_entry; + struct refcnt sc_refs; +}; + +SMR_SLIST_HEAD(sec_bucket, sec_softc); + +static int sec_output(struct ifnet *, struct mbuf *, struct sockaddr *, + struct rtentry *); +static int sec_enqueue(struct ifnet *, struct mbuf *); +static void sec_send(void *); +static void sec_start(struct ifnet *); + +static int sec_ioctl(struct ifnet *, u_long, caddr_t); +static int sec_up(struct sec_softc *); +static int sec_down(struct sec_softc *); + +static int sec_clone_create(struct if_clone *, int); +static int sec_clone_destroy(struct ifnet *); + +static struct tdb * + sec_tdb_get(unsigned int); +static void sec_tdb_gc(void *); + +static struct if_clone sec_cloner = + IF_CLONE_INITIALIZER("sec", sec_clone_create, sec_clone_destroy); + +static struct sec_bucket sec_map[256] __aligned(CACHELINESIZE); +static struct tdb *sec_tdbh[256] __aligned(CACHELINESIZE); + +static struct tdb *sec_tdb_gc_list; +static struct task sec_tdb_gc_task = + TASK_INITIALIZER(sec_tdb_gc, NULL); +static struct mutex sec_tdb_gc_mtx = + MUTEX_INITIALIZER(IPL_MPFLOOR); + +void +secattach(int n) +{ + if_clone_attach(&sec_cloner); +} + +static int +sec_clone_create(struct if_clone *ifc, int unit) +{ + struct sec_softc *sc; + struct ifnet *ifp; + + sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); + + sc->sc_unit = unit; + + task_set(&sc->sc_send, sec_send, sc); + + snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d", + ifc->ifc_name, unit); + + ifp = &sc->sc_if; + ifp->if_softc = sc; + ifp->if_type = IFT_TUNNEL; + ifp->if_mtu = SEC_MTU; + ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; + ifp->if_xflags = IFXF_CLONED; + ifp->if_bpf_mtap = p2p_bpf_mtap; + ifp->if_input = p2p_input; + ifp->if_output = sec_output; + ifp->if_enqueue = sec_enqueue; + ifp->if_start = sec_start; + ifp->if_ioctl = sec_ioctl; + ifp->if_rtrequest = p2p_rtrequest; + + 
if_counters_alloc(ifp); + if_attach(ifp); + if_alloc_sadl(ifp); + +#if NBPFILTER > 0 + bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t)); +#endif + + return (0); +} + +static int +sec_clone_destroy(struct ifnet *ifp) +{ + struct sec_softc *sc = ifp->if_softc; + + NET_LOCK(); + if (ISSET(ifp->if_flags, IFF_RUNNING)) + sec_down(sc); + NET_UNLOCK(); + + if_detach(ifp); + + free(sc, M_DEVBUF, sizeof(*sc)); + + return (0); +} + +static int +sec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + struct sec_softc *sc = ifp->if_softc; + struct ifreq *ifr = (struct ifreq *)data; + int error = 0; + + switch (cmd) { + case SIOCSIFADDR: + break; + + case SIOCSIFFLAGS: + if (ISSET(ifp->if_flags, IFF_UP)) { + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + error = sec_up(sc); + else + error = 0; + } else { + if (ISSET(ifp->if_flags, IFF_RUNNING)) + error = sec_down(sc); + } + break; + + case SIOCADDMULTI: + case SIOCDELMULTI: + break; + + case SIOCSIFMTU: + if (ifr->ifr_mtu < SEC_MTU_MIN || + ifr->ifr_mtu > SEC_MTU_MAX) { + error = EINVAL; + break; + } + + ifp->if_mtu = ifr->ifr_mtu; + break; + + default: + error = ENOTTY; + break; + } + + return (error); +} + +static int +sec_up(struct sec_softc *sc) +{ + struct ifnet *ifp = &sc->sc_if; + unsigned int idx = stoeplitz_h32(sc->sc_unit) % nitems(sec_map); + + NET_ASSERT_LOCKED(); + + SET(ifp->if_flags, IFF_RUNNING); + refcnt_init(&sc->sc_refs); + + SMR_SLIST_INSERT_HEAD_LOCKED(&sec_map[idx], sc, sc_entry); + + return (0); +} + +static int +sec_down(struct sec_softc *sc) +{ + struct ifnet *ifp = &sc->sc_if; + unsigned int idx = stoeplitz_h32(sc->sc_unit) % nitems(sec_map); + + NET_ASSERT_LOCKED(); + + CLR(ifp->if_flags, IFF_RUNNING); + + SMR_SLIST_REMOVE_LOCKED(&sec_map[idx], sc, sec_softc, sc_entry); + + smr_barrier(); + taskq_del_barrier(systq, &sc->sc_send); + + refcnt_finalize(&sc->sc_refs, "secdown"); + + return (0); +} + +static int +sec_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, + struct rtentry 
*rt) +{ + struct m_tag *mtag; + int error = 0; + + if (!ISSET(ifp->if_flags, IFF_RUNNING)) { + error = ENETDOWN; + goto drop; + } + + switch (dst->sa_family) { + case AF_INET: +#ifdef INET6 + case AF_INET6: +#endif +#ifdef MPLS + case AF_MPLS: +#endif + break; + default: + error = EAFNOSUPPORT; + goto drop; + } + + mtag = NULL; + while ((mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) != NULL) { + if (ifp->if_index == *(int *)(mtag + 1)) { + error = EIO; + goto drop; + } + } + + m->m_pkthdr.ph_family = dst->sa_family; + + error = if_enqueue(ifp, m); + if (error != 0) + counters_inc(ifp->if_counters, ifc_oerrors); + + return (error); + +drop: + m_freem(m); + return (error); +} + +static int +sec_enqueue(struct ifnet *ifp, struct mbuf *m) +{ + struct sec_softc *sc = ifp->if_softc; + struct ifqueue *ifq = &ifp->if_snd; + int error; + + error = ifq_enqueue(ifq, m); + if (error) + return (error); + + task_add(systq, &sc->sc_send); + + return (0); +} + +static void +sec_send(void *arg) +{ + struct sec_softc *sc = arg; + struct ifnet *ifp = &sc->sc_if; + struct ifqueue *ifq = &ifp->if_snd; + struct tdb *tdb; + struct mbuf *m; + int error; + + if (!ISSET(ifp->if_flags, IFF_RUNNING)) + return; + + tdb = sec_tdb_get(sc->sc_unit); + if (tdb == NULL) + goto purge; + + NET_LOCK(); + while ((m = ifq_dequeue(ifq)) != NULL) { + CLR(m->m_flags, M_BCAST|M_MCAST); + +#if NPF > 0 + pf_pkt_addr_changed(m); +#endif + + error = ipsp_process_packet(m, tdb, + m->m_pkthdr.ph_family, /* already tunnelled? 
*/ 0); + if (error != 0) + counters_inc(ifp->if_counters, ifc_oerrors); + } + NET_UNLOCK(); + + tdb_unref(tdb); + return; + +purge: + counters_add(ifp->if_counters, ifc_oerrors, ifq_purge(ifq)); +} + +static void +sec_start(struct ifnet *ifp) +{ + counters_add(ifp->if_counters, ifc_oerrors, ifq_purge(&ifp->if_snd)); +} + +/* + * ipsec_input handling + */ + +struct sec_softc * +sec_get(unsigned int unit) +{ + unsigned int idx = stoeplitz_h32(unit) % nitems(sec_map); + struct sec_bucket *sb = &sec_map[idx]; + struct sec_softc *sc; + + smr_read_enter(); + SMR_SLIST_FOREACH(sc, sb, sc_entry) { + if (sc->sc_unit == unit) { + refcnt_take(&sc->sc_refs); + break; + } + } + smr_read_leave(); + + return (sc); +} + +void +sec_input(struct sec_softc *sc, int af, int proto, struct mbuf *m) +{ + struct ip *iph; + int hlen; + + switch (af) { + case AF_INET: + iph = mtod(m, struct ip *); + hlen = iph->ip_hl << 2; + break; +#ifdef INET6 + case AF_INET6: + hlen = sizeof(struct ip6_hdr); + break; +#endif + default: + unhandled_af(af); + } + + m_adj(m, hlen); + + switch (proto) { + case IPPROTO_IPV4: + af = AF_INET; + break; + case IPPROTO_IPV6: + af = AF_INET6; + break; + case IPPROTO_MPLS: + af = AF_MPLS; + break; + default: + af = AF_UNSPEC; + break; + } + + m->m_pkthdr.ph_family = af; + + if_vinput(&sc->sc_if, m); +} + +void +sec_put(struct sec_softc *sc) +{ + refcnt_rele_wake(&sc->sc_refs); +} + +/* + * tdb handling + */ + +static int +sec_tdb_valid(struct tdb *tdb) +{ + KASSERT(ISSET(tdb->tdb_flags, TDBF_IFACE)); + + if (!ISSET(tdb->tdb_flags, TDBF_TUNNELING)) + return (0); + if (ISSET(tdb->tdb_flags, TDBF_INVALID)) + return (0); + + if (tdb->tdb_iface_dir != IPSP_DIRECTION_OUT) + return (0); + + return (1); +} + +/* + * these are called from netinet/ip_ipsp.c with tdb_sadb_mtx held, + * which we rely on to serialise modifications to the sec_tdbh. 
+ */ + +void +sec_tdb_insert(struct tdb *tdb) +{ + unsigned int idx; + struct tdb **tdbp; + struct tdb *ltdb; + + if (!sec_tdb_valid(tdb)) + return; + + idx = stoeplitz_h32(tdb->tdb_iface) % nitems(sec_tdbh); + tdbp = &sec_tdbh[idx]; + + tdb_ref(tdb); /* take a ref for the SMR pointer */ + + /* wire the tdb into the head of the list */ + ltdb = SMR_PTR_GET_LOCKED(tdbp); + SMR_PTR_SET_LOCKED(&tdb->tdb_dnext, ltdb); + SMR_PTR_SET_LOCKED(tdbp, tdb); +} + +void +sec_tdb_remove(struct tdb *tdb) +{ + struct tdb **tdbp; + struct tdb *ltdb; + unsigned int idx; + + if (!sec_tdb_valid(tdb)) + return; + + idx = stoeplitz_h32(tdb->tdb_iface) % nitems(sec_tdbh); + tdbp = &sec_tdbh[idx]; + + while ((ltdb = SMR_PTR_GET_LOCKED(tdbp)) != NULL) { + if (ltdb == tdb) { + /* take the tdb out of the list */ + ltdb = SMR_PTR_GET_LOCKED(&tdb->tdb_dnext); + SMR_PTR_SET_LOCKED(tdbp, ltdb); + + /* move the ref to the gc */ + + mtx_enter(&sec_tdb_gc_mtx); + tdb->tdb_dnext = sec_tdb_gc_list; + sec_tdb_gc_list = tdb; + mtx_leave(&sec_tdb_gc_mtx); + task_add(systq, &sec_tdb_gc_task); + + return; + } + + tdbp = &ltdb->tdb_dnext; + } + + panic("%s: unable to find tdb %p", __func__, tdb); +} + +static void +sec_tdb_gc(void *null) +{ + struct tdb *tdb, *ntdb; + + mtx_enter(&sec_tdb_gc_mtx); + tdb = sec_tdb_gc_list; + sec_tdb_gc_list = NULL; + mtx_leave(&sec_tdb_gc_mtx); + + if (tdb == NULL) + return; + + smr_barrier(); + + NET_LOCK(); + do { + ntdb = tdb->tdb_dnext; + tdb_unref(tdb); + tdb = ntdb; + } while (tdb != NULL); + NET_UNLOCK(); +} + +struct tdb * +sec_tdb_get(unsigned int unit) +{ + unsigned int idx; + struct tdb **tdbp; + struct tdb *tdb; + + idx = stoeplitz_h32(unit) % nitems(sec_map); + tdbp = &sec_tdbh[idx]; + + smr_read_enter(); + while ((tdb = SMR_PTR_GET(tdbp)) != NULL) { + KASSERT(ISSET(tdb->tdb_flags, TDBF_IFACE)); + if (!ISSET(tdb->tdb_flags, TDBF_DELETED) && + tdb->tdb_iface == unit) { + tdb_ref(tdb); + break; + } + + tdbp = &tdb->tdb_dnext; + } + smr_read_leave(); + + return 
(tdb); +} Index: sys/net/if_sec.h =================================================================== RCS file: sys/net/if_sec.h diff -N sys/net/if_sec.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ sys/net/if_sec.h 29 Jun 2023 03:09:45 -0000 @@ -0,0 +1,44 @@ +/* $OpenBSD$ */ + +/* + * Copyright (c) 2023 David Gwynne <d...@openbsd.org> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#ifndef _NET_IF_SEC_H +#define _NET_IF_SEC_H + +#ifdef _KERNEL +struct sec_softc; +struct tdb; + +/* + * let the IPsec stack hand packets to sec(4) for input + */ + +struct sec_softc *sec_get(unsigned int); +void sec_input(struct sec_softc * , int, int, + struct mbuf *); +void sec_put(struct sec_softc *); + +/* + * let the IPsec stack give tdbs to sec(4) for output + */ + +void sec_tdb_insert(struct tdb *); +void sec_tdb_remove(struct tdb *); + +#endif /* _KERNEL */ + +#endif /* _NET_IF_SEC_H */ Index: sys/net/pfkeyv2.c =================================================================== RCS file: /cvs/src/sys/net/pfkeyv2.c,v retrieving revision 1.256 diff -u -p -r1.256 pfkeyv2.c --- sys/net/pfkeyv2.c 22 Apr 2023 20:51:56 -0000 1.256 +++ sys/net/pfkeyv2.c 29 Jun 2023 03:09:45 -0000 @@ -868,6 +868,9 @@ pfkeyv2_get(struct tdb *tdb, void **head i += sizeof(struct sadb_x_tap); #endif + if (ISSET(tdb->tdb_flags, TDBF_IFACE)) + i += sizeof(struct sadb_x_iface); + if (lenp) *lenp = i; @@ -979,6 +982,12 @@ pfkeyv2_get(struct tdb *tdb, void **head } #endif + /* Export sec(4) interface information, if present */ + if (ISSET(tdb->tdb_flags, TDBF_IFACE)) { + headers[SADB_X_EXT_IFACE] = p; + export_iface(&p, tdb); + } + headers[SADB_X_EXT_COUNTER] = p; export_counter(&p, tdb); @@ -1360,6 +1369,7 @@ pfkeyv2_dosend(struct socket *so, void * import_tag(newsa, headers[SADB_X_EXT_TAG]); import_tap(newsa, headers[SADB_X_EXT_TAP]); #endif + import_iface(newsa, headers[SADB_X_EXT_IFACE]); /* Exclude sensitive data from reply message. 
*/ headers[SADB_EXT_KEY_AUTH] = NULL; @@ -1411,6 +1421,8 @@ pfkeyv2_dosend(struct socket *so, void * import_tag(sa2, headers[SADB_X_EXT_TAG]); import_tap(sa2, headers[SADB_X_EXT_TAP]); #endif + import_iface(sa2, headers[SADB_X_EXT_IFACE]); + if (headers[SADB_EXT_ADDRESS_SRC] || headers[SADB_EXT_ADDRESS_PROXY]) { mtx_enter(&tdb_sadb_mtx); @@ -1535,6 +1547,7 @@ pfkeyv2_dosend(struct socket *so, void * import_tag(newsa, headers[SADB_X_EXT_TAG]); import_tap(newsa, headers[SADB_X_EXT_TAP]); #endif + import_iface(newsa, headers[SADB_X_EXT_IFACE]); /* Exclude sensitive data from reply message. */ headers[SADB_EXT_KEY_AUTH] = NULL; Index: sys/net/pfkeyv2.h =================================================================== RCS file: /cvs/src/sys/net/pfkeyv2.h,v retrieving revision 1.93 diff -u -p -r1.93 pfkeyv2.h --- sys/net/pfkeyv2.h 27 Aug 2022 20:28:01 -0000 1.93 +++ sys/net/pfkeyv2.h 29 Jun 2023 03:09:45 -0000 @@ -252,6 +252,14 @@ struct sadb_x_mtu { uint32_t sadb_x_mtu_mtu; }; +struct sadb_x_iface { + uint16_t sadb_x_iface_len; + uint16_t sadb_x_iface_exttype; + uint32_t sadb_x_iface_unit; + uint8_t sadb_x_iface_direction; + uint8_t sadb_x_iface_reserved[7]; +}; + #ifdef _KERNEL #define SADB_X_GETSPROTO(x) \ ( (x) == SADB_SATYPE_AH ? 
IPPROTO_AH :\ @@ -300,7 +308,8 @@ struct sadb_x_mtu { #define SADB_X_EXT_RDOMAIN 37 #define SADB_X_EXT_MTU 38 #define SADB_X_EXT_REPLAY 39 -#define SADB_EXT_MAX 39 +#define SADB_X_EXT_IFACE 40 +#define SADB_EXT_MAX 40 /* Fix pfkeyv2.c struct pfkeyv2_socket if SATYPE_MAX > 31 */ #define SADB_SATYPE_UNSPEC 0 @@ -438,6 +447,7 @@ void export_mtu(void **, struct tdb *); void export_tap(void **, struct tdb *); void export_satype(void **, struct tdb *); void export_counter(void **, struct tdb *); +void export_iface(void **, struct tdb *); void import_address(struct sockaddr *, struct sadb_address *); void import_identities(struct ipsec_ids **, int, struct sadb_ident *, @@ -452,6 +462,7 @@ void import_udpencap(struct tdb *, struc void import_tag(struct tdb *, struct sadb_x_tag *); void import_rdomain(struct tdb *, struct sadb_x_rdomain *); void import_tap(struct tdb *, struct sadb_x_tap *); +void import_iface(struct tdb *, struct sadb_x_iface *); extern const uint64_t sadb_exts_allowed_out[SADB_MAX+1]; extern const uint64_t sadb_exts_required_out[SADB_MAX+1]; Index: sys/net/pfkeyv2_convert.c =================================================================== RCS file: /cvs/src/sys/net/pfkeyv2_convert.c,v retrieving revision 1.79 diff -u -p -r1.79 pfkeyv2_convert.c --- sys/net/pfkeyv2_convert.c 20 Jan 2022 17:13:12 -0000 1.79 +++ sys/net/pfkeyv2_convert.c 29 Jun 2023 03:09:45 -0000 @@ -951,6 +951,30 @@ export_tap(void **p, struct tdb *tdb) } #endif +/* Import interface information for SA */ +void +import_iface(struct tdb *tdb, struct sadb_x_iface *siface) +{ + if (siface != NULL) { + SET(tdb->tdb_flags, TDBF_IFACE); + tdb->tdb_iface = siface->sadb_x_iface_unit; + tdb->tdb_iface_dir = siface->sadb_x_iface_direction; + } +} + +/* Export interface information for SA */ +void +export_iface(void **p, struct tdb *tdb) +{ + struct sadb_x_iface *siface = (struct sadb_x_iface *)*p; + + siface->sadb_x_iface_len = sizeof(*siface) / sizeof(uint64_t); + siface->sadb_x_iface_unit = 
tdb->tdb_iface; + siface->sadb_x_iface_direction = tdb->tdb_iface_dir; + + *p += sizeof(*siface); +} + void export_satype(void **p, struct tdb *tdb) { Index: sys/net/pfkeyv2_parsemessage.c =================================================================== RCS file: /cvs/src/sys/net/pfkeyv2_parsemessage.c,v retrieving revision 1.60 diff -u -p -r1.60 pfkeyv2_parsemessage.c --- sys/net/pfkeyv2_parsemessage.c 14 Jul 2021 22:39:26 -0000 1.60 +++ sys/net/pfkeyv2_parsemessage.c 29 Jun 2023 03:09:45 -0000 @@ -135,6 +135,7 @@ #define BITMAP_X_COUNTER (1LL << SADB_X_EXT_COUNTER) #define BITMAP_X_MTU (1LL << SADB_X_EXT_MTU) #define BITMAP_X_REPLAY (1LL << SADB_X_EXT_REPLAY) +#define BITMAP_X_IFACE (1LL << SADB_X_EXT_IFACE) uint64_t sadb_exts_allowed_in[SADB_MAX+1] = { @@ -143,9 +144,9 @@ uint64_t sadb_exts_allowed_in[SADB_MAX+1 /* GETSPI */ BITMAP_ADDRESS_SRC | BITMAP_ADDRESS_DST | BITMAP_SPIRANGE, /* UPDATE */ - BITMAP_SA | BITMAP_LIFETIME | BITMAP_ADDRESS | BITMAP_ADDRESS_PROXY | BITMAP_KEY | BITMAP_IDENTITY | BITMAP_X_FLOW | BITMAP_X_UDPENCAP | BITMAP_X_TAG | BITMAP_X_TAP | BITMAP_X_RDOMAIN, + BITMAP_SA | BITMAP_LIFETIME | BITMAP_ADDRESS | BITMAP_ADDRESS_PROXY | BITMAP_KEY | BITMAP_IDENTITY | BITMAP_X_FLOW | BITMAP_X_UDPENCAP | BITMAP_X_TAG | BITMAP_X_TAP | BITMAP_X_RDOMAIN | BITMAP_X_IFACE, /* ADD */ - BITMAP_SA | BITMAP_LIFETIME | BITMAP_ADDRESS | BITMAP_KEY | BITMAP_IDENTITY | BITMAP_X_FLOW | BITMAP_X_UDPENCAP | BITMAP_X_LIFETIME_LASTUSE | BITMAP_X_TAG | BITMAP_X_TAP | BITMAP_X_RDOMAIN, + BITMAP_SA | BITMAP_LIFETIME | BITMAP_ADDRESS | BITMAP_KEY | BITMAP_IDENTITY | BITMAP_X_FLOW | BITMAP_X_UDPENCAP | BITMAP_X_LIFETIME_LASTUSE | BITMAP_X_TAG | BITMAP_X_TAP | BITMAP_X_RDOMAIN | BITMAP_X_IFACE, /* DELETE */ BITMAP_SA | BITMAP_ADDRESS_SRC | BITMAP_ADDRESS_DST | BITMAP_X_RDOMAIN, /* GET */ @@ -215,13 +216,13 @@ const uint64_t sadb_exts_allowed_out[SAD /* GETSPI */ BITMAP_SA | BITMAP_ADDRESS_SRC | BITMAP_ADDRESS_DST, /* UPDATE */ - BITMAP_SA | BITMAP_LIFETIME | 
BITMAP_ADDRESS | BITMAP_ADDRESS_PROXY | BITMAP_IDENTITY | BITMAP_X_FLOW | BITMAP_X_UDPENCAP | BITMAP_X_TAG | BITMAP_X_TAP | BITMAP_X_RDOMAIN, + BITMAP_SA | BITMAP_LIFETIME | BITMAP_ADDRESS | BITMAP_ADDRESS_PROXY | BITMAP_IDENTITY | BITMAP_X_FLOW | BITMAP_X_UDPENCAP | BITMAP_X_TAG | BITMAP_X_TAP | BITMAP_X_RDOMAIN | BITMAP_X_IFACE, /* ADD */ - BITMAP_SA | BITMAP_LIFETIME | BITMAP_ADDRESS | BITMAP_IDENTITY | BITMAP_X_FLOW | BITMAP_X_UDPENCAP | BITMAP_X_TAG | BITMAP_X_TAP | BITMAP_X_RDOMAIN, + BITMAP_SA | BITMAP_LIFETIME | BITMAP_ADDRESS | BITMAP_IDENTITY | BITMAP_X_FLOW | BITMAP_X_UDPENCAP | BITMAP_X_TAG | BITMAP_X_TAP | BITMAP_X_RDOMAIN | BITMAP_X_IFACE, /* DELETE */ BITMAP_SA | BITMAP_ADDRESS_SRC | BITMAP_ADDRESS_DST | BITMAP_X_RDOMAIN, /* GET */ - BITMAP_SA | BITMAP_LIFETIME | BITMAP_ADDRESS | BITMAP_KEY | BITMAP_IDENTITY | BITMAP_X_UDPENCAP | BITMAP_X_LIFETIME_LASTUSE | BITMAP_X_SRC_MASK | BITMAP_X_DST_MASK | BITMAP_X_PROTOCOL | BITMAP_X_FLOW_TYPE | BITMAP_X_SRC_FLOW | BITMAP_X_DST_FLOW | BITMAP_X_TAG | BITMAP_X_TAP | BITMAP_X_COUNTER | BITMAP_X_RDOMAIN | BITMAP_X_MTU | BITMAP_X_REPLAY, + BITMAP_SA | BITMAP_LIFETIME | BITMAP_ADDRESS | BITMAP_KEY | BITMAP_IDENTITY | BITMAP_X_UDPENCAP | BITMAP_X_LIFETIME_LASTUSE | BITMAP_X_SRC_MASK | BITMAP_X_DST_MASK | BITMAP_X_PROTOCOL | BITMAP_X_FLOW_TYPE | BITMAP_X_SRC_FLOW | BITMAP_X_DST_FLOW | BITMAP_X_TAG | BITMAP_X_TAP | BITMAP_X_COUNTER | BITMAP_X_RDOMAIN | BITMAP_X_MTU | BITMAP_X_REPLAY | BITMAP_X_IFACE, /* ACQUIRE */ BITMAP_ADDRESS_SRC | BITMAP_ADDRESS_DST | BITMAP_IDENTITY | BITMAP_PROPOSAL, /* REGISTER */ @@ -881,6 +882,12 @@ pfkeyv2_parsemessage(void *p, int len, v } break; #endif + case SADB_X_EXT_IFACE: + if (i != sizeof(struct sadb_x_iface)) { + DPRINTF("bad IFACE header length"); + return (EINVAL); + } + break; default: DPRINTF("unknown extension header type %d", sadb_ext->sadb_ext_type); Index: sys/netinet/ip_ipsp.c =================================================================== RCS file: 
/cvs/src/sys/netinet/ip_ipsp.c,v retrieving revision 1.275 diff -u -p -r1.275 ip_ipsp.c --- sys/netinet/ip_ipsp.c 11 Nov 2022 18:09:58 -0000 1.275 +++ sys/netinet/ip_ipsp.c 29 Jun 2023 03:09:45 -0000 @@ -39,6 +39,7 @@ #include "pf.h" #include "pfsync.h" +#include "sec.h" #include <sys/param.h> #include <sys/systm.h> @@ -67,6 +68,10 @@ #include <net/if_pfsync.h> #endif +#if NSEC > 0 +#include <net/if_sec.h> +#endif + #include <netinet/ip_ipsp.h> #include <net/pfkeyv2.h> @@ -852,14 +857,6 @@ puttdb_locked(struct tdb *tdbp) tdbp->tdb_hnext = tdbh[hashval]; tdbh[hashval] = tdbp; - hashval = tdb_hash(0, &tdbp->tdb_dst, tdbp->tdb_sproto); - tdbp->tdb_dnext = tdbdst[hashval]; - tdbdst[hashval] = tdbp; - - hashval = tdb_hash(0, &tdbp->tdb_src, tdbp->tdb_sproto); - tdbp->tdb_snext = tdbsrc[hashval]; - tdbsrc[hashval] = tdbp; - tdb_count++; #ifdef IPSEC if ((tdbp->tdb_flags & (TDBF_INVALID|TDBF_TUNNELING)) == TDBF_TUNNELING) @@ -867,6 +864,21 @@ puttdb_locked(struct tdb *tdbp) #endif /* IPSEC */ ipsec_last_added = getuptime(); + + if (ISSET(tdbp->tdb_flags, TDBF_IFACE)) { +#if NSEC > 0 + sec_tdb_insert(tdbp); +#endif + return; + } + + hashval = tdb_hash(0, &tdbp->tdb_dst, tdbp->tdb_sproto); + tdbp->tdb_dnext = tdbdst[hashval]; + tdbdst[hashval] = tdbp; + + hashval = tdb_hash(0, &tdbp->tdb_src, tdbp->tdb_sproto); + tdbp->tdb_snext = tdbsrc[hashval]; + tdbsrc[hashval] = tdbp; } void @@ -901,6 +913,22 @@ tdb_unlink_locked(struct tdb *tdbp) tdbp->tdb_hnext = NULL; + tdb_count--; +#ifdef IPSEC + if ((tdbp->tdb_flags & (TDBF_INVALID|TDBF_TUNNELING)) == + TDBF_TUNNELING) { + ipsecstat_dec(ipsec_tunnels); + ipsecstat_inc(ipsec_prevtunnels); + } +#endif /* IPSEC */ + + if (ISSET(tdbp->tdb_flags, TDBF_IFACE)) { +#if NSEC > 0 + sec_tdb_remove(tdbp); +#endif + return; + } + hashval = tdb_hash(0, &tdbp->tdb_dst, tdbp->tdb_sproto); if (tdbdst[hashval] == tdbp) { @@ -932,14 +960,6 @@ tdb_unlink_locked(struct tdb *tdbp) } tdbp->tdb_snext = NULL; - tdb_count--; -#ifdef IPSEC - if 
((tdbp->tdb_flags & (TDBF_INVALID|TDBF_TUNNELING)) == - TDBF_TUNNELING) { - ipsecstat_dec(ipsec_tunnels); - ipsecstat_inc(ipsec_prevtunnels); - } -#endif /* IPSEC */ } void Index: sys/netinet/ip_ipsp.h =================================================================== RCS file: /cvs/src/sys/netinet/ip_ipsp.h,v retrieving revision 1.240 diff -u -p -r1.240 ip_ipsp.h --- sys/netinet/ip_ipsp.h 14 Jul 2022 13:52:10 -0000 1.240 +++ sys/netinet/ip_ipsp.h 29 Jun 2023 03:09:45 -0000 @@ -356,6 +356,7 @@ struct tdb { /* tunnel descriptor blo #define TDBF_PFSYNC_RPL 0x80000 /* Replay counter should be bumped */ #define TDBF_ESN 0x100000 /* 64-bit sequence numbers (ESN) */ #define TDBF_PFSYNC_SNAPPED 0x200000 /* entry is being dispatched to peer */ +#define TDBF_IFACE 0x400000 /* entry policy is via sec(4) */ #define TDBF_BITS ("\20" \ "\1UNIQUE\2TIMER\3BYTES\4ALLOCATIONS" \ @@ -363,7 +364,7 @@ struct tdb { /* tunnel descriptor blo "\11SOFT_BYTES\12SOFT_ALLOCATIONS\13SOFT_FIRSTUSE\14PFS" \ "\15TUNNELING" \ "\21USEDTUNNEL\22UDPENCAP\23PFSYNC\24PFSYNC_RPL" \ - "\25ESN") + "\25ESN" "\26IFACE") u_int32_t tdb_flags; /* [m] Flags related to this TDB */ @@ -406,6 +407,7 @@ struct tdb { /* tunnel descriptor blo u_int8_t tdb_wnd; /* Replay window */ u_int8_t tdb_satype; /* SA type (RFC2367, PF_KEY) */ u_int8_t tdb_updates; /* pfsync update counter */ + u_int8_t tdb_iface_dir; /* [I] sec(4) iface direction */ union sockaddr_union tdb_dst; /* [N] Destination address */ union sockaddr_union tdb_src; /* [N] Source address */ @@ -431,6 +433,7 @@ struct tdb { /* tunnel descriptor blo u_int16_t tdb_tag; /* Packet filter tag */ u_int32_t tdb_tap; /* Alternate enc(4) interface */ + unsigned int tdb_iface; /* [I] sec(4) iface */ u_int tdb_rdomain; /* [I] Routing domain */ u_int tdb_rdomain_post; /* [I] Change domain */ Index: sys/netinet/ipsec_input.c =================================================================== RCS file: /cvs/src/sys/netinet/ipsec_input.c,v retrieving revision 1.204 diff 
-u -p -r1.204 ipsec_input.c --- sys/netinet/ipsec_input.c 13 May 2023 13:35:17 -0000 1.204 +++ sys/netinet/ipsec_input.c 29 Jun 2023 03:09:45 -0000 @@ -36,6 +36,7 @@ */ #include "pf.h" +#include "sec.h" #include <sys/param.h> #include <sys/systm.h> @@ -63,6 +64,10 @@ #include <net/pfvar.h> #endif +#if NSEC > 0 +#include <net/if_sec.h> +#endif + #ifdef INET6 #include <netinet6/in6_var.h> #include <netinet/ip6.h> @@ -544,6 +549,22 @@ ipsec_common_input_cb(struct mbuf **mp, } } #endif + + if (ISSET(tdbp->tdb_flags, TDBF_IFACE)) { +#if NSEC > 0 + if (ISSET(tdbp->tdb_flags, TDBF_TUNNELING) && + tdbp->tdb_iface_dir == IPSP_DIRECTION_IN) { + struct sec_softc *sc = sec_get(tdbp->tdb_iface); + if (sc == NULL) + goto baddone; + + sec_input(sc, af, prot, m); + sec_put(sc); + return IPPROTO_DONE; + } +#endif /* NSEC > 0 */ + goto baddone; + } #if NPF > 0 /*