On Fri, Aug 19, 2022 at 10:54:42PM +0200, Alexander Bluhm wrote:
> This diff allows to run udp_input() in parallel.
Parts have been committed; below is the diff for -current.
With this diff, UDP socket splicing does not work yet, as udp_output()
is not MP safe. Also, calls from udp_input() into other code while
holding only the shared netlock may have unexpected effects. So I doubt
that this part will make it into the 7.2 release.
Tests are welcome anyway so I know about possible bugs and can fix
them soon.
bluhm
Index: net/if_bridge.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.364
diff -u -p -r1.364 if_bridge.c
--- net/if_bridge.c 7 Aug 2022 00:57:43 -0000 1.364
+++ net/if_bridge.c 6 Sep 2022 19:39:24 -0000
@@ -1590,7 +1590,7 @@ bridge_ipsec(struct ifnet *ifp, struct e
off);
tdb_unref(tdb);
if (prot != IPPROTO_DONE)
- ip_deliver(&m, &hlen, prot, af);
+ ip_deliver(&m, &hlen, prot, af, 0);
return (1);
} else {
tdb_unref(tdb);
Index: netinet/in_proto.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_proto.c,v
retrieving revision 1.99
diff -u -p -r1.99 in_proto.c
--- netinet/in_proto.c 15 Aug 2022 09:11:38 -0000 1.99
+++ netinet/in_proto.c 6 Sep 2022 19:39:24 -0000
@@ -185,7 +185,7 @@ const struct protosw inetsw[] = {
.pr_type = SOCK_DGRAM,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_UDP,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE,
.pr_input = udp_input,
.pr_ctlinput = udp_ctlinput,
.pr_ctloutput = ip_ctloutput,
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.381
diff -u -p -r1.381 ip_input.c
--- netinet/ip_input.c 29 Aug 2022 14:43:56 -0000 1.381
+++ netinet/ip_input.c 6 Sep 2022 19:39:24 -0000
@@ -230,6 +230,11 @@ ip_init(void)
#endif
}
+struct ip_offnxt {
+ int ion_off;
+ int ion_nxt;
+};
+
/*
* Enqueue packet for local delivery. Queuing is used as a boundary
* between the network layer (input/forward path) running with
@@ -246,6 +251,30 @@ ip_ours(struct mbuf **mp, int *offp, int
if (af != AF_UNSPEC)
return nxt;
+ nxt = ip_deliver(mp, offp, nxt, AF_INET, 1);
+ if (nxt == IPPROTO_DONE)
+ return IPPROTO_DONE;
+
+ /* save values for later, use after dequeue */
+ if (*offp != sizeof(struct ip)) {
+ struct m_tag *mtag;
+ struct ip_offnxt *ion;
+
+ /* mbuf tags are expensive, but only used for header options */
+ mtag = m_tag_get(PACKET_TAG_IP_OFFNXT, sizeof(*ion),
+ M_NOWAIT);
+ if (mtag == NULL) {
+ ipstat_inc(ips_idropped);
+ m_freemp(mp);
+ return IPPROTO_DONE;
+ }
+ ion = (struct ip_offnxt *)(mtag + 1);
+ ion->ion_off = *offp;
+ ion->ion_nxt = nxt;
+
+ m_tag_prepend(*mp, mtag);
+ }
+
niq_enqueue(&ipintrq, *mp);
*mp = NULL;
return IPPROTO_DONE;
@@ -261,18 +290,31 @@ ipintr(void)
struct mbuf *m;
while ((m = niq_dequeue(&ipintrq)) != NULL) {
- struct ip *ip;
+ struct m_tag *mtag;
int off, nxt;
#ifdef DIAGNOSTIC
if ((m->m_flags & M_PKTHDR) == 0)
panic("ipintr no HDR");
#endif
- ip = mtod(m, struct ip *);
- off = ip->ip_hl << 2;
- nxt = ip->ip_p;
+ mtag = m_tag_find(m, PACKET_TAG_IP_OFFNXT, NULL);
+ if (mtag != NULL) {
+ struct ip_offnxt *ion;
+
+ ion = (struct ip_offnxt *)(mtag + 1);
+ off = ion->ion_off;
+ nxt = ion->ion_nxt;
- nxt = ip_deliver(&m, &off, nxt, AF_INET);
+ m_tag_delete(m, mtag);
+ } else {
+ struct ip *ip;
+
+ ip = mtod(m, struct ip *);
+ off = ip->ip_hl << 2;
+ nxt = ip->ip_p;
+ }
+
+ nxt = ip_deliver(&m, &off, nxt, AF_INET, 0);
KASSERT(nxt == IPPROTO_DONE);
}
}
@@ -673,7 +715,7 @@ ip_fragcheck(struct mbuf **mp, int *offp
#endif
int
-ip_deliver(struct mbuf **mp, int *offp, int nxt, int af)
+ip_deliver(struct mbuf **mp, int *offp, int nxt, int af, int shared)
{
const struct protosw *psw;
int naf = af;
@@ -681,14 +723,24 @@ ip_deliver(struct mbuf **mp, int *offp,
int nest = 0;
#endif /* INET6 */
- NET_ASSERT_LOCKED_EXCLUSIVE();
-
/*
* Tell launch routine the next header
*/
IPSTAT_INC(delivered);
while (nxt != IPPROTO_DONE) {
+ switch (af) {
+ case AF_INET:
+ psw = &inetsw[ip_protox[nxt]];
+ break;
+#ifdef INET6
+ case AF_INET6:
+ psw = &inet6sw[ip6_protox[nxt]];
+ break;
+#endif /* INET6 */
+ }
+ if (shared && !ISSET(psw->pr_flags, PR_MPSAFE))
+ break;
#ifdef INET6
if (af == AF_INET6 &&
ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) {
@@ -725,16 +777,6 @@ ip_deliver(struct mbuf **mp, int *offp,
case IPPROTO_IPV6:
naf = AF_INET6;
ip6stat_inc(ip6s_delivered);
- break;
-#endif /* INET6 */
- }
- switch (af) {
- case AF_INET:
- psw = &inetsw[ip_protox[nxt]];
- break;
-#ifdef INET6
- case AF_INET6:
- psw = &inet6sw[ip6_protox[nxt]];
break;
#endif /* INET6 */
}
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.104
diff -u -p -r1.104 ip_var.h
--- netinet/ip_var.h 3 Sep 2022 22:43:38 -0000 1.104
+++ netinet/ip_var.h 6 Sep 2022 19:39:24 -0000
@@ -249,7 +249,7 @@ int ip_sysctl(int *, u_int, void *, siz
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
struct mbuf *);
int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
-int ip_deliver(struct mbuf **, int *, int, int);
+int ip_deliver(struct mbuf **, int *, int, int, int);
void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
int rip_ctloutput(int, struct socket *, int, int, struct mbuf *);
void rip_init(void);
Index: netinet6/in6_proto.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.111
diff -u -p -r1.111 in6_proto.c
--- netinet6/in6_proto.c 2 Sep 2022 13:12:32 -0000 1.111
+++ netinet6/in6_proto.c 6 Sep 2022 19:39:24 -0000
@@ -136,7 +136,7 @@ const struct protosw inet6sw[] = {
.pr_type = SOCK_DGRAM,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_UDP,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE,
.pr_input = udp_input,
.pr_ctlinput = udp6_ctlinput,
.pr_ctloutput = ip6_ctloutput,
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.254
diff -u -p -r1.254 ip6_input.c
--- netinet6/ip6_input.c 21 Aug 2022 14:15:55 -0000 1.254
+++ netinet6/ip6_input.c 6 Sep 2022 19:39:24 -0000
@@ -190,6 +190,10 @@ ip6_ours(struct mbuf **mp, int *offp, in
if (af != AF_UNSPEC)
return nxt;
+ nxt = ip_deliver(mp, offp, nxt, AF_INET6, 1);
+ if (nxt == IPPROTO_DONE)
+ return IPPROTO_DONE;
+
/* save values for later, use after dequeue */
if (*offp != sizeof(struct ip6_hdr)) {
struct m_tag *mtag;
@@ -248,7 +252,7 @@ ip6intr(void)
off = sizeof(struct ip6_hdr);
nxt = ip6->ip6_nxt;
}
- nxt = ip_deliver(&m, &off, nxt, AF_INET6);
+ nxt = ip_deliver(&m, &off, nxt, AF_INET6, 0);
KASSERT(nxt == IPPROTO_DONE);
}
}
Index: sys/mbuf.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.255
diff -u -p -r1.255 mbuf.h
--- sys/mbuf.h 15 Aug 2022 16:15:37 -0000 1.255
+++ sys/mbuf.h 6 Sep 2022 19:39:24 -0000
@@ -471,6 +471,8 @@ struct m_tag *m_tag_next(struct mbuf *,
#define PACKET_TAG_IPSEC_IN_DONE 0x0001 /* IPsec applied, in */
#define PACKET_TAG_IPSEC_OUT_DONE 0x0002 /* IPsec applied, out */
#define PACKET_TAG_IPSEC_FLOWINFO 0x0004 /* IPsec flowinfo */
+#define PACKET_TAG_IP_OFFNXT 0x0010 /* IPv4 offset and next proto */
+#define PACKET_TAG_IP6_OFFNXT 0x0020 /* IPv6 offset and next proto */
#define PACKET_TAG_WIREGUARD 0x0040 /* WireGuard data */
#define PACKET_TAG_GRE 0x0080 /* GRE processing done */
#define PACKET_TAG_DLT 0x0100 /* data link layer type */
@@ -479,7 +481,6 @@ struct m_tag *m_tag_next(struct mbuf *,
#define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */
#define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */
#define PACKET_TAG_CARP_BAL_IP 0x4000 /* carp(4) ip balanced marker */
-#define PACKET_TAG_IP6_OFFNXT 0x8000 /* IPv6 offset and next proto */
#define MTAG_BITS \
("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_FLOWINFO" \
Index: sys/protosw.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/protosw.h,v
retrieving revision 1.55
diff -u -p -r1.55 protosw.h
--- sys/protosw.h 5 Sep 2022 14:56:09 -0000 1.55
+++ sys/protosw.h 6 Sep 2022 19:39:24 -0000
@@ -128,6 +128,7 @@ struct protosw {
#define PR_ABRTACPTDIS 0x20 /* abort on accept(2) to disconnected
socket */
#define PR_SPLICE 0x40 /* socket splicing is possible */
+#define PR_MPSAFE 0x80 /* input runs with shared netlock */
/*
* The arguments to usrreq are: