On Sun, Aug 21, 2022 at 07:07:29PM +0200, Alexander Bluhm wrote:
> On Fri, Aug 19, 2022 at 10:54:42PM +0200, Alexander Bluhm wrote:
> > This diff allows udp_input() to run in parallel.
Diff rebased to -current.
Index: kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.284
diff -u -p -r1.284 uipc_socket.c
--- kern/uipc_socket.c 21 Aug 2022 16:22:17 -0000 1.284
+++ kern/uipc_socket.c 22 Aug 2022 12:01:58 -0000
@@ -822,10 +822,10 @@ bad:
if (mp)
*mp = NULL;
- solock(so);
+ solock_shared(so);
restart:
if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
- sounlock(so);
+ sounlock_shared(so);
return (error);
}
@@ -893,7 +893,7 @@ restart:
sbunlock(so, &so->so_rcv);
error = sbwait(so, &so->so_rcv);
if (error) {
- sounlock(so);
+ sounlock_shared(so);
return (error);
}
goto restart;
@@ -962,11 +962,11 @@ dontblock:
sbsync(&so->so_rcv, nextrecord);
if (controlp) {
if (pr->pr_domain->dom_externalize) {
- sounlock(so);
+ sounlock_shared(so);
error =
(*pr->pr_domain->dom_externalize)
(cm, controllen, flags);
- solock(so);
+ solock_shared(so);
}
*controlp = cm;
} else {
@@ -1040,9 +1040,9 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
- sounlock(so);
+ sounlock_shared(so);
uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
- solock(so);
+ solock_shared(so);
if (uio_error)
uio->uio_resid = resid - len;
} else
@@ -1126,7 +1126,7 @@ dontblock:
error = sbwait(so, &so->so_rcv);
if (error) {
sbunlock(so, &so->so_rcv);
- sounlock(so);
+ sounlock_shared(so);
return (0);
}
if ((m = so->so_rcv.sb_mb) != NULL)
@@ -1171,7 +1171,7 @@ dontblock:
*flagsp |= flags;
release:
sbunlock(so, &so->so_rcv);
- sounlock(so);
+ sounlock_shared(so);
return (error);
}
Index: kern/uipc_socket2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.127
diff -u -p -r1.127 uipc_socket2.c
--- kern/uipc_socket2.c 13 Aug 2022 21:01:46 -0000 1.127
+++ kern/uipc_socket2.c 22 Aug 2022 12:01:58 -0000
@@ -360,6 +360,24 @@ solock(struct socket *so)
}
}
+void
+solock_shared(struct socket *so)
+{
+ switch (so->so_proto->pr_domain->dom_family) {
+ case PF_INET:
+ case PF_INET6:
+ if (so->so_proto->pr_usrreqs->pru_lock != NULL) {
+ NET_LOCK_SHARED();
+ pru_lock(so);
+ } else
+ NET_LOCK();
+ break;
+ default:
+ rw_enter_write(&so->so_lock);
+ break;
+ }
+}
+
int
solock_persocket(struct socket *so)
{
@@ -403,6 +421,24 @@ sounlock(struct socket *so)
}
void
+sounlock_shared(struct socket *so)
+{
+ switch (so->so_proto->pr_domain->dom_family) {
+ case PF_INET:
+ case PF_INET6:
+ if (so->so_proto->pr_usrreqs->pru_unlock != NULL) {
+ pru_unlock(so);
+ NET_UNLOCK_SHARED();
+ } else
+ NET_UNLOCK();
+ break;
+ default:
+ rw_exit_write(&so->so_lock);
+ break;
+ }
+}
+
+void
soassertlocked(struct socket *so)
{
switch (so->so_proto->pr_domain->dom_family) {
@@ -425,7 +461,15 @@ sosleep_nsec(struct socket *so, void *id
switch (so->so_proto->pr_domain->dom_family) {
case PF_INET:
case PF_INET6:
+ if (so->so_proto->pr_usrreqs->pru_unlock != NULL &&
+ rw_status(&netlock) == RW_READ) {
+ pru_unlock(so);
+ }
ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
+ if (so->so_proto->pr_usrreqs->pru_lock != NULL &&
+ rw_status(&netlock) == RW_READ) {
+ pru_lock(so);
+ }
break;
default:
ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs);
Index: net/if_bridge.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.364
diff -u -p -r1.364 if_bridge.c
--- net/if_bridge.c 7 Aug 2022 00:57:43 -0000 1.364
+++ net/if_bridge.c 22 Aug 2022 12:01:58 -0000
@@ -1590,7 +1590,7 @@ bridge_ipsec(struct ifnet *ifp, struct e
off);
tdb_unref(tdb);
if (prot != IPPROTO_DONE)
- ip_deliver(&m, &hlen, prot, af);
+ ip_deliver(&m, &hlen, prot, af, 0);
return (1);
} else {
tdb_unref(tdb);
Index: netinet/in_proto.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_proto.c,v
retrieving revision 1.99
diff -u -p -r1.99 in_proto.c
--- netinet/in_proto.c 15 Aug 2022 09:11:38 -0000 1.99
+++ netinet/in_proto.c 22 Aug 2022 12:01:58 -0000
@@ -185,7 +185,7 @@ const struct protosw inetsw[] = {
.pr_type = SOCK_DGRAM,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_UDP,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE,
.pr_input = udp_input,
.pr_ctlinput = udp_ctlinput,
.pr_ctloutput = ip_ctloutput,
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.380
diff -u -p -r1.380 ip_input.c
--- netinet/ip_input.c 21 Aug 2022 14:15:55 -0000 1.380
+++ netinet/ip_input.c 22 Aug 2022 12:01:58 -0000
@@ -230,6 +230,11 @@ ip_init(void)
#endif
}
+struct ip_offnxt {
+ int ion_off;
+ int ion_nxt;
+};
+
/*
* Enqueue packet for local delivery. Queuing is used as a boundary
* between the network layer (input/forward path) running with
@@ -246,6 +251,30 @@ ip_ours(struct mbuf **mp, int *offp, int
if (af != AF_UNSPEC)
return nxt;
+ nxt = ip_deliver(mp, offp, nxt, AF_INET, 1);
+ if (nxt == IPPROTO_DONE)
+ return IPPROTO_DONE;
+
+ /* save values for later, use after dequeue */
+ if (*offp != sizeof(struct ip)) {
+ struct m_tag *mtag;
+ struct ip_offnxt *ion;
+
+ /* mbuf tags are expensive, but only used for header options */
+ mtag = m_tag_get(PACKET_TAG_IP_OFFNXT, sizeof(*ion),
+ M_NOWAIT);
+ if (mtag == NULL) {
+ ipstat_inc(ips_idropped);
+ m_freemp(mp);
+ return IPPROTO_DONE;
+ }
+ ion = (struct ip_offnxt *)(mtag + 1);
+ ion->ion_off = *offp;
+ ion->ion_nxt = nxt;
+
+ m_tag_prepend(*mp, mtag);
+ }
+
niq_enqueue(&ipintrq, *mp);
*mp = NULL;
return IPPROTO_DONE;
@@ -261,18 +290,31 @@ ipintr(void)
struct mbuf *m;
while ((m = niq_dequeue(&ipintrq)) != NULL) {
- struct ip *ip;
+ struct m_tag *mtag;
int off, nxt;
#ifdef DIAGNOSTIC
if ((m->m_flags & M_PKTHDR) == 0)
panic("ipintr no HDR");
#endif
- ip = mtod(m, struct ip *);
- off = ip->ip_hl << 2;
- nxt = ip->ip_p;
+ mtag = m_tag_find(m, PACKET_TAG_IP_OFFNXT, NULL);
+ if (mtag != NULL) {
+ struct ip_offnxt *ion;
+
+ ion = (struct ip_offnxt *)(mtag + 1);
+ off = ion->ion_off;
+ nxt = ion->ion_nxt;
+
+ m_tag_delete(m, mtag);
+ } else {
+ struct ip *ip;
- nxt = ip_deliver(&m, &off, nxt, AF_INET);
+ ip = mtod(m, struct ip *);
+ off = ip->ip_hl << 2;
+ nxt = ip->ip_p;
+ }
+
+ nxt = ip_deliver(&m, &off, nxt, AF_INET, 0);
KASSERT(nxt == IPPROTO_DONE);
}
}
@@ -673,7 +715,7 @@ ip_fragcheck(struct mbuf **mp, int *offp
#endif
int
-ip_deliver(struct mbuf **mp, int *offp, int nxt, int af)
+ip_deliver(struct mbuf **mp, int *offp, int nxt, int af, int shared)
{
const struct protosw *psw;
int naf = af;
@@ -681,26 +723,24 @@ ip_deliver(struct mbuf **mp, int *offp,
int nest = 0;
#endif /* INET6 */
- NET_ASSERT_LOCKED_EXCLUSIVE();
-
- /* pf might have modified stuff, might have to chksum */
- switch (af) {
- case AF_INET:
- in_proto_cksum_out(*mp, NULL);
- break;
-#ifdef INET6
- case AF_INET6:
- in6_proto_cksum_out(*mp, NULL);
- break;
-#endif /* INET6 */
- }
-
/*
* Tell launch routine the next header
*/
IPSTAT_INC(delivered);
while (nxt != IPPROTO_DONE) {
+ switch (af) {
+ case AF_INET:
+ psw = &inetsw[ip_protox[nxt]];
+ break;
+#ifdef INET6
+ case AF_INET6:
+ psw = &inet6sw[ip6_protox[nxt]];
+ break;
+#endif /* INET6 */
+ }
+ if (shared && !ISSET(psw->pr_flags, PR_MPSAFE))
+ break;
#ifdef INET6
if (af == AF_INET6 &&
ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) {
@@ -737,16 +777,6 @@ ip_deliver(struct mbuf **mp, int *offp,
case IPPROTO_IPV6:
naf = AF_INET6;
ip6stat_inc(ip6s_delivered);
- break;
-#endif /* INET6 */
- }
- switch (af) {
- case AF_INET:
- psw = &inetsw[ip_protox[nxt]];
- break;
-#ifdef INET6
- case AF_INET6:
- psw = &inet6sw[ip6_protox[nxt]];
break;
#endif /* INET6 */
}
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.99
diff -u -p -r1.99 ip_var.h
--- netinet/ip_var.h 21 Aug 2022 22:45:55 -0000 1.99
+++ netinet/ip_var.h 22 Aug 2022 12:01:58 -0000
@@ -249,7 +249,7 @@ int ip_sysctl(int *, u_int, void *, siz
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
struct mbuf *);
int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
-int ip_deliver(struct mbuf **, int *, int, int);
+int ip_deliver(struct mbuf **, int *, int, int, int);
void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
int rip_ctloutput(int, struct socket *, int, int, struct mbuf *);
void rip_init(void);
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.287
diff -u -p -r1.287 udp_usrreq.c
--- netinet/udp_usrreq.c 22 Aug 2022 10:37:27 -0000 1.287
+++ netinet/udp_usrreq.c 22 Aug 2022 12:01:58 -0000
@@ -122,10 +122,15 @@ u_int udp_sendspace = 9216; /* really m
u_int udp_recvspace = 40 * (1024 + sizeof(struct sockaddr_in));
/* 40 1K datagrams */
+void udp_lock(struct socket *);
+void udp_unlock(struct socket *);
+
const struct pr_usrreqs udp_usrreqs = {
.pru_usrreq = udp_usrreq,
.pru_attach = udp_attach,
.pru_detach = udp_detach,
+ .pru_lock = udp_lock,
+ .pru_unlock = udp_unlock,
.pru_bind = udp_bind,
.pru_connect = udp_connect,
};
@@ -653,12 +658,17 @@ udp_sbappend(struct inpcb *inp, struct m
}
#endif
m_adj(m, hlen);
+
+ mtx_enter(&inp->inp_mtx);
if (sbappendaddr(so, &so->so_rcv, srcaddr, m, opts) == 0) {
+ mtx_leave(&inp->inp_mtx);
udpstat_inc(udps_fullsock);
m_freem(m);
m_freem(opts);
return;
}
+ mtx_leave(&inp->inp_mtx);
+
sorwakeup(so);
}
@@ -1245,6 +1255,24 @@ udp_detach(struct socket *so)
in_pcbdetach(inp);
return (0);
+}
+
+void
+udp_lock(struct socket *so)
+{
+ struct inpcb *inp = sotoinpcb(so);
+
+ NET_ASSERT_LOCKED();
+ mtx_enter(&inp->inp_mtx);
+}
+
+void
+udp_unlock(struct socket *so)
+{
+ struct inpcb *inp = sotoinpcb(so);
+
+ NET_ASSERT_LOCKED();
+ mtx_leave(&inp->inp_mtx);
}
int
Index: netinet6/in6_proto.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.110
diff -u -p -r1.110 in6_proto.c
--- netinet6/in6_proto.c 15 Aug 2022 09:11:39 -0000 1.110
+++ netinet6/in6_proto.c 22 Aug 2022 12:01:58 -0000
@@ -136,7 +136,7 @@ const struct protosw inet6sw[] = {
.pr_type = SOCK_DGRAM,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_UDP,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE,
.pr_input = udp_input,
.pr_ctlinput = udp6_ctlinput,
.pr_ctloutput = ip6_ctloutput,
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.254
diff -u -p -r1.254 ip6_input.c
--- netinet6/ip6_input.c 21 Aug 2022 14:15:55 -0000 1.254
+++ netinet6/ip6_input.c 22 Aug 2022 12:01:58 -0000
@@ -190,6 +190,10 @@ ip6_ours(struct mbuf **mp, int *offp, in
if (af != AF_UNSPEC)
return nxt;
+ nxt = ip_deliver(mp, offp, nxt, AF_INET6, 1);
+ if (nxt == IPPROTO_DONE)
+ return IPPROTO_DONE;
+
/* save values for later, use after dequeue */
if (*offp != sizeof(struct ip6_hdr)) {
struct m_tag *mtag;
@@ -248,7 +252,7 @@ ip6intr(void)
off = sizeof(struct ip6_hdr);
nxt = ip6->ip6_nxt;
}
- nxt = ip_deliver(&m, &off, nxt, AF_INET6);
+ nxt = ip_deliver(&m, &off, nxt, AF_INET6, 0);
KASSERT(nxt == IPPROTO_DONE);
}
}
Index: sys/mbuf.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.255
diff -u -p -r1.255 mbuf.h
--- sys/mbuf.h 15 Aug 2022 16:15:37 -0000 1.255
+++ sys/mbuf.h 22 Aug 2022 12:01:58 -0000
@@ -471,6 +471,8 @@ struct m_tag *m_tag_next(struct mbuf *,
#define PACKET_TAG_IPSEC_IN_DONE 0x0001 /* IPsec applied, in */
#define PACKET_TAG_IPSEC_OUT_DONE 0x0002 /* IPsec applied, out */
#define PACKET_TAG_IPSEC_FLOWINFO 0x0004 /* IPsec flowinfo */
+#define PACKET_TAG_IP_OFFNXT 0x0010 /* IPv4 offset and next proto */
+#define PACKET_TAG_IP6_OFFNXT 0x0020 /* IPv6 offset and next proto */
#define PACKET_TAG_WIREGUARD 0x0040 /* WireGuard data */
#define PACKET_TAG_GRE 0x0080 /* GRE processing done */
#define PACKET_TAG_DLT 0x0100 /* data link layer type */
@@ -479,7 +481,6 @@ struct m_tag *m_tag_next(struct mbuf *,
#define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */
#define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */
#define PACKET_TAG_CARP_BAL_IP 0x4000 /* carp(4) ip balanced marker */
-#define PACKET_TAG_IP6_OFFNXT 0x8000 /* IPv6 offset and next proto */
#define MTAG_BITS \
("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_FLOWINFO" \
Index: sys/protosw.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/protosw.h,v
retrieving revision 1.41
diff -u -p -r1.41 protosw.h
--- sys/protosw.h 22 Aug 2022 08:08:47 -0000 1.41
+++ sys/protosw.h 22 Aug 2022 12:01:58 -0000
@@ -66,6 +66,8 @@ struct pr_usrreqs {
int (*pru_attach)(struct socket *, int);
int (*pru_detach)(struct socket *);
+ void (*pru_lock)(struct socket *);
+ void (*pru_unlock)(struct socket *);
int (*pru_bind)(struct socket *, struct mbuf *, struct proc *);
int (*pru_listen)(struct socket *);
int (*pru_connect)(struct socket *, struct mbuf *);
@@ -116,6 +118,7 @@ struct protosw {
#define PR_ABRTACPTDIS 0x20 /* abort on accept(2) to
disconnected
socket */
#define PR_SPLICE 0x40 /* socket splicing is possible
*/
+#define PR_MPSAFE 0x80 /* input runs with shared
netlock */
/*
* The arguments to usrreq are:
@@ -263,6 +266,18 @@ static inline int
pru_detach(struct socket *so)
{
return (*so->so_proto->pr_usrreqs->pru_detach)(so);
+}
+
+static inline void
+pru_lock(struct socket *so)
+{
+ (*so->so_proto->pr_usrreqs->pru_lock)(so);
+}
+
+static inline void
+pru_unlock(struct socket *so)
+{
+ (*so->so_proto->pr_usrreqs->pru_unlock)(so);
}
static inline int
Index: sys/socketvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.108
diff -u -p -r1.108 socketvar.h
--- sys/socketvar.h 21 Aug 2022 16:22:18 -0000 1.108
+++ sys/socketvar.h 22 Aug 2022 12:01:58 -0000
@@ -349,9 +349,11 @@ int sockargs(struct mbuf **, const void
int sosleep_nsec(struct socket *, void *, int, const char *, uint64_t);
void solock(struct socket *);
+void solock_shared(struct socket *);
int solock_persocket(struct socket *);
void solock_pair(struct socket *, struct socket *);
void sounlock(struct socket *);
+void sounlock_shared(struct socket *);
int sendit(struct proc *, int, struct msghdr *, int, register_t *);
int recvit(struct proc *, int, struct msghdr *, caddr_t, register_t *);