On Fri, Aug 19, 2022 at 10:54:42PM +0200, Alexander Bluhm wrote:
> This diff allows udp_input() to run in parallel. It consists of
> three major parts.
>
> - Use the PR_MPSAFE flag to run the protocol deliver loop with shared
> netlock. Queue the packet and switch to the deliver loop with exclusive
> netlock if a protocol is not MP safe.
>
> - Use a rwlock to protect the inp_notify field. As ip_output()
> may be called in in_pcbnotifyall() and may sleep in pflock, we
> need a sleeping lock.
>
> - Use a mutex at the inpcb to protect the recv socket buffer.
>
> Before committing I will split the diff into parts. Just showing what
> I have now.
Parts of it are committed. Rebased to -current.
Now I also use shared net lock in soreceive().
This diff is mainly for testing, not to commit yet.
bluhm
Index: kern/uipc_socket.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.283
diff -u -p -r1.283 uipc_socket.c
--- kern/uipc_socket.c 15 Aug 2022 09:11:38 -0000 1.283
+++ kern/uipc_socket.c 21 Aug 2022 15:09:30 -0000
@@ -823,10 +823,10 @@ bad:
if (mp)
*mp = NULL;
- solock(so);
+ solock_shared(so);
restart:
if ((error = sblock(so, &so->so_rcv, SBLOCKWAIT(flags))) != 0) {
- sounlock(so);
+ sounlock_shared(so);
return (error);
}
@@ -894,7 +894,7 @@ restart:
sbunlock(so, &so->so_rcv);
error = sbwait(so, &so->so_rcv);
if (error) {
- sounlock(so);
+ sounlock_shared(so);
return (error);
}
goto restart;
@@ -963,11 +963,11 @@ dontblock:
sbsync(&so->so_rcv, nextrecord);
if (controlp) {
if (pr->pr_domain->dom_externalize) {
- sounlock(so);
+ sounlock_shared(so);
error =
(*pr->pr_domain->dom_externalize)
(cm, controllen, flags);
- solock(so);
+ solock_shared(so);
}
*controlp = cm;
} else {
@@ -1041,9 +1041,9 @@ dontblock:
SBLASTRECORDCHK(&so->so_rcv, "soreceive uiomove");
SBLASTMBUFCHK(&so->so_rcv, "soreceive uiomove");
resid = uio->uio_resid;
- sounlock(so);
+ sounlock_shared(so);
uio_error = uiomove(mtod(m, caddr_t) + moff, len, uio);
- solock(so);
+ solock_shared(so);
if (uio_error)
uio->uio_resid = resid - len;
} else
@@ -1127,7 +1127,7 @@ dontblock:
error = sbwait(so, &so->so_rcv);
if (error) {
sbunlock(so, &so->so_rcv);
- sounlock(so);
+ sounlock_shared(so);
return (0);
}
if ((m = so->so_rcv.sb_mb) != NULL)
@@ -1172,7 +1172,7 @@ dontblock:
*flagsp |= flags;
release:
sbunlock(so, &so->so_rcv);
- sounlock(so);
+ sounlock_shared(so);
return (error);
}
Index: kern/uipc_socket2.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.127
diff -u -p -r1.127 uipc_socket2.c
--- kern/uipc_socket2.c 13 Aug 2022 21:01:46 -0000 1.127
+++ kern/uipc_socket2.c 21 Aug 2022 15:09:30 -0000
@@ -360,6 +360,24 @@ solock(struct socket *so)
}
}
+void
+solock_shared(struct socket *so)
+{
+ switch (so->so_proto->pr_domain->dom_family) {
+ case PF_INET:
+ case PF_INET6:
+ if (so->so_proto->pr_usrreqs->pru_lock != NULL) {
+ NET_LOCK_SHARED();
+ pru_lock(so);
+ } else
+ NET_LOCK();
+ break;
+ default:
+ rw_enter_write(&so->so_lock);
+ break;
+ }
+}
+
int
solock_persocket(struct socket *so)
{
@@ -403,6 +421,24 @@ sounlock(struct socket *so)
}
void
+sounlock_shared(struct socket *so)
+{
+ switch (so->so_proto->pr_domain->dom_family) {
+ case PF_INET:
+ case PF_INET6:
+ if (so->so_proto->pr_usrreqs->pru_unlock != NULL) {
+ pru_unlock(so);
+ NET_UNLOCK_SHARED();
+ } else
+ NET_UNLOCK();
+ break;
+ default:
+ rw_exit_write(&so->so_lock);
+ break;
+ }
+}
+
+void
soassertlocked(struct socket *so)
{
switch (so->so_proto->pr_domain->dom_family) {
@@ -425,7 +461,15 @@ sosleep_nsec(struct socket *so, void *id
switch (so->so_proto->pr_domain->dom_family) {
case PF_INET:
case PF_INET6:
+ if (so->so_proto->pr_usrreqs->pru_unlock != NULL &&
+ rw_status(&netlock) == RW_READ) {
+ pru_unlock(so);
+ }
ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs);
+ if (so->so_proto->pr_usrreqs->pru_lock != NULL &&
+ rw_status(&netlock) == RW_READ) {
+ pru_lock(so);
+ }
break;
default:
ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs);
Index: net/if_bridge.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if_bridge.c,v
retrieving revision 1.364
diff -u -p -r1.364 if_bridge.c
--- net/if_bridge.c 7 Aug 2022 00:57:43 -0000 1.364
+++ net/if_bridge.c 21 Aug 2022 15:06:27 -0000
@@ -1590,7 +1590,7 @@ bridge_ipsec(struct ifnet *ifp, struct e
off);
tdb_unref(tdb);
if (prot != IPPROTO_DONE)
- ip_deliver(&m, &hlen, prot, af);
+ ip_deliver(&m, &hlen, prot, af, 0);
return (1);
} else {
tdb_unref(tdb);
Index: netinet/in_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
retrieving revision 1.271
diff -u -p -r1.271 in_pcb.c
--- netinet/in_pcb.c 21 Aug 2022 11:44:53 -0000 1.271
+++ netinet/in_pcb.c 21 Aug 2022 15:06:27 -0000
@@ -175,6 +175,7 @@ void
in_pcbinit(struct inpcbtable *table, int hashsize)
{
mtx_init(&table->inpt_mtx, IPL_SOFTNET);
+ rw_init(&table->inpt_notify, "inpnotify");
TAILQ_INIT(&table->inpt_queue);
table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_WAITOK,
&table->inpt_mask);
@@ -696,8 +697,6 @@ in_pcbnotifyall(struct inpcbtable *table
struct in_addr faddr;
u_int rdomain;
- NET_ASSERT_LOCKED_EXCLUSIVE();
-
if (dst->sa_family != AF_INET)
return;
faddr = satosin(dst)->sin_addr;
@@ -708,6 +707,7 @@ in_pcbnotifyall(struct inpcbtable *table
SIMPLEQ_INIT(&inpcblist);
rdomain = rtable_l2(rtable);
+ rw_enter_write(&table->inpt_notify);
mtx_enter(&table->inpt_mtx);
TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
#ifdef INET6
@@ -729,6 +729,7 @@ in_pcbnotifyall(struct inpcbtable *table
(*notify)(inp, errno);
in_pcbunref(inp);
}
+ rw_exit_write(&table->inpt_notify);
}
/*
Index: netinet/in_pcb.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.h,v
retrieving revision 1.130
diff -u -p -r1.130 in_pcb.h
--- netinet/in_pcb.h 21 Aug 2022 11:44:53 -0000 1.130
+++ netinet/in_pcb.h 21 Aug 2022 15:06:27 -0000
@@ -66,6 +66,7 @@
#include <sys/queue.h>
#include <sys/mutex.h>
+#include <sys/rwlock.h>
#include <sys/refcnt.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
@@ -79,6 +80,7 @@
* I immutable after creation
* N net lock
* t inpt_mtx pcb table mutex
+ * y inpt_notify pcb table rwlock for notify
* p inpcb_mtx pcb mutex
*/
@@ -103,7 +105,7 @@ struct inpcb {
LIST_ENTRY(inpcb) inp_hash; /* [t] local and foreign hash */
LIST_ENTRY(inpcb) inp_lhash; /* [t] local port hash */
TAILQ_ENTRY(inpcb) inp_queue; /* [t] inet PCB queue */
- SIMPLEQ_ENTRY(inpcb) inp_notify; /* [N] notify or udp append */
+ SIMPLEQ_ENTRY(inpcb) inp_notify; /* [y] notify or udp append */
struct inpcbtable *inp_table; /* [I] inet queue/hash table */
union inpaddru inp_faddru; /* Foreign address. */
union inpaddru inp_laddru; /* Local address. */
@@ -166,6 +168,7 @@ LIST_HEAD(inpcbhead, inpcb);
struct inpcbtable {
struct mutex inpt_mtx; /* protect queue and hash */
+ struct rwlock inpt_notify; /* protect inp_notify list */
TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* [t] inet PCB queue */
struct inpcbhead *inpt_hashtbl; /* [t] local and foreign hash */
struct inpcbhead *inpt_lhashtbl; /* [t] local port hash */
Index: netinet/in_proto.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_proto.c,v
retrieving revision 1.99
diff -u -p -r1.99 in_proto.c
--- netinet/in_proto.c 15 Aug 2022 09:11:38 -0000 1.99
+++ netinet/in_proto.c 21 Aug 2022 15:06:27 -0000
@@ -185,7 +185,7 @@ const struct protosw inetsw[] = {
.pr_type = SOCK_DGRAM,
.pr_domain = &inetdomain,
.pr_protocol = IPPROTO_UDP,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE,
.pr_input = udp_input,
.pr_ctlinput = udp_ctlinput,
.pr_ctloutput = ip_ctloutput,
Index: netinet/ip_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_input.c,v
retrieving revision 1.380
diff -u -p -r1.380 ip_input.c
--- netinet/ip_input.c 21 Aug 2022 14:15:55 -0000 1.380
+++ netinet/ip_input.c 21 Aug 2022 15:06:27 -0000
@@ -230,6 +230,11 @@ ip_init(void)
#endif
}
+struct ip_offnxt {
+ int ion_off;
+ int ion_nxt;
+};
+
/*
* Enqueue packet for local delivery. Queuing is used as a boundary
* between the network layer (input/forward path) running with
@@ -246,6 +251,30 @@ ip_ours(struct mbuf **mp, int *offp, int
if (af != AF_UNSPEC)
return nxt;
+ nxt = ip_deliver(mp, offp, nxt, AF_INET, 1);
+ if (nxt == IPPROTO_DONE)
+ return IPPROTO_DONE;
+
+ /* save values for later, use after dequeue */
+ if (*offp != sizeof(struct ip)) {
+ struct m_tag *mtag;
+ struct ip_offnxt *ion;
+
+ /* mbuf tags are expensive, but only used for header options */
+ mtag = m_tag_get(PACKET_TAG_IP_OFFNXT, sizeof(*ion),
+ M_NOWAIT);
+ if (mtag == NULL) {
+ ipstat_inc(ips_idropped);
+ m_freemp(mp);
+ return IPPROTO_DONE;
+ }
+ ion = (struct ip_offnxt *)(mtag + 1);
+ ion->ion_off = *offp;
+ ion->ion_nxt = nxt;
+
+ m_tag_prepend(*mp, mtag);
+ }
+
niq_enqueue(&ipintrq, *mp);
*mp = NULL;
return IPPROTO_DONE;
@@ -261,18 +290,31 @@ ipintr(void)
struct mbuf *m;
while ((m = niq_dequeue(&ipintrq)) != NULL) {
- struct ip *ip;
+ struct m_tag *mtag;
int off, nxt;
#ifdef DIAGNOSTIC
if ((m->m_flags & M_PKTHDR) == 0)
panic("ipintr no HDR");
#endif
- ip = mtod(m, struct ip *);
- off = ip->ip_hl << 2;
- nxt = ip->ip_p;
+ mtag = m_tag_find(m, PACKET_TAG_IP_OFFNXT, NULL);
+ if (mtag != NULL) {
+ struct ip_offnxt *ion;
+
+ ion = (struct ip_offnxt *)(mtag + 1);
+ off = ion->ion_off;
+ nxt = ion->ion_nxt;
+
+ m_tag_delete(m, mtag);
+ } else {
+ struct ip *ip;
- nxt = ip_deliver(&m, &off, nxt, AF_INET);
+ ip = mtod(m, struct ip *);
+ off = ip->ip_hl << 2;
+ nxt = ip->ip_p;
+ }
+
+ nxt = ip_deliver(&m, &off, nxt, AF_INET, 0);
KASSERT(nxt == IPPROTO_DONE);
}
}
@@ -673,7 +715,7 @@ ip_fragcheck(struct mbuf **mp, int *offp
#endif
int
-ip_deliver(struct mbuf **mp, int *offp, int nxt, int af)
+ip_deliver(struct mbuf **mp, int *offp, int nxt, int af, int shared)
{
const struct protosw *psw;
int naf = af;
@@ -681,26 +723,24 @@ ip_deliver(struct mbuf **mp, int *offp,
int nest = 0;
#endif /* INET6 */
- NET_ASSERT_LOCKED_EXCLUSIVE();
-
- /* pf might have modified stuff, might have to chksum */
- switch (af) {
- case AF_INET:
- in_proto_cksum_out(*mp, NULL);
- break;
-#ifdef INET6
- case AF_INET6:
- in6_proto_cksum_out(*mp, NULL);
- break;
-#endif /* INET6 */
- }
-
/*
* Tell launch routine the next header
*/
IPSTAT_INC(delivered);
while (nxt != IPPROTO_DONE) {
+ switch (af) {
+ case AF_INET:
+ psw = &inetsw[ip_protox[nxt]];
+ break;
+#ifdef INET6
+ case AF_INET6:
+ psw = &inet6sw[ip6_protox[nxt]];
+ break;
+#endif /* INET6 */
+ }
+ if (shared && !ISSET(psw->pr_flags, PR_MPSAFE))
+ break;
#ifdef INET6
if (af == AF_INET6 &&
ip6_hdrnestlimit && (++nest > ip6_hdrnestlimit)) {
@@ -737,16 +777,6 @@ ip_deliver(struct mbuf **mp, int *offp,
case IPPROTO_IPV6:
naf = AF_INET6;
ip6stat_inc(ip6s_delivered);
- break;
-#endif /* INET6 */
- }
- switch (af) {
- case AF_INET:
- psw = &inetsw[ip_protox[nxt]];
- break;
-#ifdef INET6
- case AF_INET6:
- psw = &inet6sw[ip6_protox[nxt]];
break;
#endif /* INET6 */
}
Index: netinet/ip_var.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/ip_var.h,v
retrieving revision 1.98
diff -u -p -r1.98 ip_var.h
--- netinet/ip_var.h 20 Aug 2022 23:48:58 -0000 1.98
+++ netinet/ip_var.h 21 Aug 2022 15:06:27 -0000
@@ -249,7 +249,7 @@ int ip_sysctl(int *, u_int, void *, siz
void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *,
struct mbuf *);
int ip_input_if(struct mbuf **, int *, int, int, struct ifnet *);
-int ip_deliver(struct mbuf **, int *, int, int);
+int ip_deliver(struct mbuf **, int *, int, int, int);
void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int);
int rip_ctloutput(int, struct socket *, int, int, struct mbuf *);
void rip_init(void);
Index: netinet/raw_ip.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.131
diff -u -p -r1.131 raw_ip.c
--- netinet/raw_ip.c 20 Aug 2022 23:48:58 -0000 1.131
+++ netinet/raw_ip.c 21 Aug 2022 15:06:27 -0000
@@ -159,8 +159,8 @@ rip_input(struct mbuf **mp, int *offp, i
}
}
#endif
- NET_ASSERT_LOCKED_EXCLUSIVE();
SIMPLEQ_INIT(&inpcblist);
+ rw_enter_write(&rawcbtable.inpt_notify);
mtx_enter(&rawcbtable.inpt_mtx);
TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
if (inp->inp_socket->so_state & SS_CANTRCVMORE)
@@ -188,6 +188,8 @@ rip_input(struct mbuf **mp, int *offp, i
mtx_leave(&rawcbtable.inpt_mtx);
if (SIMPLEQ_EMPTY(&inpcblist)) {
+ rw_exit_write(&rawcbtable.inpt_notify);
+
if (ip->ip_p != IPPROTO_ICMP)
icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_PROTOCOL,
0, 0);
@@ -198,6 +200,8 @@ rip_input(struct mbuf **mp, int *offp, i
counters[ips_noproto]++;
counters[ips_delivered]--;
counters_leave(&ref, ipcounters);
+
+ return IPPROTO_DONE;
}
while ((inp = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
@@ -223,6 +227,8 @@ rip_input(struct mbuf **mp, int *offp, i
}
in_pcbunref(inp);
}
+ rw_exit_write(&rawcbtable.inpt_notify);
+
return IPPROTO_DONE;
}
Index: netinet/udp_usrreq.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.283
diff -u -p -r1.283 udp_usrreq.c
--- netinet/udp_usrreq.c 20 Aug 2022 23:48:58 -0000 1.283
+++ netinet/udp_usrreq.c 21 Aug 2022 15:23:21 -0000
@@ -122,10 +122,15 @@ u_int udp_sendspace = 9216; /* really m
u_int udp_recvspace = 40 * (1024 + sizeof(struct sockaddr_in));
/* 40 1K datagrams */
+void udp_lock(struct socket *);
+void udp_unlock(struct socket *);
+
const struct pr_usrreqs udp_usrreqs = {
.pru_usrreq = udp_usrreq,
.pru_attach = udp_attach,
.pru_detach = udp_detach,
+ .pru_lock = udp_lock,
+ .pru_unlock = udp_unlock,
.pru_bind = udp_bind,
};
@@ -371,8 +376,8 @@ udp_input(struct mbuf **mp, int *offp, i
* Locate pcb(s) for datagram.
* (Algorithm copied from raw_intr().)
*/
- NET_ASSERT_LOCKED_EXCLUSIVE();
SIMPLEQ_INIT(&inpcblist);
+ rw_enter_write(&udbtable.inpt_notify);
mtx_enter(&udbtable.inpt_mtx);
TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
if (inp->inp_socket->so_state & SS_CANTRCVMORE)
@@ -445,6 +450,7 @@ udp_input(struct mbuf **mp, int *offp, i
mtx_leave(&udbtable.inpt_mtx);
if (SIMPLEQ_EMPTY(&inpcblist)) {
+ rw_exit_write(&udbtable.inpt_notify);
/*
* No matching pcb found; discard datagram.
* (No need to send an ICMP Port Unreachable
@@ -468,6 +474,8 @@ udp_input(struct mbuf **mp, int *offp, i
}
in_pcbunref(inp);
}
+ rw_exit_write(&udbtable.inpt_notify);
+
return IPPROTO_DONE;
}
/*
@@ -648,12 +656,17 @@ udp_sbappend(struct inpcb *inp, struct m
}
#endif
m_adj(m, hlen);
+
+ mtx_enter(&inp->inp_mtx);
if (sbappendaddr(so, &so->so_rcv, srcaddr, m, opts) == 0) {
+ mtx_leave(&inp->inp_mtx);
udpstat_inc(udps_fullsock);
m_freem(m);
m_freem(opts);
return;
}
+ mtx_leave(&inp->inp_mtx);
+
sorwakeup(so);
}
@@ -1270,6 +1283,24 @@ udp_detach(struct socket *so)
in_pcbdetach(inp);
return (0);
+}
+
+void
+udp_lock(struct socket *so)
+{
+ struct inpcb *inp = sotoinpcb(so);
+
+ NET_ASSERT_LOCKED();
+ mtx_enter(&inp->inp_mtx);
+}
+
+void
+udp_unlock(struct socket *so)
+{
+ struct inpcb *inp = sotoinpcb(so);
+
+ NET_ASSERT_LOCKED();
+ mtx_leave(&inp->inp_mtx);
}
int
Index: netinet6/in6_pcb.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_pcb.c,v
retrieving revision 1.119
diff -u -p -r1.119 in6_pcb.c
--- netinet6/in6_pcb.c 8 Aug 2022 12:06:31 -0000 1.119
+++ netinet6/in6_pcb.c 21 Aug 2022 15:06:27 -0000
@@ -387,8 +387,6 @@ in6_pcbnotify(struct inpcbtable *table,
u_int32_t flowinfo;
u_int rdomain;
- NET_ASSERT_LOCKED_EXCLUSIVE();
-
if ((unsigned)cmd >= PRC_NCMDS)
return;
@@ -430,6 +428,7 @@ in6_pcbnotify(struct inpcbtable *table,
SIMPLEQ_INIT(&inpcblist);
rdomain = rtable_l2(rtable);
+ rw_enter_write(&table->inpt_notify);
mtx_enter(&table->inpt_mtx);
TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
if ((inp->inp_flags & INP_IPV6) == 0)
@@ -513,6 +512,7 @@ in6_pcbnotify(struct inpcbtable *table,
(*notify)(inp, errno);
in_pcbunref(inp);
}
+ rw_exit_write(&table->inpt_notify);
}
struct inpcb *
Index: netinet6/in6_proto.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/in6_proto.c,v
retrieving revision 1.110
diff -u -p -r1.110 in6_proto.c
--- netinet6/in6_proto.c 15 Aug 2022 09:11:39 -0000 1.110
+++ netinet6/in6_proto.c 21 Aug 2022 15:06:27 -0000
@@ -136,7 +136,7 @@ const struct protosw inet6sw[] = {
.pr_type = SOCK_DGRAM,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_UDP,
- .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE,
+ .pr_flags = PR_ATOMIC|PR_ADDR|PR_SPLICE|PR_MPSAFE,
.pr_input = udp_input,
.pr_ctlinput = udp6_ctlinput,
.pr_ctloutput = ip6_ctloutput,
Index: netinet6/ip6_input.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/ip6_input.c,v
retrieving revision 1.254
diff -u -p -r1.254 ip6_input.c
--- netinet6/ip6_input.c 21 Aug 2022 14:15:55 -0000 1.254
+++ netinet6/ip6_input.c 21 Aug 2022 15:06:27 -0000
@@ -190,6 +190,10 @@ ip6_ours(struct mbuf **mp, int *offp, in
if (af != AF_UNSPEC)
return nxt;
+ nxt = ip_deliver(mp, offp, nxt, AF_INET6, 1);
+ if (nxt == IPPROTO_DONE)
+ return IPPROTO_DONE;
+
/* save values for later, use after dequeue */
if (*offp != sizeof(struct ip6_hdr)) {
struct m_tag *mtag;
@@ -248,7 +252,7 @@ ip6intr(void)
off = sizeof(struct ip6_hdr);
nxt = ip6->ip6_nxt;
}
- nxt = ip_deliver(&m, &off, nxt, AF_INET6);
+ nxt = ip_deliver(&m, &off, nxt, AF_INET6, 0);
KASSERT(nxt == IPPROTO_DONE);
}
}
Index: netinet6/raw_ip6.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.151
diff -u -p -r1.151 raw_ip6.c
--- netinet6/raw_ip6.c 20 Aug 2022 23:48:58 -0000 1.151
+++ netinet6/raw_ip6.c 21 Aug 2022 15:06:27 -0000
@@ -171,8 +171,8 @@ rip6_input(struct mbuf **mp, int *offp,
}
}
#endif
- NET_ASSERT_LOCKED_EXCLUSIVE();
SIMPLEQ_INIT(&inpcblist);
+ rw_enter_write(&rawin6pcbtable.inpt_notify);
mtx_enter(&rawin6pcbtable.inpt_mtx);
TAILQ_FOREACH(in6p, &rawin6pcbtable.inpt_queue, inp_queue) {
if (in6p->inp_socket->so_state & SS_CANTRCVMORE)
@@ -223,6 +223,8 @@ rip6_input(struct mbuf **mp, int *offp,
struct counters_ref ref;
uint64_t *counters;
+ rw_exit_write(&rawin6pcbtable.inpt_notify);
+
if (proto != IPPROTO_ICMPV6) {
rip6stat_inc(rip6s_nosock);
if (m->m_flags & M_MCAST)
@@ -239,6 +241,8 @@ rip6_input(struct mbuf **mp, int *offp,
counters = counters_enter(&ref, ip6counters);
counters[ip6s_delivered]--;
counters_leave(&ref, ip6counters);
+
+ return IPPROTO_DONE;
}
while ((in6p = SIMPLEQ_FIRST(&inpcblist)) != NULL) {
@@ -266,6 +270,8 @@ rip6_input(struct mbuf **mp, int *offp,
}
in_pcbunref(in6p);
}
+ rw_exit_write(&rawin6pcbtable.inpt_notify);
+
return IPPROTO_DONE;
}
Index: sys/mbuf.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.255
diff -u -p -r1.255 mbuf.h
--- sys/mbuf.h 15 Aug 2022 16:15:37 -0000 1.255
+++ sys/mbuf.h 21 Aug 2022 15:06:27 -0000
@@ -471,6 +471,8 @@ struct m_tag *m_tag_next(struct mbuf *,
#define PACKET_TAG_IPSEC_IN_DONE 0x0001 /* IPsec applied, in */
#define PACKET_TAG_IPSEC_OUT_DONE 0x0002 /* IPsec applied, out */
#define PACKET_TAG_IPSEC_FLOWINFO 0x0004 /* IPsec flowinfo */
+#define PACKET_TAG_IP_OFFNXT 0x0010 /* IPv4 offset and next proto */
+#define PACKET_TAG_IP6_OFFNXT 0x0020 /* IPv6 offset and next proto */
#define PACKET_TAG_WIREGUARD 0x0040 /* WireGuard data */
#define PACKET_TAG_GRE 0x0080 /* GRE processing done */
#define PACKET_TAG_DLT 0x0100 /* data link layer type */
@@ -479,7 +481,6 @@ struct m_tag *m_tag_next(struct mbuf *,
#define PACKET_TAG_SRCROUTE 0x1000 /* IPv4 source routing options */
#define PACKET_TAG_TUNNEL 0x2000 /* Tunnel endpoint address */
#define PACKET_TAG_CARP_BAL_IP 0x4000 /* carp(4) ip balanced marker */
-#define PACKET_TAG_IP6_OFFNXT 0x8000 /* IPv6 offset and next proto */
#define MTAG_BITS \
("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_FLOWINFO" \
Index: sys/protosw.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/protosw.h,v
retrieving revision 1.38
diff -u -p -r1.38 protosw.h
--- sys/protosw.h 20 Aug 2022 23:48:58 -0000 1.38
+++ sys/protosw.h 21 Aug 2022 15:14:59 -0000
@@ -66,6 +66,8 @@ struct pr_usrreqs {
int (*pru_attach)(struct socket *, int);
int (*pru_detach)(struct socket *);
+ void (*pru_lock)(struct socket *);
+ void (*pru_unlock)(struct socket *);
int (*pru_bind)(struct socket *, struct mbuf *, struct proc *);
};
@@ -113,6 +115,7 @@ struct protosw {
#define PR_ABRTACPTDIS 0x20 /* abort on accept(2) to
disconnected
socket */
#define PR_SPLICE 0x40 /* socket splicing is possible
*/
+#define PR_MPSAFE 0x80 /* input runs with shared
netlock */
/*
* The arguments to usrreq are:
@@ -260,6 +263,18 @@ static inline int
pru_detach(struct socket *so)
{
return (*so->so_proto->pr_usrreqs->pru_detach)(so);
+}
+
+static inline void
+pru_lock(struct socket *so)
+{
+ (*so->so_proto->pr_usrreqs->pru_lock)(so);
+}
+
+static inline void
+pru_unlock(struct socket *so)
+{
+ (*so->so_proto->pr_usrreqs->pru_unlock)(so);
}
static inline int
Index: sys/socketvar.h
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/sys/socketvar.h,v
retrieving revision 1.107
diff -u -p -r1.107 socketvar.h
--- sys/socketvar.h 13 Aug 2022 21:01:46 -0000 1.107
+++ sys/socketvar.h 21 Aug 2022 15:09:30 -0000
@@ -349,9 +349,11 @@ int sockargs(struct mbuf **, const void
int sosleep_nsec(struct socket *, void *, int, const char *, uint64_t);
void solock(struct socket *);
+void solock_shared(struct socket *);
int solock_persocket(struct socket *);
void solock_pair(struct socket *, struct socket *);
void sounlock(struct socket *);
+void sounlock_shared(struct socket *);
int sendit(struct proc *, int, struct msghdr *, int, register_t *);
int recvit(struct proc *, int, struct msghdr *, caddr_t, register_t *);