On Thu, Jun 30, 2022 at 11:08:48AM +0200, Claudio Jeker wrote: > This diff converts the SRP list to a SMR list in rtsock.c > SRP is a bit strange with how it works and the SMR code is a bit easier to > understand. Since we can sleep in the SMR_TAILQ_FOREACH() we need to grab > a refcount on the route pcb so that we can leave the SMR critical section > and then enter the SMR critical section at the end of the loop before > dropping the refcount again. > > The diff does not immeditaly explode but I doubt we can exploit > parallelism in route_input() so this may fail at some later stage if it is > wrong. > > Comments from the lock critics welcome
After discussing this with mpi@ and jmatthew@ we came to the conclusion that we need to smr_barrier() before refcnt_finalize() to ensure that no other CPU is between the SMR_TAILQ_FOREACH, refcnt_take() and smr_read_leave(). Updated diff below -- :wq Claudio Index: net/rtsock.c =================================================================== RCS file: /cvs/src/sys/net/rtsock.c,v retrieving revision 1.334 diff -u -p -r1.334 rtsock.c --- net/rtsock.c 28 Jun 2022 10:01:13 -0000 1.334 +++ net/rtsock.c 30 Jun 2022 09:25:53 -0000 @@ -71,7 +71,7 @@ #include <sys/domain.h> #include <sys/pool.h> #include <sys/protosw.h> -#include <sys/srp.h> +#include <sys/smr.h> #include <net/if.h> #include <net/if_dl.h> @@ -107,8 +107,6 @@ struct walkarg { }; void route_prinit(void); -void rcb_ref(void *, void *); -void rcb_unref(void *, void *); int route_output(struct mbuf *, struct socket *, struct sockaddr *, struct mbuf *); int route_ctloutput(int, struct socket *, int, int, struct mbuf *); @@ -149,7 +147,7 @@ int rt_setsource(unsigned int, struct struct rtpcb { struct socket *rop_socket; /* [I] */ - SRPL_ENTRY(rtpcb) rop_list; + SMR_TAILQ_ENTRY(rtpcb) rop_list; struct refcnt rop_refcnt; struct timeout rop_timeout; unsigned int rop_msgfilter; /* [s] */ @@ -162,8 +160,7 @@ struct rtpcb { #define sotortpcb(so) ((struct rtpcb *)(so)->so_pcb) struct rtptable { - SRPL_HEAD(, rtpcb) rtp_list; - struct srpl_rc rtp_rc; + SMR_TAILQ_HEAD(, rtpcb) rtp_list; struct rwlock rtp_lk; unsigned int rtp_count; }; @@ -185,29 +182,12 @@ struct rtptable rtptable; void route_prinit(void) { - srpl_rc_init(&rtptable.rtp_rc, rcb_ref, rcb_unref, NULL); rw_init(&rtptable.rtp_lk, "rtsock"); - SRPL_INIT(&rtptable.rtp_list); + SMR_TAILQ_INIT(&rtptable.rtp_list); pool_init(&rtpcb_pool, sizeof(struct rtpcb), 0, IPL_SOFTNET, PR_WAITOK, "rtpcb", NULL); } -void -rcb_ref(void *null, void *v) -{ - struct rtpcb *rop = v; - - refcnt_take(&rop->rop_refcnt); -} - -void -rcb_unref(void *null, void *v) -{ - struct rtpcb *rop = v; - - refcnt_rele_wake(&rop->rop_refcnt); -} - int route_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, struct mbuf *control, struct proc *p) @@ -325,8 +305,7 @@ route_attach(struct socket *so, int prot so->so_options |= SO_USELOOPBACK; rw_enter(&rtptable.rtp_lk, RW_WRITE); - SRPL_INSERT_HEAD_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, - rop_list); + SMR_TAILQ_INSERT_HEAD_LOCKED(&rtptable.rtp_list, rop, rop_list); rtptable.rtp_count++; rw_exit(&rtptable.rtp_lk); @@ -347,13 +326,13 @@ route_detach(struct socket *so) rw_enter(&rtptable.rtp_lk, RW_WRITE); rtptable.rtp_count--; - SRPL_REMOVE_LOCKED(&rtptable.rtp_rc, &rtptable.rtp_list, rop, rtpcb, - rop_list); + SMR_TAILQ_REMOVE_LOCKED(&rtptable.rtp_list, rop, rop_list); rw_exit(&rtptable.rtp_lk); sounlock(so); /* wait for all references to drop */ + smr_barrier(); refcnt_finalize(&rop->rop_refcnt, "rtsockrefs"); timeout_del_barrier(&rop->rop_timeout); @@ -501,7 +480,6 @@ route_input(struct mbuf *m0, struct sock struct rtpcb *rop; struct rt_msghdr *rtm; struct mbuf *m = m0; - struct srp_ref sr; /* ensure that we can access the rtm_type via mtod() */ if (m->m_len < offsetof(struct rt_msghdr, rtm_type) + 1) { @@ -509,7 +487,8 @@ route_input(struct mbuf *m0, struct sock return; } - SRPL_FOREACH(rop, &sr, &rtptable.rtp_list, rop_list) { + smr_read_enter(); + SMR_TAILQ_FOREACH(rop, &rtptable.rtp_list, rop_list) { /* * If route socket is bound to an address family only send * messages that match the address family. Address family @@ -519,7 +498,8 @@ route_input(struct mbuf *m0, struct sock rop->rop_proto != sa_family) continue; - + refcnt_take(&rop->rop_refcnt); + smr_read_leave(); so = rop->rop_socket; solock(so); @@ -579,8 +559,10 @@ route_input(struct mbuf *m0, struct sock rtm_sendup(so, m); next: sounlock(so); + smr_read_enter(); + refcnt_rele_wake(&rop->rop_refcnt); } - SRPL_LEAVE(&sr); + smr_read_leave(); m_freem(m); }