On Mon, May 15, 2017 at 11:49:18AM +0200, Martin Pieuchot wrote: > Here's the diff that got reverted before release. My plan is to enable > it and then get rid of the remaining splsoftnet() to be able to fix the > remaining XXXSMP. > > ok?
This diff looks familiar. Put it in so we can fix fallout. OK bluhm@ > > Index: kern/uipc_socket.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket.c,v > retrieving revision 1.182 > diff -u -p -r1.182 uipc_socket.c > --- kern/uipc_socket.c 2 Apr 2017 23:40:08 -0000 1.182 > +++ kern/uipc_socket.c 15 May 2017 09:35:52 -0000 > @@ -1038,10 +1038,12 @@ sorflush(struct socket *so) > { > struct sockbuf *sb = &so->so_rcv; > struct protosw *pr = so->so_proto; > + sa_family_t af = pr->pr_domain->dom_family; > struct sockbuf asb; > > sb->sb_flags |= SB_NOINTR; > - sblock(sb, M_WAITOK, NULL); > + sblock(sb, M_WAITOK, > + (af != PF_LOCAL && af != PF_ROUTE) ? &netlock : NULL); > socantrcvmore(so); > sbunlock(sb); > asb = *sb; > Index: kern/uipc_socket2.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_socket2.c,v > retrieving revision 1.75 > diff -u -p -r1.75 uipc_socket2.c > --- kern/uipc_socket2.c 17 Mar 2017 17:19:16 -0000 1.75 > +++ kern/uipc_socket2.c 15 May 2017 09:35:52 -0000 > @@ -299,7 +299,11 @@ soassertlocked(struct socket *so) > int > sosleep(struct socket *so, void *ident, int prio, const char *wmesg, int > timo) > { > - return tsleep(ident, prio, wmesg, timo); > + if ((so->so_proto->pr_domain->dom_family != PF_LOCAL) && > + (so->so_proto->pr_domain->dom_family != PF_ROUTE)) { > + return rwsleep(ident, &netlock, prio, wmesg, timo); > + } else > + return tsleep(ident, prio, wmesg, timo); > } > > /* > Index: net/if.c > =================================================================== > RCS file: /cvs/src/sys/net/if.c,v > retrieving revision 1.495 > diff -u -p -r1.495 if.c > --- net/if.c 9 May 2017 09:31:07 -0000 1.495 > +++ net/if.c 15 May 2017 09:35:52 -0000 > @@ -230,6 +230,12 @@ struct taskq *softnettq; > struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL); > > /* > + * Serialize socket operations to ensure no new sleeping points > + * are introduced in IP output paths. > + */ > +struct rwlock netlock = RWLOCK_INITIALIZER("netlock"); > + > +/* > * Network interface utility routines. > */ > void > @@ -1146,7 +1152,10 @@ if_clone_create(const char *name, int rd > if (ifunit(name) != NULL) > return (EEXIST); > > + /* XXXSMP breaks atomicity */ > + rw_exit_write(&netlock); > ret = (*ifc->ifc_create)(ifc, unit); > + rw_enter_write(&netlock); > > if (ret != 0 || (ifp = ifunit(name)) == NULL) > return (ret); > @@ -1188,7 +1197,10 @@ if_clone_destroy(const char *name) > splx(s); > } > > + /* XXXSMP breaks atomicity */ > + rw_exit_write(&netlock); > ret = (*ifc->ifc_destroy)(ifp); > + rw_enter_write(&netlock); > > return (ret); > } > Index: net/if_pflow.c > =================================================================== > RCS file: /cvs/src/sys/net/if_pflow.c,v > retrieving revision 1.75 > diff -u -p -r1.75 if_pflow.c > --- net/if_pflow.c 17 Mar 2017 17:19:16 -0000 1.75 > +++ net/if_pflow.c 15 May 2017 09:35:52 -0000 > @@ -463,9 +463,12 @@ pflowioctl(struct ifnet *ifp, u_long cmd > sc->sc_gcounter=pflowstats.pflow_flows; > /* send templates on startup */ > if (sc->sc_version == PFLOW_PROTO_10) { > + /* XXXSMP breaks atomicity */ > + rw_exit_write(&netlock); > s = splnet(); > pflow_sendout_ipfix_tmpl(sc); > splx(s); > + rw_enter_write(&netlock); > } > } else > ifp->if_flags &= ~IFF_RUNNING; > @@ -505,6 +508,8 @@ pflowioctl(struct ifnet *ifp, u_long cmd > sizeof(pflowr)))) > return (error); > > + /* XXXSMP breaks atomicity */ > + rw_exit_write(&netlock); > s = splnet(); > error = pflow_set(sc, &pflowr); > splx(s); > @@ -522,6 +527,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd > } else > ifp->if_flags &= ~IFF_RUNNING; > > + rw_enter_write(&netlock); > break; > > default: > Index: net/pf.c > =================================================================== > RCS file: /cvs/src/sys/net/pf.c,v > retrieving revision 1.1021 > diff -u -p -r1.1021 pf.c > --- net/pf.c 5 May 2017 16:30:39 -0000 1.1021 > +++ net/pf.c 15 May 2017 09:35:52 -0000 > @@ -1154,26 +1154,20 @@ pf_state_export(struct pfsync_state *sp, > /* END state table stuff */ > > void > -pf_purge_expired_rules(int locked) > +pf_purge_expired_rules(void) > { > struct pf_rule *r; > > + NET_ASSERT_LOCKED(); > + > if (SLIST_EMPTY(&pf_rule_gcl)) > return; > > - if (!locked) > - rw_enter_write(&pf_consistency_lock); > - else > - rw_assert_wrlock(&pf_consistency_lock); > - > while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) { > SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle); > KASSERT(r->rule_flag & PFRULE_EXPIRED); > pf_purge_rule(r); > } > - > - if (!locked) > - rw_exit_write(&pf_consistency_lock); > } > > void > @@ -1194,7 +1188,7 @@ pf_purge_thread(void *v) > if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { > pf_purge_expired_fragments(); > pf_purge_expired_src_nodes(0); > - pf_purge_expired_rules(0); > + pf_purge_expired_rules(); > nloops = 0; > } > > @@ -1241,27 +1235,20 @@ pf_state_expires(const struct pf_state * > } > > void > -pf_purge_expired_src_nodes(int waslocked) > +pf_purge_expired_src_nodes(void) > { > struct pf_src_node *cur, *next; > - int locked = waslocked; > + > + NET_ASSERT_LOCKED(); > > for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { > next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); > > if (cur->states == 0 && cur->expire <= time_uptime) { > - if (! locked) { > - rw_enter_write(&pf_consistency_lock); > - next = RB_NEXT(pf_src_tree, > - &tree_src_tracking, cur); > - locked = 1; > - } > + next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); > pf_remove_src_node(cur); > } > } > - > - if (locked && !waslocked) > - rw_exit_write(&pf_consistency_lock); > } > > void > @@ -1306,7 +1293,10 @@ pf_remove_state(struct pf_state *cur) > RB_REMOVE(pf_state_tree_id, &tree_id, cur); > #if NPFLOW > 0 > if (cur->state_flags & PFSTATE_PFLOW) { > + /* XXXSMP breaks atomicity */ > + rw_exit_write(&netlock); > export_pflow(cur); > + rw_enter_write(&netlock); > } > #endif /* NPFLOW > 0 */ > #if NPFSYNC > 0 > @@ -1331,13 +1321,12 @@ pf_remove_divert_state(struct pf_state_k > } > } > > -/* callers should hold the write_lock on pf_consistency_lock */ > void > pf_free_state(struct pf_state *cur) > { > struct pf_rule_item *ri; > > - splsoftassert(IPL_SOFTNET); > + NET_ASSERT_LOCKED(); > > #if NPFSYNC > 0 > if (pfsync_state_in_use(cur)) > @@ -1372,7 +1361,8 @@ pf_purge_expired_states(u_int32_t maxche > { > static struct pf_state *cur = NULL; > struct pf_state *next; > - int locked = 0; > + > + NET_ASSERT_LOCKED(); > > while (maxcheck--) { > /* wrap to start of list when we hit the end */ > @@ -1387,25 +1377,14 @@ pf_purge_expired_states(u_int32_t maxche > > if (cur->timeout == PFTM_UNLINKED) { > /* free removed state */ > - if (! locked) { > - rw_enter_write(&pf_consistency_lock); > - locked = 1; > - } > pf_free_state(cur); > } else if (pf_state_expires(cur) <= time_uptime) { > /* remove and free expired state */ > pf_remove_state(cur); > - if (! locked) { > - rw_enter_write(&pf_consistency_lock); > - locked = 1; > - } > pf_free_state(cur); > } > cur = next; > } > - > - if (locked) > - rw_exit_write(&pf_consistency_lock); > } > > int > Index: net/pf_ioctl.c > =================================================================== > RCS file: /cvs/src/sys/net/pf_ioctl.c,v > retrieving revision 1.310 > diff -u -p -r1.310 pf_ioctl.c > --- net/pf_ioctl.c 2 May 2017 12:27:37 -0000 1.310 > +++ net/pf_ioctl.c 15 May 2017 09:35:52 -0000 > @@ -110,7 +110,6 @@ void pf_qid2qname(u_int16_t, char > *); > void pf_qid_unref(u_int16_t); > > struct pf_rule pf_default_rule, pf_default_rule_new; > -struct rwlock pf_consistency_lock = > RWLOCK_INITIALIZER("pfcnslk"); > > struct { > char statusif[IFNAMSIZ]; > @@ -993,12 +992,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a > return (EACCES); > } > > - if (flags & FWRITE) > - rw_enter_write(&pf_consistency_lock); > - else > - rw_enter_read(&pf_consistency_lock); > - > - s = splsoftnet(); > + NET_LOCK(s); > switch (cmd) { > > case DIOCSTART: > @@ -2447,11 +2441,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a > break; > } > fail: > - splx(s); > - if (flags & FWRITE) > - rw_exit_write(&pf_consistency_lock); > - else > - rw_exit_read(&pf_consistency_lock); > + NET_UNLOCK(s); > return (error); > } > > Index: net/pf_norm.c > =================================================================== > RCS file: /cvs/src/sys/net/pf_norm.c,v > retrieving revision 1.203 > diff -u -p -r1.203 pf_norm.c > --- net/pf_norm.c 23 Apr 2017 11:37:11 -0000 1.203 > +++ net/pf_norm.c 15 May 2017 09:35:52 -0000 > @@ -176,6 +176,8 @@ pf_purge_expired_fragments(void) > struct pf_fragment *frag; > int32_t expire; > > + NET_ASSERT_LOCKED(); > + > expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG]; > while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { > if (frag->fr_timeout > expire) > Index: net/pfvar.h > =================================================================== > RCS file: /cvs/src/sys/net/pfvar.h,v > retrieving revision 1.451 > diff -u -p -r1.451 pfvar.h > --- net/pfvar.h 2 May 2017 12:27:37 -0000 1.451 > +++ net/pfvar.h 15 May 2017 09:35:52 -0000 > @@ -1621,9 +1621,9 @@ extern void > pf_tbladdr_remove(struct > extern void pf_tbladdr_copyout(struct pf_addr_wrap *); > extern void pf_calc_skip_steps(struct pf_rulequeue *); > extern void pf_purge_thread(void *); > -extern void pf_purge_expired_src_nodes(int); > +extern void pf_purge_expired_src_nodes(); > extern void pf_purge_expired_states(u_int32_t); > -extern void pf_purge_expired_rules(int); > +extern void pf_purge_expired_rules(); > extern void pf_remove_state(struct pf_state *); > extern void pf_remove_divert_state(struct pf_state_key *); > extern void pf_free_state(struct pf_state *); > @@ -1797,7 +1797,6 @@ int pf_addr_compare(struct pf_addr *, > > extern struct pf_status pf_status; > extern struct pool pf_frent_pl, pf_frag_pl; > -extern struct rwlock pf_consistency_lock; > > struct pf_pool_limit { > void *pp; > Index: sys/systm.h > =================================================================== > RCS file: /cvs/src/sys/sys/systm.h,v > retrieving revision 1.128 > diff -u -p -r1.128 systm.h > --- sys/systm.h 30 Apr 2017 16:45:46 -0000 1.128 > +++ sys/systm.h 15 May 2017 09:35:52 -0000 > @@ -293,23 +293,31 @@ int uiomove(void *, size_t, struct uio * > > #include <sys/rwlock.h> > > +extern struct rwlock netlock; > + > #define NET_LOCK(s) > \ > do { \ > + rw_enter_write(&netlock); \ > s = splsoftnet(); \ > } while (0) > > #define NET_UNLOCK(s) > \ > do { \ > splx(s); \ > + rw_exit_write(&netlock); \ > } while (0) > > #define NET_ASSERT_LOCKED() > \ > do { \ > + if (rw_status(&netlock) != RW_WRITE) \ > + splassert_fail(RW_WRITE, rw_status(&netlock), __func__);\ > splsoftassert(IPL_SOFTNET); \ > } while (0) > > #define NET_ASSERT_UNLOCKED() > \ > do { \ > + if (rw_status(&netlock) == RW_WRITE) \ > + splassert_fail(0, rw_status(&netlock), __func__); \ > } while (0) > > __returns_twice int setjmp(label_t *); > Index: uvm/uvm_vnode.c > =================================================================== > RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v > retrieving revision 1.96 > diff -u -p -r1.96 uvm_vnode.c > --- uvm/uvm_vnode.c 3 May 2017 02:43:15 -0000 1.96 > +++ uvm/uvm_vnode.c 15 May 2017 09:35:52 -0000 > @@ -1176,6 +1176,15 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t > result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL, curproc); > > if (result == 0) { > + int netlocked = (rw_status(&netlock) == RW_WRITE); > + > + /* > + * This process may already have the NET_LOCK(), if we > + * faulted in copyin() or copyout() in the network stack. > + */ > + if (netlocked) > + rw_exit_write(&netlock); > + > /* NOTE: vnode now locked! */ > if (rw == UIO_READ) > result = VOP_READ(vn, &uio, 0, curproc->p_ucred); > @@ -1183,6 +1192,9 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t > result = VOP_WRITE(vn, &uio, > (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0, > curproc->p_ucred); > + > + if (netlocked) > + rw_enter_write(&netlock); > > if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0) > VOP_UNLOCK(vn, curproc);