On Mon, May 15, 2017 at 11:49:18AM +0200, Martin Pieuchot wrote:
> Here's the diff that got reverted before release.  My plan is to enable
> it and then get rid of the remaining splsoftnet() to be able to fix the
> remaining XXXSMP.
> 
> ok?

This diff looks familiar.  Put it in so we can fix fallout.

OK bluhm@

> 
> Index: kern/uipc_socket.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket.c,v
> retrieving revision 1.182
> diff -u -p -r1.182 uipc_socket.c
> --- kern/uipc_socket.c        2 Apr 2017 23:40:08 -0000       1.182
> +++ kern/uipc_socket.c        15 May 2017 09:35:52 -0000
> @@ -1038,10 +1038,12 @@ sorflush(struct socket *so)
>  {
>       struct sockbuf *sb = &so->so_rcv;
>       struct protosw *pr = so->so_proto;
> +     sa_family_t af = pr->pr_domain->dom_family;
>       struct sockbuf asb;
>  
>       sb->sb_flags |= SB_NOINTR;
> -     sblock(sb, M_WAITOK, NULL);
> +     sblock(sb, M_WAITOK,
> +         (af != PF_LOCAL && af != PF_ROUTE) ? &netlock : NULL);
>       socantrcvmore(so);
>       sbunlock(sb);
>       asb = *sb;
> Index: kern/uipc_socket2.c
> ===================================================================
> RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
> retrieving revision 1.75
> diff -u -p -r1.75 uipc_socket2.c
> --- kern/uipc_socket2.c       17 Mar 2017 17:19:16 -0000      1.75
> +++ kern/uipc_socket2.c       15 May 2017 09:35:52 -0000
> @@ -299,7 +299,11 @@ soassertlocked(struct socket *so)
>  int
>  sosleep(struct socket *so, void *ident, int prio, const char *wmesg, int 
> timo)
>  {
> -     return tsleep(ident, prio, wmesg, timo);
> +     if ((so->so_proto->pr_domain->dom_family != PF_LOCAL) &&
> +         (so->so_proto->pr_domain->dom_family != PF_ROUTE)) {
> +             return rwsleep(ident, &netlock, prio, wmesg, timo);
> +     } else
> +             return tsleep(ident, prio, wmesg, timo);
>  }
>  
>  /*
> Index: net/if.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.495
> diff -u -p -r1.495 if.c
> --- net/if.c  9 May 2017 09:31:07 -0000       1.495
> +++ net/if.c  15 May 2017 09:35:52 -0000
> @@ -230,6 +230,12 @@ struct taskq     *softnettq;
>  struct task if_input_task_locked = TASK_INITIALIZER(if_netisr, NULL);
>  
>  /*
> + * Serialize socket operations to ensure no new sleeping points
> + * are introduced in IP output paths.
> + */
> +struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
> +
> +/*
>   * Network interface utility routines.
>   */
>  void
> @@ -1146,7 +1152,10 @@ if_clone_create(const char *name, int rd
>       if (ifunit(name) != NULL)
>               return (EEXIST);
>  
> +     /* XXXSMP breaks atomicity */
> +     rw_exit_write(&netlock);
>       ret = (*ifc->ifc_create)(ifc, unit);
> +     rw_enter_write(&netlock);
>  
>       if (ret != 0 || (ifp = ifunit(name)) == NULL)
>               return (ret);
> @@ -1188,7 +1197,10 @@ if_clone_destroy(const char *name)
>               splx(s);
>       }
>  
> +     /* XXXSMP breaks atomicity */
> +     rw_exit_write(&netlock);
>       ret = (*ifc->ifc_destroy)(ifp);
> +     rw_enter_write(&netlock);
>  
>       return (ret);
>  }
> Index: net/if_pflow.c
> ===================================================================
> RCS file: /cvs/src/sys/net/if_pflow.c,v
> retrieving revision 1.75
> diff -u -p -r1.75 if_pflow.c
> --- net/if_pflow.c    17 Mar 2017 17:19:16 -0000      1.75
> +++ net/if_pflow.c    15 May 2017 09:35:52 -0000
> @@ -463,9 +463,12 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>                       sc->sc_gcounter=pflowstats.pflow_flows;
>                       /* send templates on startup */
>                       if (sc->sc_version == PFLOW_PROTO_10) {
> +                             /* XXXSMP breaks atomicity */
> +                             rw_exit_write(&netlock);
>                               s = splnet();
>                               pflow_sendout_ipfix_tmpl(sc);
>                               splx(s);
> +                             rw_enter_write(&netlock);
>                       }
>               } else
>                       ifp->if_flags &= ~IFF_RUNNING;
> @@ -505,6 +508,8 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>                   sizeof(pflowr))))
>                       return (error);
>  
> +             /* XXXSMP breaks atomicity */
> +             rw_exit_write(&netlock);
>               s = splnet();
>               error = pflow_set(sc, &pflowr);
>               splx(s);
> @@ -522,6 +527,7 @@ pflowioctl(struct ifnet *ifp, u_long cmd
>               } else
>                       ifp->if_flags &= ~IFF_RUNNING;
>  
> +             rw_enter_write(&netlock);
>               break;
>  
>       default:
> Index: net/pf.c
> ===================================================================
> RCS file: /cvs/src/sys/net/pf.c,v
> retrieving revision 1.1021
> diff -u -p -r1.1021 pf.c
> --- net/pf.c  5 May 2017 16:30:39 -0000       1.1021
> +++ net/pf.c  15 May 2017 09:35:52 -0000
> @@ -1154,26 +1154,20 @@ pf_state_export(struct pfsync_state *sp,
>  /* END state table stuff */
>  
>  void
> -pf_purge_expired_rules(int locked)
> +pf_purge_expired_rules(void)
>  {
>       struct pf_rule  *r;
>  
> +     NET_ASSERT_LOCKED();
> +
>       if (SLIST_EMPTY(&pf_rule_gcl))
>               return;
>  
> -     if (!locked)
> -             rw_enter_write(&pf_consistency_lock);
> -     else
> -             rw_assert_wrlock(&pf_consistency_lock);
> -
>       while ((r = SLIST_FIRST(&pf_rule_gcl)) != NULL) {
>               SLIST_REMOVE(&pf_rule_gcl, r, pf_rule, gcle);
>               KASSERT(r->rule_flag & PFRULE_EXPIRED);
>               pf_purge_rule(r);
>       }
> -
> -     if (!locked)
> -             rw_exit_write(&pf_consistency_lock);
>  }
>  
>  void
> @@ -1194,7 +1188,7 @@ pf_purge_thread(void *v)
>               if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
>                       pf_purge_expired_fragments();
>                       pf_purge_expired_src_nodes(0);
> -                     pf_purge_expired_rules(0);
> +                     pf_purge_expired_rules();
>                       nloops = 0;
>               }
>  
> @@ -1241,27 +1235,20 @@ pf_state_expires(const struct pf_state *
>  }
>  
>  void
> -pf_purge_expired_src_nodes(int waslocked)
> +pf_purge_expired_src_nodes(void)
>  {
>       struct pf_src_node              *cur, *next;
> -     int                              locked = waslocked;
> +
> +     NET_ASSERT_LOCKED();
>  
>       for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
>       next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
>  
>               if (cur->states == 0 && cur->expire <= time_uptime) {
> -                     if (! locked) {
> -                             rw_enter_write(&pf_consistency_lock);
> -                             next = RB_NEXT(pf_src_tree,
> -                                 &tree_src_tracking, cur);
> -                             locked = 1;
> -                     }
> +                     next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
>                       pf_remove_src_node(cur);
>               }
>       }
> -
> -     if (locked && !waslocked)
> -             rw_exit_write(&pf_consistency_lock);
>  }
>  
>  void
> @@ -1306,7 +1293,10 @@ pf_remove_state(struct pf_state *cur)
>       RB_REMOVE(pf_state_tree_id, &tree_id, cur);
>  #if NPFLOW > 0
>       if (cur->state_flags & PFSTATE_PFLOW) {
> +             /* XXXSMP breaks atomicity */
> +             rw_exit_write(&netlock);
>               export_pflow(cur);
> +             rw_enter_write(&netlock);
>       }
>  #endif       /* NPFLOW > 0 */
>  #if NPFSYNC > 0
> @@ -1331,13 +1321,12 @@ pf_remove_divert_state(struct pf_state_k
>       }
>  }
>  
> -/* callers should hold the write_lock on pf_consistency_lock */
>  void
>  pf_free_state(struct pf_state *cur)
>  {
>       struct pf_rule_item *ri;
>  
> -     splsoftassert(IPL_SOFTNET);
> +     NET_ASSERT_LOCKED();
>  
>  #if NPFSYNC > 0
>       if (pfsync_state_in_use(cur))
> @@ -1372,7 +1361,8 @@ pf_purge_expired_states(u_int32_t maxche
>  {
>       static struct pf_state  *cur = NULL;
>       struct pf_state         *next;
> -     int                      locked = 0;
> +
> +     NET_ASSERT_LOCKED();
>  
>       while (maxcheck--) {
>               /* wrap to start of list when we hit the end */
> @@ -1387,25 +1377,14 @@ pf_purge_expired_states(u_int32_t maxche
>  
>               if (cur->timeout == PFTM_UNLINKED) {
>                       /* free removed state */
> -                     if (! locked) {
> -                             rw_enter_write(&pf_consistency_lock);
> -                             locked = 1;
> -                     }
>                       pf_free_state(cur);
>               } else if (pf_state_expires(cur) <= time_uptime) {
>                       /* remove and free expired state */
>                       pf_remove_state(cur);
> -                     if (! locked) {
> -                             rw_enter_write(&pf_consistency_lock);
> -                             locked = 1;
> -                     }
>                       pf_free_state(cur);
>               }
>               cur = next;
>       }
> -
> -     if (locked)
> -             rw_exit_write(&pf_consistency_lock);
>  }
>  
>  int
> Index: net/pf_ioctl.c
> ===================================================================
> RCS file: /cvs/src/sys/net/pf_ioctl.c,v
> retrieving revision 1.310
> diff -u -p -r1.310 pf_ioctl.c
> --- net/pf_ioctl.c    2 May 2017 12:27:37 -0000       1.310
> +++ net/pf_ioctl.c    15 May 2017 09:35:52 -0000
> @@ -110,7 +110,6 @@ void                       pf_qid2qname(u_int16_t, char 
> *);
>  void                  pf_qid_unref(u_int16_t);
>  
>  struct pf_rule                pf_default_rule, pf_default_rule_new;
> -struct rwlock                 pf_consistency_lock = 
> RWLOCK_INITIALIZER("pfcnslk");
>  
>  struct {
>       char            statusif[IFNAMSIZ];
> @@ -993,12 +992,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
>                       return (EACCES);
>               }
>  
> -     if (flags & FWRITE)
> -             rw_enter_write(&pf_consistency_lock);
> -     else
> -             rw_enter_read(&pf_consistency_lock);
> -
> -     s = splsoftnet();
> +     NET_LOCK(s);
>       switch (cmd) {
>  
>       case DIOCSTART:
> @@ -2447,11 +2441,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t a
>               break;
>       }
>  fail:
> -     splx(s);
> -     if (flags & FWRITE)
> -             rw_exit_write(&pf_consistency_lock);
> -     else
> -             rw_exit_read(&pf_consistency_lock);
> +     NET_UNLOCK(s);
>       return (error);
>  }
>  
> Index: net/pf_norm.c
> ===================================================================
> RCS file: /cvs/src/sys/net/pf_norm.c,v
> retrieving revision 1.203
> diff -u -p -r1.203 pf_norm.c
> --- net/pf_norm.c     23 Apr 2017 11:37:11 -0000      1.203
> +++ net/pf_norm.c     15 May 2017 09:35:52 -0000
> @@ -176,6 +176,8 @@ pf_purge_expired_fragments(void)
>       struct pf_fragment      *frag;
>       int32_t                  expire;
>  
> +     NET_ASSERT_LOCKED();
> +
>       expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG];
>       while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
>               if (frag->fr_timeout > expire)
> Index: net/pfvar.h
> ===================================================================
> RCS file: /cvs/src/sys/net/pfvar.h,v
> retrieving revision 1.451
> diff -u -p -r1.451 pfvar.h
> --- net/pfvar.h       2 May 2017 12:27:37 -0000       1.451
> +++ net/pfvar.h       15 May 2017 09:35:52 -0000
> @@ -1621,9 +1621,9 @@ extern void                      
> pf_tbladdr_remove(struct 
>  extern void                   pf_tbladdr_copyout(struct pf_addr_wrap *);
>  extern void                   pf_calc_skip_steps(struct pf_rulequeue *);
>  extern void                   pf_purge_thread(void *);
> -extern void                   pf_purge_expired_src_nodes(int);
> +extern void                   pf_purge_expired_src_nodes();
>  extern void                   pf_purge_expired_states(u_int32_t);
> -extern void                   pf_purge_expired_rules(int);
> +extern void                   pf_purge_expired_rules();
>  extern void                   pf_remove_state(struct pf_state *);
>  extern void                   pf_remove_divert_state(struct pf_state_key *);
>  extern void                   pf_free_state(struct pf_state *);
> @@ -1797,7 +1797,6 @@ int              pf_addr_compare(struct pf_addr *, 
>  
>  extern struct pf_status      pf_status;
>  extern struct pool   pf_frent_pl, pf_frag_pl;
> -extern struct rwlock pf_consistency_lock;
>  
>  struct pf_pool_limit {
>       void            *pp;
> Index: sys/systm.h
> ===================================================================
> RCS file: /cvs/src/sys/sys/systm.h,v
> retrieving revision 1.128
> diff -u -p -r1.128 systm.h
> --- sys/systm.h       30 Apr 2017 16:45:46 -0000      1.128
> +++ sys/systm.h       15 May 2017 09:35:52 -0000
> @@ -293,23 +293,31 @@ int     uiomove(void *, size_t, struct uio *
>  
>  #include <sys/rwlock.h>
>  
> +extern struct rwlock netlock;
> +
>  #define      NET_LOCK(s)                                                     
> \
>  do {                                                                 \
> +     rw_enter_write(&netlock);                                       \
>       s = splsoftnet();                                               \
>  } while (0)
>  
>  #define      NET_UNLOCK(s)                                                   
> \
>  do {                                                                 \
>       splx(s);                                                        \
> +     rw_exit_write(&netlock);                                        \
>  } while (0)
>  
>  #define      NET_ASSERT_LOCKED()                                             
> \
>  do {                                                                 \
> +     if (rw_status(&netlock) != RW_WRITE)                            \
> +             splassert_fail(RW_WRITE, rw_status(&netlock), __func__);\
>       splsoftassert(IPL_SOFTNET);                                     \
>  } while (0)
>  
>  #define      NET_ASSERT_UNLOCKED()                                           
> \
>  do {                                                                 \
> +     if (rw_status(&netlock) == RW_WRITE)                            \
> +             splassert_fail(0, rw_status(&netlock), __func__);       \
>  } while (0)
>  
>  __returns_twice int  setjmp(label_t *);
> Index: uvm/uvm_vnode.c
> ===================================================================
> RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
> retrieving revision 1.96
> diff -u -p -r1.96 uvm_vnode.c
> --- uvm/uvm_vnode.c   3 May 2017 02:43:15 -0000       1.96
> +++ uvm/uvm_vnode.c   15 May 2017 09:35:52 -0000
> @@ -1176,6 +1176,15 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t 
>               result = vn_lock(vn, LK_EXCLUSIVE | LK_RECURSEFAIL, curproc);
>  
>       if (result == 0) {
> +             int netlocked = (rw_status(&netlock) == RW_WRITE);
> +
> +             /*
> +              * This process may already have the NET_LOCK(), if we
> +              * faulted in copyin() or copyout() in the network stack.
> +              */
> +             if (netlocked)
> +                     rw_exit_write(&netlock);
> +
>               /* NOTE: vnode now locked! */
>               if (rw == UIO_READ)
>                       result = VOP_READ(vn, &uio, 0, curproc->p_ucred);
> @@ -1183,6 +1192,9 @@ uvn_io(struct uvm_vnode *uvn, vm_page_t 
>                       result = VOP_WRITE(vn, &uio,
>                           (flags & PGO_PDFREECLUST) ? IO_NOCACHE : 0,
>                           curproc->p_ucred);
> +
> +             if (netlocked)
> +                     rw_enter_write(&netlock);
>  
>               if ((uvn->u_flags & UVM_VNODE_VNISLOCKED) == 0)
>                       VOP_UNLOCK(vn, curproc);

Reply via email to