On Mon, Jul 13, 2020 at 09:39:38AM +0200, Martin Pieuchot wrote: > On 11/07/20(Sat) 23:51, Vitaliy Makkoveev wrote: > > [...] > > The way you suggest to go is to introduce rwlock and serialize > > pppacopen() and pppacclose(). This is bad idea because we will sleep > > while we are holding rwlock. > > That's the whole point of a rwlock to be able to sleep while holding the > lock. The goal is to prevent any other thread coming from userland to > enter any code path leading to the same data structure. > > This is the same as what the KERNEL_LOCK() was supposed to do assuming > there were no sleeping point in if_attach() and pipex_iface_init(). > > > Also this is bad idea because you should > > prevent access to `sc' which is being destroyed because you can grab it > > by concurrent thread. > > Which data structure is the other thread using to get a reference on `sc'? > > If the data structure is protected by the rwlock, like I suggest for > ifunit(), there's no problem. If it is protected by the KERNEL_LOCK() > then any sleeping point can lead to a race. That's why we're doing such > changes. > > In case of a data structure protected by the KERNEL_LOCK() the easiest > way to deal with sleeping point is to re-check when coming back to > sleep. This works well if the sleeping point is not deep into another > layer. > > Another way is to have a per-driver lock or serialization mechanism. > > > You must serialize *all* access to this `sc' > > elsewhere your "protection" is useless. > > The question is not access to `sc', it is access to which global data > structure having a reference to this `sc'? If the data structure is > common to all the network stack, like ifunit()'s then we should look > for a solution for all the network stack with all the usages of the list. > That includes many driver's *open() functions, the cloning ioctls, etc. > > If the data structure is per-driver then the locking/serialization > mechanism is per-driver. 
> > > pppx(4) had no problems with unit protection. Also it had no problems > > to access incomplete `pxi'. Now pppx(4) has fixed access to `pxi' which > > is being destroyed. And this is the way to go in pppac(4) layer too. > > > > We have pppx_dev2pxd() to obtain `pxd'. While we adding extra check to > > pppx_dev2pxd() this is not system wide. Also pppac(4) already has > > `sc_dead' to prevent concurrent pppac_ioctl() access to dying `sc'. You > > suggest to serialize pppac_ioctl() too? > > The way `sc_dead' is used in other drivers is a way to prevent > per-driver ioctl(2) while a pseudo-device is being detached. It assumes > the NET_LOCK() is held around pseudo-device ioctl(2) so the flag is > protected (but not documented) by the NET_LOCK(). If you want to do the > same go for it, but please do not add another meaning to the same mechanism. > Having drivers that work similarly reduces maintenance effort. >
This time `sc_dead' is used only to prevent `IFF_UP' and `IFF_RUNNING' bits modification. ---- cut begin ---- 1372 pppac_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1373 { 1374 struct pppac_softc *sc = ifp->if_softc; 1375 /* struct ifreq *ifr = (struct ifreq *)data; */ 1376 int error = 0; 1377 1378 if (sc->sc_dead) 1379 return (ENXIO); 1380 1381 switch (cmd) { 1382 case SIOCSIFADDR: 1383 SET(ifp->if_flags, IFF_UP); /* XXX cry cry */ 1384 /* FALLTHROUGH */ 1385 case SIOCSIFFLAGS: 1386 if (ISSET(ifp->if_flags, IFF_UP)) 1387 SET(ifp->if_flags, IFF_RUNNING); 1388 else 1389 CLR(ifp->if_flags, IFF_RUNNING); 1390 break; 1391 case SIOCSIFMTU: 1392 break; ---- cut end ---- And the only reason for this is the order in pppacclose(). Since we don't destroy `ifp' by if_detach(), we can do detachment as the first action. ---- cut begin ---- 1345 pppacclose(dev_t dev, int flags, int mode, struct proc *p) 1346 { 1347 struct pppac_softc *sc = pppac_lookup(dev); 1348 struct ifnet *ifp = &sc->sc_if; 1349 int s; 1350 1351 NET_LOCK(); 1352 sc->sc_dead = 1; 1353 CLR(ifp->if_flags, IFF_RUNNING); 1354 NET_UNLOCK(); 1355 1356 s = splhigh(); 1357 klist_invalidate(&sc->sc_rsel.si_note); 1358 klist_invalidate(&sc->sc_wsel.si_note); 1359 splx(s); 1360 1361 pipex_iface_fini(&sc->sc_pipex_iface); 1362 1363 if_detach(ifp); 1364 1365 LIST_REMOVE(sc, sc_entry); 1366 free(sc, M_DEVBUF, sizeof(*sc)); 1367 1368 return (0); 1369 } ---- cut end ---- Just like below. 
But also we should prevent access to dying `sc' from a concurrent thread here, so the resulting code will be like: ---- cut begin ---- pppacclose(dev_t dev, int flags, int mode, struct proc *p) { struct pppac_softc *sc; int s; if ((sc = pppac_lookup(dev)) == NULL) return ENODEV; sc->sc_ready = 0; if_detach(&sc->sc_if); s = splhigh(); klist_invalidate(&sc->sc_rsel.si_note); klist_invalidate(&sc->sc_wsel.si_note); splx(s); pipex_iface_fini(&sc->sc_pipex_iface); LIST_REMOVE(sc, sc_entry); free(sc, M_DEVBUF, sizeof(*sc)); return (0); } ---- cut end ---- This prevents access to dying `sc' from pppac_ioctl(). Also this prevents access to dying `sc' from concurrent pppacclose() and other pppac(4) routines related to device node. There is no protection against killing `sc' used by a concurrent thread, but I suggest to do this in the next diff. Do you have any objections? Index: sys/net/if_pppx.c =================================================================== RCS file: /cvs/src/sys/net/if_pppx.c,v retrieving revision 1.95 diff -u -p -r1.95 if_pppx.c --- sys/net/if_pppx.c 10 Jul 2020 13:26:42 -0000 1.95 +++ sys/net/if_pppx.c 13 Jul 2020 12:01:31 -0000 @@ -990,7 +990,7 @@ RBT_GENERATE(pppx_ifs, pppx_if, pxi_entr struct pppac_softc { struct ifnet sc_if; - unsigned int sc_dead; + unsigned int sc_ready; dev_t sc_dev; LIST_ENTRY(pppac_softc) sc_entry; @@ -1043,8 +1043,11 @@ pppac_lookup(dev_t dev) struct pppac_softc *sc; LIST_FOREACH(sc, &pppac_devs, sc_entry) { - if (sc->sc_dev == dev) - return (sc); + if (sc->sc_dev == dev) { + if (sc->sc_ready != 0) + return (sc); + break; + } } return (NULL); @@ -1059,16 +1062,19 @@ pppacattach(int n) int pppacopen(dev_t dev, int flags, int mode, struct proc *p) { - struct pppac_softc *sc; + struct pppac_softc *sc, *sc_tmp; struct ifnet *ifp; - sc = pppac_lookup(dev); - if (sc != NULL) - return (EBUSY); - sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO); - sc->sc_dev = dev; + LIST_FOREACH(sc_tmp, &pppac_devs, sc_entry) { + 
if (sc_tmp->sc_dev == dev) { + free(sc, M_DEVBUF, sizeof(*sc)); + return (EBUSY); + } + } + + sc->sc_dev = dev; mtx_init(&sc->sc_rsel_mtx, IPL_SOFTNET); mtx_init(&sc->sc_wsel_mtx, IPL_SOFTNET); mq_init(&sc->sc_mq, IFQ_MAXLEN, IPL_SOFTNET); @@ -1100,6 +1106,7 @@ pppacopen(dev_t dev, int flags, int mode #endif pipex_iface_init(&sc->sc_pipex_iface, ifp); + sc->sc_ready = 1; return (0); } @@ -1107,12 +1114,16 @@ pppacopen(dev_t dev, int flags, int mode int pppacread(dev_t dev, struct uio *uio, int ioflag) { - struct pppac_softc *sc = pppac_lookup(dev); - struct ifnet *ifp = &sc->sc_if; + struct pppac_softc *sc; + struct ifnet *ifp; struct mbuf *m0, *m; int error = 0; size_t len; + if ((sc = pppac_lookup(dev)) == NULL) + return (EBADF); + + ifp = &sc->sc_if; if (!ISSET(ifp->if_flags, IFF_RUNNING)) return (EHOSTDOWN); @@ -1152,12 +1163,16 @@ pppacread(dev_t dev, struct uio *uio, in int pppacwrite(dev_t dev, struct uio *uio, int ioflag) { - struct pppac_softc *sc = pppac_lookup(dev); - struct ifnet *ifp = &sc->sc_if; + struct pppac_softc *sc; + struct ifnet *ifp; uint32_t proto; int error; struct mbuf *m; + if ((sc = pppac_lookup(dev)) == NULL) + return (EBADF); + + ifp = &sc->sc_if; if (!ISSET(ifp->if_flags, IFF_RUNNING)) return (EHOSTDOWN); @@ -1229,9 +1244,12 @@ pppacwrite(dev_t dev, struct uio *uio, i int pppacioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct proc *p) { - struct pppac_softc *sc = pppac_lookup(dev); + struct pppac_softc *sc; int error = 0; + if ((sc = pppac_lookup(dev)) == NULL) + return (EBADF); + switch (cmd) { case TUNSIFMODE: /* make npppd happy */ break; @@ -1253,9 +1271,12 @@ pppacioctl(dev_t dev, u_long cmd, caddr_ int pppacpoll(dev_t dev, int events, struct proc *p) { - struct pppac_softc *sc = pppac_lookup(dev); + struct pppac_softc *sc; int revents = 0; + if ((sc = pppac_lookup(dev)) == NULL) + return (POLLHUP); + if (events & (POLLIN | POLLRDNORM)) { if (!mq_empty(&sc->sc_mq)) revents |= events & (POLLIN | POLLRDNORM); @@ -1274,10 
+1295,13 @@ pppacpoll(dev_t dev, int events, struct int pppackqfilter(dev_t dev, struct knote *kn) { - struct pppac_softc *sc = pppac_lookup(dev); + struct pppac_softc *sc; struct mutex *mtx; struct klist *klist; + if ((sc = pppac_lookup(dev)) == NULL) + return (EBADF); + switch (kn->kn_filter) { case EVFILT_READ: mtx = &sc->sc_rsel_mtx; @@ -1344,14 +1368,14 @@ filt_pppac_write(struct knote *kn, long int pppacclose(dev_t dev, int flags, int mode, struct proc *p) { - struct pppac_softc *sc = pppac_lookup(dev); - struct ifnet *ifp = &sc->sc_if; + struct pppac_softc *sc; int s; - NET_LOCK(); - sc->sc_dead = 1; - CLR(ifp->if_flags, IFF_RUNNING); - NET_UNLOCK(); + if ((sc = pppac_lookup(dev)) == NULL) + return (EBADF); + sc->sc_ready = 0; + + if_detach(&sc->sc_if); s = splhigh(); klist_invalidate(&sc->sc_rsel.si_note); @@ -1360,8 +1384,6 @@ pppacclose(dev_t dev, int flags, int mod pipex_iface_fini(&sc->sc_pipex_iface); - if_detach(ifp); - LIST_REMOVE(sc, sc_entry); free(sc, M_DEVBUF, sizeof(*sc)); @@ -1371,12 +1393,8 @@ pppacclose(dev_t dev, int flags, int mod static int pppac_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - struct pppac_softc *sc = ifp->if_softc; /* struct ifreq *ifr = (struct ifreq *)data; */ int error = 0; - - if (sc->sc_dead) - return (ENXIO); switch (cmd) { case SIOCSIFADDR: