On Mon, Jul 13, 2020 at 09:39:38AM +0200, Martin Pieuchot wrote:
> On 11/07/20(Sat) 23:51, Vitaliy Makkoveev wrote:
> > [...]
> > The way you suggest to go is to introduce rwlock and serialize
> > pppacopen() and pppacclose(). This is bad idea because we will sleep
> > while we are holding rwlock.
>
> That's the whole point of a rwlock to be able to sleep while holding the
> lock. The goal is to prevent any other thread coming from userland to
> enter any code path leading to the same data structure.
>
> This is the same as what the KERNEL_LOCK() was supposed to do assuming
> there were no sleeping points in if_attach() and pipex_iface_init().
>
> > Also this is bad idea because you should
> > prevent access to `sc' which is being destroyed because you can grab it
> > by concurrent thread.
>
> Which data structure is the other thread using to get a reference on `sc'?
>
> If the data structure is protected by the rwlock, like I suggest for
> ifunit(), there's no problem. If it is protected by the KERNEL_LOCK()
> then any sleeping point can lead to a race. That's why we're doing such
> changes.
>
> In case of a data structure protected by the KERNEL_LOCK() the easiest
> way to deal with sleeping point is to re-check when coming back to
> sleep. This works well if the sleeping point is not deep into another
> layer.
>
> Another way is to have a per-driver lock or serialization mechanism.
>
> > You must serialize *all* access to this `sc'
> > elsewhere your "protection" is useless.
>
> The question is not access to `sc' itself, but: which global data
> structure holds a reference to this `sc'? If the data structure is
> common to all the network stack, like ifunit()'s, then we should look
> for a solution for all the network stack with all the usages of the list.
> That includes many driver's *open() functions, the cloning ioctls, etc.
>
> If the data structure is per-driver then the locking/serialization
> mechanism is per-driver.
>
> > pppx(4) had no problems with unit protection. Also it had no problems
> > to access incomplete `pxi'. Now pppx(4) has fixed access to `pxi' which
> > is being destroyed. And this is the way to go in pppac(4) layer too.
> >
> > We have pppx_dev2pxd() to obtain `pxd'. While we adding extra check to
> > pppx_dev2pxd() this is not system wide. Also pppac(4) already has
> > `sc_dead' to prevent concurrent pppac_ioctl() access to dying `sc'. You
> > suggest to serialize pppac_ioctl() too?
>
> The way `sc_dead' is used in other drivers is a way to prevent
> per-driver ioctl(2) while a pseudo-device is being detached. It assumes
> the NET_LOCK() is held around pseudo-device ioctl(2) so the flag is
> protected (but not documented) by the NET_LOCK(). If you want to do the
> same go for it, but please do not add another meaning to the same mechanism.
> Having drivers that work similarly reduces maintenance effort.
>
This time `sc_dead' is used only to prevent `IFF_UP' and `IFF_RUNNING'
bits modification.
---- cut begin ----
1372 pppac_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1373 {
1374 struct pppac_softc *sc = ifp->if_softc;
1375 /* struct ifreq *ifr = (struct ifreq *)data; */
1376 int error = 0;
1377
1378 if (sc->sc_dead)
1379 return (ENXIO);
1380
1381 switch (cmd) {
1382 case SIOCSIFADDR:
1383 SET(ifp->if_flags, IFF_UP); /* XXX cry cry */
1384 /* FALLTHROUGH */
1385 case SIOCSIFFLAGS:
1386 if (ISSET(ifp->if_flags, IFF_UP))
1387 SET(ifp->if_flags, IFF_RUNNING);
1388 else
1389 CLR(ifp->if_flags, IFF_RUNNING);
1390 break;
1391 case SIOCSIFMTU:
1392 break;
---- cut end ----
And the only reason for it is the order of operations in pppacclose().
Since we don't destroy `ifp' by if_detach(), we can do detachment as the
first action.
---- cut begin ----
1345 pppacclose(dev_t dev, int flags, int mode, struct proc *p)
1346 {
1347 struct pppac_softc *sc = pppac_lookup(dev);
1348 struct ifnet *ifp = &sc->sc_if;
1349 int s;
1350
1351 NET_LOCK();
1352 sc->sc_dead = 1;
1353 CLR(ifp->if_flags, IFF_RUNNING);
1354 NET_UNLOCK();
1355
1356 s = splhigh();
1357 klist_invalidate(&sc->sc_rsel.si_note);
1358 klist_invalidate(&sc->sc_wsel.si_note);
1359 splx(s);
1360
1361 pipex_iface_fini(&sc->sc_pipex_iface);
1362
1363 if_detach(ifp);
1364
1365 LIST_REMOVE(sc, sc_entry);
1366 free(sc, M_DEVBUF, sizeof(*sc));
1367
1368 return (0);
1369 }
---- cut end ----
Just like below. But also we should prevent access to the dying `sc' from a
concurrent thread here, so the resulting code will be like:
---- cut begin ----
pppacclose(dev_t dev, int flags, int mode, struct proc *p)
{
	struct pppac_softc *sc;
	int s;

	/*
	 * Reject a close on a device that was never fully opened (or is
	 * already being torn down).  EBADF matches the other dev entry
	 * points (pppacread/pppacwrite/pppacioctl) in this diff.
	 */
	if ((sc = pppac_lookup(dev)) == NULL)
		return (EBADF);

	/* Mark `sc' not ready so pppac_lookup() stops handing it out. */
	sc->sc_ready = 0;

	/*
	 * Detach the interface first: `ifp' is embedded in `sc' (not
	 * allocated by if_attach()), so detaching does not free it, and
	 * doing it up front stops new network-side references to `sc'.
	 */
	if_detach(&sc->sc_if);

	s = splhigh();
	klist_invalidate(&sc->sc_rsel.si_note);
	klist_invalidate(&sc->sc_wsel.si_note);
	splx(s);

	pipex_iface_fini(&sc->sc_pipex_iface);

	LIST_REMOVE(sc, sc_entry);
	free(sc, M_DEVBUF, sizeof(*sc));

	return (0);
}
---- cut end ----
This prevents access to dying `sc' from pppac_ioctl(). Also this
prevents access to dying `sc' from concurrent pppacclose() and other
pppac(4) routines related to device node. There is no protection against
killing `sc' used by concurrent thread, but I suggest to do this in next
diff.
Do you have any objections?
Index: sys/net/if_pppx.c
===================================================================
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.95
diff -u -p -r1.95 if_pppx.c
--- sys/net/if_pppx.c 10 Jul 2020 13:26:42 -0000 1.95
+++ sys/net/if_pppx.c 13 Jul 2020 12:01:31 -0000
@@ -990,7 +990,7 @@ RBT_GENERATE(pppx_ifs, pppx_if, pxi_entr
struct pppac_softc {
struct ifnet sc_if;
- unsigned int sc_dead;
+ unsigned int sc_ready;
dev_t sc_dev;
LIST_ENTRY(pppac_softc)
sc_entry;
@@ -1043,8 +1043,11 @@ pppac_lookup(dev_t dev)
struct pppac_softc *sc;
LIST_FOREACH(sc, &pppac_devs, sc_entry) {
- if (sc->sc_dev == dev)
- return (sc);
+ if (sc->sc_dev == dev) {
+ if (sc->sc_ready != 0)
+ return (sc);
+ break;
+ }
}
return (NULL);
@@ -1059,16 +1062,19 @@ pppacattach(int n)
int
pppacopen(dev_t dev, int flags, int mode, struct proc *p)
{
- struct pppac_softc *sc;
+ struct pppac_softc *sc, *sc_tmp;
struct ifnet *ifp;
- sc = pppac_lookup(dev);
- if (sc != NULL)
- return (EBUSY);
-
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
- sc->sc_dev = dev;
+ LIST_FOREACH(sc_tmp, &pppac_devs, sc_entry) {
+ if (sc_tmp->sc_dev == dev) {
+ free(sc, M_DEVBUF, sizeof(*sc));
+ return (EBUSY);
+ }
+ }
+
+ sc->sc_dev = dev;
mtx_init(&sc->sc_rsel_mtx, IPL_SOFTNET);
mtx_init(&sc->sc_wsel_mtx, IPL_SOFTNET);
mq_init(&sc->sc_mq, IFQ_MAXLEN, IPL_SOFTNET);
@@ -1100,6 +1106,7 @@ pppacopen(dev_t dev, int flags, int mode
#endif
pipex_iface_init(&sc->sc_pipex_iface, ifp);
+ sc->sc_ready = 1;
return (0);
}
@@ -1107,12 +1114,16 @@ pppacopen(dev_t dev, int flags, int mode
int
pppacread(dev_t dev, struct uio *uio, int ioflag)
{
- struct pppac_softc *sc = pppac_lookup(dev);
- struct ifnet *ifp = &sc->sc_if;
+ struct pppac_softc *sc;
+ struct ifnet *ifp;
struct mbuf *m0, *m;
int error = 0;
size_t len;
+ if ((sc = pppac_lookup(dev)) == NULL)
+ return (EBADF);
+
+ ifp = &sc->sc_if;
if (!ISSET(ifp->if_flags, IFF_RUNNING))
return (EHOSTDOWN);
@@ -1152,12 +1163,16 @@ pppacread(dev_t dev, struct uio *uio, in
int
pppacwrite(dev_t dev, struct uio *uio, int ioflag)
{
- struct pppac_softc *sc = pppac_lookup(dev);
- struct ifnet *ifp = &sc->sc_if;
+ struct pppac_softc *sc;
+ struct ifnet *ifp;
uint32_t proto;
int error;
struct mbuf *m;
+ if ((sc = pppac_lookup(dev)) == NULL)
+ return (EBADF);
+
+ ifp = &sc->sc_if;
if (!ISSET(ifp->if_flags, IFF_RUNNING))
return (EHOSTDOWN);
@@ -1229,9 +1244,12 @@ pppacwrite(dev_t dev, struct uio *uio, i
int
pppacioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct proc *p)
{
- struct pppac_softc *sc = pppac_lookup(dev);
+ struct pppac_softc *sc;
int error = 0;
+ if ((sc = pppac_lookup(dev)) == NULL)
+ return (EBADF);
+
switch (cmd) {
case TUNSIFMODE: /* make npppd happy */
break;
@@ -1253,9 +1271,12 @@ pppacioctl(dev_t dev, u_long cmd, caddr_
int
pppacpoll(dev_t dev, int events, struct proc *p)
{
- struct pppac_softc *sc = pppac_lookup(dev);
+ struct pppac_softc *sc;
int revents = 0;
+ if ((sc = pppac_lookup(dev)) == NULL)
+ return (POLLHUP);
+
if (events & (POLLIN | POLLRDNORM)) {
if (!mq_empty(&sc->sc_mq))
revents |= events & (POLLIN | POLLRDNORM);
@@ -1274,10 +1295,13 @@ pppacpoll(dev_t dev, int events, struct
int
pppackqfilter(dev_t dev, struct knote *kn)
{
- struct pppac_softc *sc = pppac_lookup(dev);
+ struct pppac_softc *sc;
struct mutex *mtx;
struct klist *klist;
+ if ((sc = pppac_lookup(dev)) == NULL)
+ return (EBADF);
+
switch (kn->kn_filter) {
case EVFILT_READ:
mtx = &sc->sc_rsel_mtx;
@@ -1344,14 +1368,14 @@ filt_pppac_write(struct knote *kn, long
int
pppacclose(dev_t dev, int flags, int mode, struct proc *p)
{
- struct pppac_softc *sc = pppac_lookup(dev);
- struct ifnet *ifp = &sc->sc_if;
+ struct pppac_softc *sc;
int s;
- NET_LOCK();
- sc->sc_dead = 1;
- CLR(ifp->if_flags, IFF_RUNNING);
- NET_UNLOCK();
+ if ((sc = pppac_lookup(dev)) == NULL)
+ return (EBADF);
+ sc->sc_ready = 0;
+
+ if_detach(&sc->sc_if);
s = splhigh();
klist_invalidate(&sc->sc_rsel.si_note);
@@ -1360,8 +1384,6 @@ pppacclose(dev_t dev, int flags, int mod
pipex_iface_fini(&sc->sc_pipex_iface);
- if_detach(ifp);
-
LIST_REMOVE(sc, sc_entry);
free(sc, M_DEVBUF, sizeof(*sc));
@@ -1371,12 +1393,8 @@ pppacclose(dev_t dev, int flags, int mod
static int
pppac_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
- struct pppac_softc *sc = ifp->if_softc;
/* struct ifreq *ifr = (struct ifreq *)data; */
int error = 0;
-
- if (sc->sc_dead)
- return (ENXIO);
switch (cmd) {
case SIOCSIFADDR: