On Mon, Jul 13, 2020 at 12:52:15PM +0300, Vitaliy Makkoveev wrote: > On Mon, Jul 13, 2020 at 09:53:44AM +0200, Martin Pieuchot wrote: > > On 06/07/20(Mon) 15:44, Vitaliy Makkoveev wrote: > > > > On 6 Jul 2020, at 12:17, Martin Pieuchot <m...@openbsd.org> wrote: > > > > Assertions and documentation are more important than preventing races > > > > because they allow to build awareness and elegant solutions instead of > > > > hacking diffs until stuff work without knowing why. > > > > > > > > There are two cases where `ifp' are inserted into `ifnet': > > > > 1. by autoconf during boot or hotplug > > > > 2. by cloning ioctl > > > > > > > > In the second case it is always about pseudo-devices. So the assertion > > > > should be conditional like: > > > > > > > > if (ISSET(ifp->if_xflags, IFXF_CLONED)) > > > > rw_assert_wrlock(&ifc_lock); > > > > > > > > In other words this fixes serializes insertions/removal on the global > > > > list `ifnet', the KERNEL_LOCK() being still required for reading it. > > > > > > > > Is there any other data structure which ends up being protected by this > > > > approach and could be documented? > > > > > > We should be sure there is no multiple `ifnet’s in `if_list’ with the same > > > `if_xname’. > > > > That's a symptom of a bug. Checking for a symptom won't prevent another > > type of corruption, maybe next time it will be a corrupted pointer? > > Absolutely no. You don't break the list do you understand this? > > > > > > And the assertion you proposed looks not obvious here. > > > > Why, is it because of the if() check? That's required unless we change > > put all if_attach() functions under the lock which would require changing > > all driver in-tree. However since drivers for physical devices are being > > attached without having multiple CPUs running there's no possible race. > > Because we should keep `if_list’ be linked by objects with unique > `if_xname’. The modificatios of this list is not the problem. Problem is > inconsistency caused by not unique `if_xname'. You are not fixing > problem, just simptom. > > > > > > Assertion like below looks more reasonable but introduces performance > > > impact. > > > > We should first aim for correctness then performance. In this case, > > performance is not even an issue because interfaces are not created > > often compared to the rate of processing packets. > > > > Well, so let's do "KASSERT(ifunit(ifp->if_xname) == NULL);" here.
Updated diff below. Now we have assertion in if_attach{,head}(). Index: sys/net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.612 diff -u -p -r1.612 if.c --- sys/net/if.c 10 Jul 2020 13:23:34 -0000 1.612 +++ sys/net/if.c 13 Jul 2020 10:50:44 -0000 @@ -155,6 +155,8 @@ int if_getgrouplist(caddr_t); void if_linkstate(struct ifnet *); void if_linkstate_task(void *); +int if_clone_lock(struct if_clone *); +void if_clone_unlock(struct if_clone *); int if_clone_list(struct if_clonereq *); struct if_clone *if_clone_lookup(const char *, int *); @@ -524,6 +526,7 @@ if_attachhead(struct ifnet *ifp) { if_attach_common(ifp); NET_LOCK(); + KASSERT(ifunit(ifp->if_xname) == NULL); TAILQ_INSERT_HEAD(&ifnet, ifp, if_list); if_attachsetup(ifp); NET_UNLOCK(); @@ -534,6 +537,7 @@ if_attach(struct ifnet *ifp) { if_attach_common(ifp); NET_LOCK(); + KASSERT(ifunit(ifp->if_xname) == NULL); TAILQ_INSERT_TAIL(&ifnet, ifp, if_list); if_attachsetup(ifp); NET_UNLOCK(); @@ -1244,27 +1248,35 @@ if_clone_create(const char *name, int rd { struct if_clone *ifc; struct ifnet *ifp; - int unit, ret; + int unit, error; ifc = if_clone_lookup(name, &unit); if (ifc == NULL) return (EINVAL); - if (ifunit(name) != NULL) - return (EEXIST); + error = if_clone_lock(ifc); + if (error != 0) + return (error); + + if (ifunit(name) != NULL) { + error = (EEXIST); + goto unlock; + } - ret = (*ifc->ifc_create)(ifc, unit); + error = (*ifc->ifc_create)(ifc, unit); - if (ret != 0 || (ifp = ifunit(name)) == NULL) - return (ret); + if (error != 0 || (ifp = ifunit(name)) == NULL) + goto unlock; NET_LOCK(); if_addgroup(ifp, ifc->ifc_name); if (rdomain != 0) if_setrdomain(ifp, rdomain); NET_UNLOCK(); +unlock: + if_clone_unlock(ifc); - return (ret); + return (error); } /* @@ -1275,18 +1287,26 @@ if_clone_destroy(const char *name) { struct if_clone *ifc; struct ifnet *ifp; - int ret; + int error; ifc = if_clone_lookup(name, NULL); if (ifc == NULL) return (EINVAL); + error = if_clone_lock(ifc); + if (error != 0) + return (error); + ifp = ifunit(name); - if (ifp == NULL) - return (ENXIO); + if (ifp == NULL) { + error = (ENXIO); + goto unlock; + } - if (ifc->ifc_destroy == NULL) - return (EOPNOTSUPP); + if (ifc->ifc_destroy == NULL) { + error = (EOPNOTSUPP); + goto unlock; + } NET_LOCK(); if (ifp->if_flags & IFF_UP) { @@ -1296,9 +1316,56 @@ if_clone_destroy(const char *name) splx(s); } NET_UNLOCK(); - ret = (*ifc->ifc_destroy)(ifp); + error = (*ifc->ifc_destroy)(ifp); +unlock: + if_clone_unlock(ifc); + + return (error); +} + +/* + * Lock a clone network interface. + */ +int +if_clone_lock(struct if_clone *ifc) +{ + int error; + + rw_enter_write(&ifc->ifc_lock); + + while (ifc->ifc_flags & IFC_CREATE_LOCKED) { + ifc->ifc_flags |= IFC_CREATE_LOCKWAIT; + error = rwsleep_nsec(&ifc->ifc_flags, &ifc->ifc_lock, + PWAIT|PCATCH, "ifclk", INFSLP); + if(error != 0) { + ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT; + rw_exit_write(&ifc->ifc_lock); + return error; + } + } + ifc->ifc_flags |= IFC_CREATE_LOCKED; + ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT; + + rw_exit_write(&ifc->ifc_lock); + + return 0; +} + +/* + * Unlock a clone network interface. + */ +void +if_clone_unlock(struct if_clone *ifc) +{ + rw_enter_write(&ifc->ifc_lock); + + ifc->ifc_flags &= ~IFC_CREATE_LOCKED; + if (ifc->ifc_flags & IFC_CREATE_LOCKWAIT) { + ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT; + wakeup(&ifc->ifc_flags); + } - return (ret); + rw_exit_write(&ifc->ifc_lock); } /*