On Mon, Jul 13, 2020 at 12:52:15PM +0300, Vitaliy Makkoveev wrote:
> On Mon, Jul 13, 2020 at 09:53:44AM +0200, Martin Pieuchot wrote:
> > On 06/07/20(Mon) 15:44, Vitaliy Makkoveev wrote:
> > > > On 6 Jul 2020, at 12:17, Martin Pieuchot <[email protected]> wrote:
> > > > Assertions and documentation are more important than preventing races
> > > > because they allow to build awareness and elegant solutions instead of
> > > > hacking diffs until stuff work without knowing why.
> > > >
> > > > There are two cases where `ifp' are inserted into `ifnet':
> > > > 1. by autoconf during boot or hotplug
> > > > 2. by cloning ioctl
> > > >
> > > > In the second case it is always about pseudo-devices. So the assertion
> > > > should be conditional like:
> > > >
> > > > if (ISSET(ifp->if_xflags, IFXF_CLONED))
> > > > rw_assert_wrlock(&ifc_lock);
> > > >
> > > > In other words this fixes serializes insertions/removal on the global
> > > > list `ifnet', the KERNEL_LOCK() being still required for reading it.
> > > >
> > > > Is there any other data structure which ends up being protected by this
> > > > approach and could be documented?
> > >
> > > We should be sure there is no multiple `ifnet’s in `if_list’ with the same
> > > `if_xname’.
> >
> > That's a symptom of a bug. Checking for a symptom won't prevent another
> > type of corruption, maybe next time it will be a corrupted pointer?
>
> Absolutely no. You don't break the list do you understand this?
>
> >
> > > And the assertion you proposed looks not obvious here.
> >
> > Why, is it because of the if() check? That's required unless we change
> > put all if_attach() functions under the lock which would require changing
> > all driver in-tree. However since drivers for physical devices are being
> > attached without having multiple CPUs running there's no possible race.
>
> Because we should keep `if_list’ be linked by objects with unique
> `if_xname’. The modificatios of this list is not the problem. Problem is
> inconsistency caused by not unique `if_xname'. You are not fixing
> problem, just simptom.
>
> >
> > > Assertion like below looks more reasonable but introduces performance
> > > impact.
> >
> > We should first aim for correctness then performance. In this case,
> > performance is not even an issue because interfaces are not created
> > often compared to the rate of processing packets.
> >
>
> Well, so let's do "KASSERT(ifunit(ifp->if_xname) == NULL);" here.
Updated diff below.
Now we have assertion in if_attach{,head}().
Index: sys/net/if.c
===================================================================
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.612
diff -u -p -r1.612 if.c
--- sys/net/if.c 10 Jul 2020 13:23:34 -0000 1.612
+++ sys/net/if.c 13 Jul 2020 10:50:44 -0000
@@ -155,6 +155,8 @@ int if_getgrouplist(caddr_t);
void if_linkstate(struct ifnet *);
void if_linkstate_task(void *);
+int if_clone_lock(struct if_clone *);
+void if_clone_unlock(struct if_clone *);
int if_clone_list(struct if_clonereq *);
struct if_clone *if_clone_lookup(const char *, int *);
@@ -524,6 +526,7 @@ if_attachhead(struct ifnet *ifp)
{
if_attach_common(ifp);
NET_LOCK();
+ KASSERT(ifunit(ifp->if_xname) == NULL);
TAILQ_INSERT_HEAD(&ifnet, ifp, if_list);
if_attachsetup(ifp);
NET_UNLOCK();
@@ -534,6 +537,7 @@ if_attach(struct ifnet *ifp)
{
if_attach_common(ifp);
NET_LOCK();
+ KASSERT(ifunit(ifp->if_xname) == NULL);
TAILQ_INSERT_TAIL(&ifnet, ifp, if_list);
if_attachsetup(ifp);
NET_UNLOCK();
@@ -1244,27 +1248,35 @@ if_clone_create(const char *name, int rd
{
struct if_clone *ifc;
struct ifnet *ifp;
- int unit, ret;
+ int unit, error;
ifc = if_clone_lookup(name, &unit);
if (ifc == NULL)
return (EINVAL);
- if (ifunit(name) != NULL)
- return (EEXIST);
+ error = if_clone_lock(ifc);
+ if (error != 0)
+ return (error);
+
+ if (ifunit(name) != NULL) {
+ error = (EEXIST);
+ goto unlock;
+ }
- ret = (*ifc->ifc_create)(ifc, unit);
+ error = (*ifc->ifc_create)(ifc, unit);
- if (ret != 0 || (ifp = ifunit(name)) == NULL)
- return (ret);
+ if (error != 0 || (ifp = ifunit(name)) == NULL)
+ goto unlock;
NET_LOCK();
if_addgroup(ifp, ifc->ifc_name);
if (rdomain != 0)
if_setrdomain(ifp, rdomain);
NET_UNLOCK();
+unlock:
+ if_clone_unlock(ifc);
- return (ret);
+ return (error);
}
/*
@@ -1275,18 +1287,26 @@ if_clone_destroy(const char *name)
{
struct if_clone *ifc;
struct ifnet *ifp;
- int ret;
+ int error;
ifc = if_clone_lookup(name, NULL);
if (ifc == NULL)
return (EINVAL);
+ error = if_clone_lock(ifc);
+ if (error != 0)
+ return (error);
+
ifp = ifunit(name);
- if (ifp == NULL)
- return (ENXIO);
+ if (ifp == NULL) {
+ error = (ENXIO);
+ goto unlock;
+ }
- if (ifc->ifc_destroy == NULL)
- return (EOPNOTSUPP);
+ if (ifc->ifc_destroy == NULL) {
+ error = (EOPNOTSUPP);
+ goto unlock;
+ }
NET_LOCK();
if (ifp->if_flags & IFF_UP) {
@@ -1296,9 +1316,56 @@ if_clone_destroy(const char *name)
splx(s);
}
NET_UNLOCK();
- ret = (*ifc->ifc_destroy)(ifp);
+ error = (*ifc->ifc_destroy)(ifp);
+unlock:
+ if_clone_unlock(ifc);
+
+ return (error);
+}
+
+/*
+ * Lock a clone network interface.
+ */
+int
+if_clone_lock(struct if_clone *ifc)
+{
+ int error;
+
+ rw_enter_write(&ifc->ifc_lock);
+
+ while (ifc->ifc_flags & IFC_CREATE_LOCKED) {
+ ifc->ifc_flags |= IFC_CREATE_LOCKWAIT;
+ error = rwsleep_nsec(&ifc->ifc_flags, &ifc->ifc_lock,
+ PWAIT|PCATCH, "ifclk", INFSLP);
+ if(error != 0) {
+ ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT;
+ rw_exit_write(&ifc->ifc_lock);
+ return error;
+ }
+ }
+ ifc->ifc_flags |= IFC_CREATE_LOCKED;
+ ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT;
+
+ rw_exit_write(&ifc->ifc_lock);
+
+ return 0;
+}
+
+/*
+ * Unlock a clone network interface.
+ */
+void
+if_clone_unlock(struct if_clone *ifc)
+{
+ rw_enter_write(&ifc->ifc_lock);
+
+ ifc->ifc_flags &= ~IFC_CREATE_LOCKED;
+ if (ifc->ifc_flags & IFC_CREATE_LOCKWAIT) {
+ ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT;
+ wakeup(&ifc->ifc_flags);
+ }
- return (ret);
+ rw_exit_write(&ifc->ifc_lock);
}
/*