if_clone_create() has the races caused by context switch. ---- cut begin ---- if_clone_create(const char *name, int rdomain) { struct if_clone *ifc; struct ifnet *ifp; int unit, ret;
ifc = if_clone_lookup(name, &unit); /* [1] */ if (ifc == NULL) return (EINVAL); if (ifunit(name) != NULL) /* [2] */ return (EEXIST); ret = (*ifc->ifc_create)(ifc, unit); /* [3] */ if (ret != 0 || (ifp = ifunit(name)) == NULL) /* [4] */ return (ret); NET_LOCK(); if_addgroup(ifp, ifc->ifc_name); if (rdomain != 0) if_setrdomain(ifp, rdomain); NET_UNLOCK(); return (ret); } ---- cut end ---- The race is: Thread 1: 1. We pass the string "ifacename0" with contains interface name and it's unit to if_clone_lookup(). if_clone_lookup() extracts the unit '0' and return it to us as integer `unit'. this `unit' is our local variable, nobody knows what we are going to create interface with this unit. 2. There is no "ifacename0" yet, so ifunit() will return NULL. 3. We did some checks add call `ifc_create' which will call pseudo-driver's callback to create instance with passed `unit'. We initialize ifnet with our softwere context and the `if_xname' set to "ifacename0". 3.1. We do if_attach() which call NET_LOCK() before we link `ifnet' to `if_list'. Also if_attach() doesn't check anything so we always attach passed `ifnet'. Context was switched to Thread 2: 1. We also passed string "ifacename0" to if_clone_lookup() and received `unit' with value `0' 2. There is no "ifacename0" yet, so ifunit() will return NULL. 3. All checks done, we call `ifc_create'. We initialize out softwere context with passed `unit', Who knows do we chech is software context for `unit' already exists before? In fact we don't :) 3.1 We initialized `ifnet' with the `if_xname' set to "ifacename0" and we link it `if_list'. 4. This check is passed, `ifc_create' returned `0' and we have "ifacename0" linked so ifunit() will return `ifp'. We returned to userland with success and switched to Thread 1: We continue 3.1. if_attach() continue his work and we have another `ifnet' with `if_xname' set to "ifacename0" in list. Now we have inconsistent `if_list'. 4. We do these checks. ifunit() will return pointer to `ifp' with requested "ifacename0". All ok we return to userland with success. Diff below fixes this reces. Each `struct if_clone' has the bitmap where requested `unit' marked as busy if it was not allocated before. We have new function if_clone_alloc_unit() for. If `unit' already was allocated if_clone_alloc_unit() will return EEXIST. Also we have if_clone_rele_unit() to release `unit'. We do unit allocation before we do context switch so now we can't allocate the same unit twice or more. Index: net/if.c =================================================================== RCS file: /cvs/src/sys/net/if.c,v retrieving revision 1.610 diff -u -p -r1.610 if.c --- net/if.c 22 Jun 2020 09:45:13 -0000 1.610 +++ net/if.c 26 Jun 2020 13:49:08 -0000 @@ -157,6 +157,8 @@ void if_linkstate_task(void *); int if_clone_list(struct if_clonereq *); struct if_clone *if_clone_lookup(const char *, int *); +int if_clone_alloc_unit(struct if_clone *, int); +void if_clone_rele_unit(struct if_clone *, int); int if_group_egress_build(void); @@ -1244,19 +1246,23 @@ if_clone_create(const char *name, int rd { struct if_clone *ifc; struct ifnet *ifp; - int unit, ret; + int unit, error; ifc = if_clone_lookup(name, &unit); if (ifc == NULL) return (EINVAL); + error = if_clone_alloc_unit(ifc, unit); + if (error != 0) + return (error); - if (ifunit(name) != NULL) - return (EEXIST); - - ret = (*ifc->ifc_create)(ifc, unit); + if (ifunit(name) != NULL) { + error = (EEXIST); + goto rele; + } - if (ret != 0 || (ifp = ifunit(name)) == NULL) - return (ret); + error = (*ifc->ifc_create)(ifc, unit); + if (error != 0 || (ifp = ifunit(name)) == NULL) + return (0); NET_LOCK(); if_addgroup(ifp, ifc->ifc_name); @@ -1264,7 +1270,10 @@ if_clone_create(const char *name, int rd if_setrdomain(ifp, rdomain); NET_UNLOCK(); - return (ret); + return (0); +rele: + if_clone_rele_unit(ifc, unit); + return (error); } /* @@ -1275,9 +1284,9 @@ if_clone_destroy(const char *name) { struct if_clone *ifc; struct ifnet *ifp; - int ret; + int ret, unit; - ifc = if_clone_lookup(name, NULL); + ifc = if_clone_lookup(name, &unit); if (ifc == NULL) return (EINVAL); @@ -1297,6 +1306,7 @@ if_clone_destroy(const char *name) } NET_UNLOCK(); ret = (*ifc->ifc_destroy)(ifp); + if_clone_rele_unit(ifc, unit); return (ret); } @@ -1342,12 +1352,96 @@ if_clone_lookup(const char *name, int *u unit = (unit * 10) + (*cp++ - '0'); } - if (unitp != NULL) - *unitp = unit; + *unitp = unit; return (ifc); } /* + * Allocate unit for cloned network interface. + */ +int if_clone_alloc_unit(struct if_clone *ifc, int unit) +{ + int word, bit, ret; + + word = unit / (sizeof(*ifc->ifc_map) * 8); + bit = unit % (sizeof(*ifc->ifc_map) * 8); + + rw_enter_write(&ifc->ifc_lock); + + if(word >= ifc->ifc_map_size) { + u_long *map; + int size; + + size = word + 1; + map = mallocarray(size, sizeof(*map), M_TEMP, M_WAITOK | + M_ZERO); + + if (ifc->ifc_map != NULL) { + memcpy(map, ifc->ifc_map, ifc->ifc_map_size); + free(ifc->ifc_map, M_TEMP, + ifc->ifc_map_size * sizeof(*map)); + } + + ifc->ifc_map = map; + ifc->ifc_map_size = size; + } + + if (ifc->ifc_map[word] & (1UL << bit)) + ret = EEXIST; + else { + ifc->ifc_map[word] |= (1UL << bit); + ret = 0; + } + + rw_exit_write(&ifc->ifc_lock); + + return ret; +} + +/* + * Release allocated unit for cloned network interface. + */ +void if_clone_rele_unit(struct if_clone *ifc, int unit) +{ + int word, bit; + + word = unit / (sizeof(*ifc->ifc_map) * 8); + bit = unit % (sizeof(*ifc->ifc_map) * 8); + + rw_enter_write(&ifc->ifc_lock); + KASSERT(word < ifc->ifc_map_size); + + ifc->ifc_map[word] &= ~(1UL << bit); + + if (ifc->ifc_map[word] == 0) { + u_long *map; + int size; + + size = ifc->ifc_map_size - 2; + while (size>=0) { + if (ifc->ifc_map[size] != 0) + break; + --size; + } + if (size<0) { + size = 0; + map = NULL; + } else { + size += 1; + map = mallocarray(size, sizeof(*map), M_TEMP, + M_WAITOK); + memcpy(map, ifc->ifc_map, size); + free(ifc->ifc_map, M_TEMP, + ifc->ifc_map_size * sizeof(*map)); + } + + ifc->ifc_map = map; + ifc->ifc_map_size = size; + } + rw_exit_write(&ifc->ifc_lock); +} + +/* * Register a network interface cloner. */ void @@ -1360,6 +1454,7 @@ if_clone_attach(struct if_clone *ifc) * initialization, the if_cloners becomes immutable. */ KASSERT(pdevinit_done == 0); + rw_init(&ifc->ifc_lock, "icflck"); LIST_INSERT_HEAD(&if_cloners, ifc, ifc_list); if_cloners_count++; } Index: net/if_var.h =================================================================== RCS file: /cvs/src/sys/net/if_var.h,v retrieving revision 1.105 diff -u -p -r1.105 if_var.h --- net/if_var.h 12 May 2020 08:49:54 -0000 1.105 +++ net/if_var.h 26 Jun 2020 13:49:08 -0000 @@ -45,6 +45,7 @@ #include <sys/task.h> #include <sys/time.h> #include <sys/timeout.h> +#include <sys/rwlock.h> #include <net/ifq.h> @@ -86,6 +87,10 @@ struct if_clone { const char *ifc_name; /* name of device, e.g. `gif' */ size_t ifc_namelen; /* length of name */ + struct rwlock ifc_lock; /* lock for map */ + u_long *ifc_map; /* units map */ + int ifc_map_size; /* units map size */ + int (*ifc_create)(struct if_clone *, int); int (*ifc_destroy)(struct ifnet *); }; @@ -95,6 +100,8 @@ struct if_clone { .ifc_list = { NULL, NULL }, \ .ifc_name = name, \ .ifc_namelen = sizeof(name) - 1, \ + .ifc_map = NULL, \ + .ifc_map_size = 0, \ .ifc_create = create, \ .ifc_destroy = destroy, \ }