dlg@ could reproduce a panic by running dhclient in a loop on one of his machines.
It turns out that there is a race between arplookup() and arpcache() inside in_arpinput(). If another CPU removes the ARP entry from the table via RTM_DELETE, it frees the ARP storage. That means we cannot update an ARP cache entry without holding the KERNEL_LOCK(). The diff below should prevent the race. A better solution would be to delay the pool_put() until we call rtfree(9), but this needs more work. Index: netinet/if_ether.c =================================================================== RCS file: /cvs/src/sys/netinet/if_ether.c,v retrieving revision 1.217 diff -u -p -u -1 -1 -r1.217 if_ether.c --- netinet/if_ether.c 11 Jul 2016 09:23:06 -0000 1.217 +++ netinet/if_ether.c 12 Jul 2016 08:36:18 -0000 @@ -201,23 +201,23 @@ arp_rtrequest(struct ifnet *ifp, int req } if (ifa) { KASSERT(ifa == rt->rt_ifa); rt->rt_expire = 0; } break; case RTM_DELETE: if (la == NULL) break; LIST_REMOVE(la, la_list); - rt->rt_llinfo = 0; + rt->rt_llinfo = NULL; rt->rt_flags &= ~RTF_LLINFO; la_hold_total -= ml_purge(&la->la_ml); pool_put(&arp_pool, la); } } /* * Broadcast an ARP request. Caller specifies: * - arp header source ip address * - arp header target ip address * - arp header source ethernet address @@ -499,23 +499,28 @@ in_arpinput(struct ifnet *ifp, struct mb /* Do we have an ARP cache for the sender? Create if we are target. */ rt = arplookup(&isaddr, target, 0, rdomain); /* Check sender against our interface addresses. 
*/ if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) && rt->rt_ifidx == ifp->if_index && isaddr.s_addr != INADDR_ANY) { inet_ntop(AF_INET, &isaddr, addr, sizeof(addr)); log(LOG_ERR, "duplicate IP address %s sent from ethernet " "address %s\n", addr, ether_sprintf(ea->arp_sha)); itaddr = isaddr; } else if (rt != NULL) { - if (arpcache(ifp, ea, rt)) + int error; + + KERNEL_LOCK(); + error = arpcache(ifp, ea, rt); + KERNEL_UNLOCK(); + if (error) goto out; } if (op == ARPOP_REQUEST) { uint8_t *eaddr; if (target) { /* We already have all info for the reply */ eaddr = LLADDR(ifp->if_sadl); } else { rtfree(rt); @@ -541,23 +546,31 @@ out: int arpcache(struct ifnet *ifp, struct ether_arp *ea, struct rtentry *rt) { struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo; struct sockaddr_dl *sdl = satosdl(rt->rt_gateway); struct in_addr *spa = (struct in_addr *)ea->arp_spa; char addr[INET_ADDRSTRLEN]; struct ifnet *rifp; unsigned int len; int changed = 0; + KERNEL_ASSERT_LOCKED(); KASSERT(sdl != NULL); + + /* + * This can happen if the entry has been deleted by another CPU + * after we found it. + */ + if (la == NULL) + return (0); if (sdl->sdl_alen > 0) { if (memcmp(ea->arp_sha, LLADDR(sdl), sdl->sdl_alen)) { if (ISSET(rt->rt_flags, RTF_PERMANENT_ARP|RTF_LOCAL)) { inet_ntop(AF_INET, spa, addr, sizeof(addr)); log(LOG_WARNING, "arp: attempt to overwrite " "permanent entry for %s by %s on %s\n", addr, ether_sprintf(ea->arp_sha), ifp->if_xname); return (-1); } else if (rt->rt_ifidx != ifp->if_index) { #if NCARP > 0