dlg@ could reproduce a panic by running dhclient in a loop on one of his
machines.
Turns out that there's a race between arplookup() and arpcache() inside
in_arpinput(). If another CPU removes the ARP entry from the table, via
RTM_DELETE, after we looked it up but before we update it, it will free
the ARP storage (the llinfo_arp) that arpcache() is about to use. That
means we cannot update an ARP cache entry without holding the
KERNEL_LOCK().
Diff below should prevent the race. A better solution would be to delay
the pool_put() until we call rtfree(9), but this needs more work.
Index: netinet/if_ether.c
===================================================================
RCS file: /cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.217
diff -u -p -u -1 -1 -r1.217 if_ether.c
--- netinet/if_ether.c 11 Jul 2016 09:23:06 -0000 1.217
+++ netinet/if_ether.c 12 Jul 2016 08:36:18 -0000
@@ -201,23 +201,23 @@ arp_rtrequest(struct ifnet *ifp, int req
}
if (ifa) {
KASSERT(ifa == rt->rt_ifa);
rt->rt_expire = 0;
}
break;
case RTM_DELETE:
if (la == NULL)
break;
LIST_REMOVE(la, la_list);
- rt->rt_llinfo = 0;
+ rt->rt_llinfo = NULL;
rt->rt_flags &= ~RTF_LLINFO;
la_hold_total -= ml_purge(&la->la_ml);
pool_put(&arp_pool, la);
}
}
/*
* Broadcast an ARP request. Caller specifies:
* - arp header source ip address
* - arp header target ip address
* - arp header source ethernet address
@@ -499,23 +499,28 @@ in_arpinput(struct ifnet *ifp, struct mb
/* Do we have an ARP cache for the sender? Create if we are target. */
rt = arplookup(&isaddr, target, 0, rdomain);
/* Check sender against our interface addresses. */
if (rtisvalid(rt) && ISSET(rt->rt_flags, RTF_LOCAL) &&
rt->rt_ifidx == ifp->if_index && isaddr.s_addr != INADDR_ANY) {
inet_ntop(AF_INET, &isaddr, addr, sizeof(addr));
log(LOG_ERR, "duplicate IP address %s sent from ethernet "
"address %s\n", addr, ether_sprintf(ea->arp_sha));
itaddr = isaddr;
} else if (rt != NULL) {
- if (arpcache(ifp, ea, rt))
+ int error;
+
+ KERNEL_LOCK();
+ error = arpcache(ifp, ea, rt);
+ KERNEL_UNLOCK();
+ if (error)
goto out;
}
if (op == ARPOP_REQUEST) {
uint8_t *eaddr;
if (target) {
/* We already have all info for the reply */
eaddr = LLADDR(ifp->if_sadl);
} else {
rtfree(rt);
@@ -541,23 +546,31 @@ out:
int
arpcache(struct ifnet *ifp, struct ether_arp *ea, struct rtentry *rt)
{
struct llinfo_arp *la = (struct llinfo_arp *)rt->rt_llinfo;
struct sockaddr_dl *sdl = satosdl(rt->rt_gateway);
struct in_addr *spa = (struct in_addr *)ea->arp_spa;
char addr[INET_ADDRSTRLEN];
struct ifnet *rifp;
unsigned int len;
int changed = 0;
+ KERNEL_ASSERT_LOCKED();
KASSERT(sdl != NULL);
+
+ /*
+ * This can happen if the entry has been deleted by another CPU
+ * after we found it.
+ */
+ if (la == NULL)
+ return (0);
if (sdl->sdl_alen > 0) {
if (memcmp(ea->arp_sha, LLADDR(sdl), sdl->sdl_alen)) {
if (ISSET(rt->rt_flags, RTF_PERMANENT_ARP|RTF_LOCAL)) {
inet_ntop(AF_INET, spa, addr, sizeof(addr));
log(LOG_WARNING, "arp: attempt to overwrite "
"permanent entry for %s by %s on %s\n", addr,
ether_sprintf(ea->arp_sha), ifp->if_xname);
return (-1);
} else if (rt->rt_ifidx != ifp->if_index) {
#if NCARP > 0