Hi,
With these kernel locks removed from the ARP input path, the machine
runs stable in my tests. The caller, if_netisr(), grabs the exclusive
netlock, and that should be sufficient for in_arpinput() and arpcache().
To stress the ARP resolver I run arp -nd ... in a loop.
Hrvoje: Could you run this diff on your test setup?
bluhm
Index: net/if.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/net/if.c,v
retrieving revision 1.686
diff -u -p -r1.686 if.c
--- net/if.c 5 Apr 2023 19:35:23 -0000 1.686
+++ net/if.c 6 Apr 2023 18:47:42 -0000
@@ -927,11 +927,8 @@ if_netisr(void *unused)
atomic_clearbits_int(&netisr, n);
#if NETHER > 0
- if (n & (1 << NETISR_ARP)) {
- KERNEL_LOCK();
+ if (n & (1 << NETISR_ARP))
arpintr();
- KERNEL_UNLOCK();
- }
#endif
if (n & (1 << NETISR_IP))
ipintr();
Index: netinet/if_ether.c
===================================================================
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/if_ether.c,v
retrieving revision 1.260
diff -u -p -r1.260 if_ether.c
--- netinet/if_ether.c 5 Apr 2023 21:51:47 -0000 1.260
+++ netinet/if_ether.c 6 Apr 2023 19:31:10 -0000
@@ -609,12 +609,7 @@ in_arpinput(struct ifnet *ifp, struct mb
"address %s\n", addr, ether_sprintf(ea->arp_sha));
itaddr = isaddr;
} else if (rt != NULL) {
- int error;
-
- KERNEL_LOCK();
- error = arpcache(ifp, ea, rt);
- KERNEL_UNLOCK();
- if (error)
+ if (arpcache(ifp, ea, rt))
goto out;
}
@@ -656,7 +651,7 @@ arpcache(struct ifnet *ifp, struct ether
time_t uptime;
int changed = 0;
- KERNEL_ASSERT_LOCKED();
+ NET_ASSERT_LOCKED_EXCLUSIVE();
KASSERT(sdl != NULL);
/*