Hi,
My daily netlink test found a crash during socket splicing.
[-- MARK -- Tue Jan 7 08:05:00 2025]
uvm_fault(0xffffffff828c74e8, 0x7, 0, 2) -> e
kernel: page fault trap, code=2
Stopped at taskq_next_work+0x8e: movq %rdx,0x8(%rsi)
TID PID UID PRFLAGS PFLAGS CPU COMMAND
*213124 16048 0 0x14000 0x200 3 sosplice
204927 99709 0 0x14000 0x200 0 softnet0
taskq_next_work(ffff800000078000,ffff8000359fc4c0) at taskq_next_work+0x8e
taskq_thread(ffff800000078000) at taskq_thread+0x10b
end trace frame: 0x0, count: 13
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports. Insufficient info makes it difficult to find and fix bugs.
ddb{3}> [-- MARK -- Tue Jan 7 08:10:00 2025]
I have seen it once on real hardware and once as a KVM guest. It
does not happen on the first test run, but after 4 to 8 runs it may
crash. Affected versions are:
OpenBSD 7.6-current (GENERIC.MP) #498: Mon Jan 6 12:16:01 MST 2025
[email protected]:/usr/src/sys/arch/amd64/compile/GENERIC.MP
OpenBSD 7.6-current (GENERIC.MP) #cvs : D2025.01.07.00.00.00: Tue Jan 7
07:49:46 CET 2025
[email protected]:/usr/src/sys/arch/amd64/compile/GENERIC.MP
The latter is built from sources checked out at Jan 7th 0:00 UTC.
It also has a patch to force bounce buffers.
ddb{3}> show panic
*cpu3: uvm_fault(0xffffffff828c74e8, 0x7, 0, 2) -> e
ddb{3}> trace
taskq_next_work(ffff800000078000,ffff8000359fc4c0) at taskq_next_work+0x8e
taskq_thread(ffff800000078000) at taskq_thread+0x10b
end trace frame: 0x0, count: -2
ddb{3}> show register
rdi 0
rsi 0xffffffffffffffff
rbp 0xffff8000359fc4b0
rbx 0
rdx 0xffffffffffffffff
rcx 0xffffffffffffffff
rax 0xfffffd810b110e40
r8 0xffff8000fffe346c
r9 0xd905 __ALIGN_SIZE+0xc905
r10 0x60853e4cb4db7591
r11 0x534ced3d8dc3e295
r12 0xffff8000359fc4c0
r13 0xffff8000359fc4f0
r14 0xffff800000078000
r15 0xffff800000078018
rip 0xffffffff8135c79e taskq_next_work+0x8e
cs 0x8
rflags 0x10286 __ALIGN_SIZE+0xf286
rsp 0xffff8000359fc480
ss 0x10
taskq_next_work+0x8e: movq %rdx,0x8(%rsi)
ddb{3}> ps
PID TID PPID UID S FLAGS WAIT COMMAND
34329 438044 48921 0 3 0x10008a kqread ssh
38554 178763 48921 0 3 0x100002 netlock splicebench
13632 64094 48921 0 3 0x10008a kqread ssh
48921 336230 68778 0 3 0x82 kqread perl
*16048 213124 0 0 7 0x14200 sosplice
68778 129859 29864 0 3 0x82 piperd perl
29864 404090 72569 0 3 0x10008a sigsusp ksh
72569 233951 2295 0 3 0x98 kqread sshd-session
2295 222647 41602 0 3 0x92 kqread sshd-session
65787 159909 1 0 3 0x100083 ttyin getty
52822 521335 1 0 3 0x100098 kqread cron
93395 131534 1 99 3 0x1100090 kqread sndiod
23324 284931 1 110 3 0x100090 kqread sndiod
24127 122988 73583 95 3 0x1100092 kqread smtpd
57628 242308 73583 103 3 0x1100092 kqread smtpd
63092 336176 73583 95 3 0x1100092 kqread smtpd
54166 39462 73583 95 3 0x100092 kqread smtpd
96596 436770 73583 95 3 0x1100092 kqread smtpd
12440 426907 73583 95 3 0x1100092 kqread smtpd
73583 12384 1 0 3 0x100080 kqread smtpd
66197 242621 87605 91 3 0x92 kqread snmpd_metrics
15636 443397 87605 91 3 0x1100092 kqread snmpd
87605 299396 1 0 3 0x100080 kqread snmpd
41602 99587 1 0 3 0x88 kqread sshd
79701 444348 0 0 3 0x14200 acct acct
78900 215651 0 0 3 0x14280 nfsidl nfsio
89627 199203 0 0 3 0x14280 nfsidl nfsio
61886 196957 0 0 3 0x14280 nfsidl nfsio
66558 238311 0 0 3 0x14280 nfsidl nfsio
45105 181107 1 0 3 0x100080 kqread ntpd
20800 237572 30737 83 3 0x100092 kqread ntpd
30737 124174 1 83 3 0x1100092 kqread ntpd
62099 83612 63492 74 3 0x1100092 bpf pflogd
63492 2312 1 0 3 0x80 sbwait pflogd
86525 394545 29171 73 3 0x1100090 kqread syslogd
29171 174505 1 0 3 0x100082 sbwait syslogd
5463 212983 24294 77 3 0x100092 kqread dhcpleased
48283 16523 24294 77 3 0x100092 kqread dhcpleased
24294 326084 1 0 3 0x80 kqread dhcpleased
3868 108358 20848 115 3 0x100092 kqread slaacd
9372 494521 20848 115 3 0x100092 kqread slaacd
20848 426374 1 0 3 0x100080 kqread slaacd
25680 448020 0 0 3 0x14200 bored smr
13678 254593 0 0 3 0x14200 pgzero zerothread
54721 343638 0 0 3 0x14200 aiodoned aiodoned
10780 163499 0 0 3 0x14200 syncer update
9425 411227 0 0 3 0x14200 cleaner cleaner
33622 290530 0 0 3 0x14200 reaper reaper
431 386664 0 0 3 0x14200 pgdaemon pagedaemon
67107 441071 0 0 3 0x14200 bored viomb
3869 39336 0 0 3 0x40014200 acpi0 acpi0
48416 183698 0 0 3 0x40014200 idle3
2231 214611 0 0 7 0x40014200 idle2
94933 389713 0 0 7 0x40014200 idle1
28470 359578 0 0 3 0x14200 bored softnet3
62125 473073 0 0 3 0x14200 bored softnet2
30141 475498 0 0 3 0x14200 bored softnet1
99709 204927 0 0 7 0x14200 softnet0
62376 394508 0 0 3 0x14200 bored systqmp
67599 421094 0 0 3 0x14200 bored systq
59641 156144 0 0 3 0x14200 tmoslp softclockmp
1581 212835 0 0 3 0x40014200 tmoslp softclock
77525 295571 0 0 3 0x40014200 idle0
1 424584 0 0 3 0x82 wait init
0 0 -1 0 3 0x10200 scheduler swapper
ddb{3}> x/s version
version: OpenBSD 7.6-current (GENERIC.MP) #cvs : D2025.01.07.00.00.00:
Tue Jan 7 07:49:46 CET 2025\012
[email protected]:/usr/src/sys/arch/amd64/compile/GENERIC.MP\012
ddb{0}> trace
x86_ipi_db(ffffffff827ddff0) at x86_ipi_db+0x16
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x27
memcpy() at memcpy+0x19
vio_rxeof(ffff80000012c600) at vio_rxeof+0x120
vio_rx_intr(ffff80000012d400) at vio_rx_intr+0x88
intr_handler(ffff80003590c640,ffff80000007bc80) at intr_handler+0x91
Xintr_ioapic_edge22_untramp() at Xintr_ioapic_edge22_untramp+0x18f
pf_addrcpy(ffff80003590c740,fffffd8054df402a,2) at pf_addrcpy+0x17
pf_test(2,1,ffff80000012b858,ffff80003590c9c8) at pf_test+0xe42
ip_input_if(ffff80003590c9c8,ffff80003590c9d4,5dc,0,ffff80000012b858) at
ip_input_if+0xdf
ipv4_input(ffff80000012b858,fffffd80b1539500) at ipv4_input+0x38
ether_input(ffff80000012b858,fffffd80b1539500) at ether_input+0x3df
if_input_process(ffff80000012b858,ffff80003590cab8) at if_input_process+0x78
ifiq_process(ffff80000012bc68) at ifiq_process+0x90
taskq_thread(ffff800000036000) at taskq_thread+0x129
end trace frame: 0x0, count: -16
ddb{1}> trace
x86_ipi_db(ffff80002d4f3ff0) at x86_ipi_db+0x16
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x27
acpicpu_idle() at acpicpu_idle+0x2b9
sched_idle(ffff80002d4f3ff0) at sched_idle+0x298
end trace frame: 0x0, count: -5
ddb{2}> trace
x86_ipi_db(ffff80002d4fcff0) at x86_ipi_db+0x16
x86_ipi_handler() at x86_ipi_handler+0x80
Xresume_lapic_ipi() at Xresume_lapic_ipi+0x27
acpicpu_idle() at acpicpu_idle+0x2b9
sched_idle(ffff80002d4fcff0) at sched_idle+0x298
end trace frame: 0x0, count: -5
ddb{3}> show struct taskq 0xffff800000078000
struct taskq at 0xffff800000078000 (80 bytes) {tq_state = TQ_S_RUNNING,
tq_running = {tqe_next = (struct buf *)0x100000001, tqe_prev =
0x823c4db300000001}, tq_nthreads = {tqe_next = (struct buf *)0x100000001,
tqe_prev = 0xffffffff823c4db3}, tq_flags = {tqe_next = (struct buf
*)0x823c4db300000001, tqe_prev = 0x2d505ff0ffffffff}, tq_name = {tqe_next =
(struct buf *)0xffffffff823c4db3, tqe_prev = 0xffff80002d505ff0}, tq_mtx =
{sc_if = {if_softc = (void *)0xffff80002d505ff0, if_refcnt = {r_refs = 9,
r_traceidx = 0}, if_list = {tqe_next = (struct ifnet *)0xfffffd810b110e40,
tqe_prev = 0xfffffd810b110ee8}, if_addrlist = {tqh_first = (struct ifaddr
*)0xffff8000359fc4f0, tqh_last = 0x5400000000}, if_maddrlist = {tqh_first =
(struct ifmaddr *)0x0, tqh_last = 0x15439}, if_groups = {tqh_first = (struct
ifg_list *)0xce982, tqh_last = 0x0}, if_addrhooks = {tqh_first = (struct task
*)0x0, tqh_last = 0x0}, if_linkstatehooks = {tqh_first = (struct task
*)0xfffffd8119734698, tqh_last = 0xdead007fdeadbeef}, if_detachhooks =
{tqh_first = (struct task *)0xaead9a013d5b6da7, tqh_last = 0xdeadbeefdeadbeef},
if_rtrequest = 0xdeadbeefdeadbeef, if_xname =
[-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34], if_pcount =
-559038737, if_bridgeidx = 3735928559, if_bpf = (char *)0xdeadbeefdeadbeef,
if_mcast = (char *)0x0, if_mcast6 = (char *)0x0, if_pf_kif = (char *)0x0,
if_carp_ptr = {carp_s = {sl_head = {ref = (void *)0x0}}, carp_idx = 0},
if_index = 0, if_timer = 0, if_flags = 0, if_xflags = 0, if_data = {ifi_type =
0, ifi_addrlen = 0, ifi_hdrlen = 0, ifi_link_state = 0, ifi_mtu = 0, ifi_metric
= 68780640, ifi_rdomain = 4294966657, ifi_baudrate = 16045481472033668847,
ifi_ipackets = 12586885863685450791, ifi_ierrors = 16045690984833335023,
ifi_opackets = 16045690984833335023, ifi_oerrors = 16045690984833335023,
ifi_collisions = 16045690984833335023, ifi_ibytes = 16045690984833335023,
ifi_obytes = 16045690984833335023, ifi_imcasts = 0, ifi_omcasts = 0,
ifi_iqdrops = 0, ifi_oqdrops = 0, ifi_noproto = 0, ifi_capabilities = 0,
ifi_lastchange = {tv_sec = 0, tv_usec = -2744411551176}}, if_counters = (struct
cpumem *)0xdead0062deadbeef, if_hardmtu = 1029398951, if_description =
[1,-102,-83,-82,-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34,-17,-66,-83,-34,0,0,0,0,0,0,0,0,0,0,0,0],
if_rtlabelid = 0, if_priority = 0, if_llprio = 0, if_slowtimo = {to_list =
{next = (struct circq *)0x0, prev = (struct circq *)0x0}, to_abstime = {tv_sec
= 0, tv_nsec = 0}, to_func = 0x0, to_arg = (void *)0xfffffd810804ce58,
to_process = (struct process *)0xffff800000078228, to_time = 1, to_flags = 1,
to_kclock = 3}, if_watchdogtask = {t_entry = {tqe_next = (struct task
*)0xffff800000079400, tqe_prev = 0xffff800000079f98}, t_func = 0x181c, t_arg =
(void *)0x10002ff, t_flags = 16777216, t_process = (struct process
*)0x100000000}, if_linkstatetask = {t_entry = {tqe_next = (struct task *)0x0,
tqe_prev = 0xfffffd8136942850}, t_func = 0xfffffd81369421c8, t_arg = (void
*)0xfffffd8136942e40, t_flags = 416669248, t_process = (struct process
*)0xfffffd8118d5d130}, if_input = 0x0, if_bpf_mtap = 0xdead0062deadbeef,
if_output = 0xaead9a013d5b7827, if_ll_output = 0xdeadbeefdeadbeef, if_enqueue =
0xdeadbeefdeadbeef, if_start = 0xdeadbeefdeadbeef, if_ioctl =
0xdeadbeefdeadbeef, if_watchdog = 0xdeadbeefdeadbeef, if_wol =
0xdeadbeefdeadbeef, if_snd = {ifq_if = (struct ifnet *)0x0, ifq_softnet =
(struct taskq *)0x0, _ifq_ptr = {_ifq_softc = (void *)0x0, _ifq_ifqs = [(struct
ifqueue *)0x0]}, ifq_mtx = {mtx_owner = (void *)0x0, mtx_wantipl = 0,
mtx_oldipl = 0}, ifq_ops = (const ifq_ops *)0x0, ifq_q = (void *)0x0, ifq_free
= {ml_head = (struct mbuf *)0xfffffd8103fa50a0, ml_tail = (struct mbuf
*)0xdead0062deadbeef, ml_len = 1029403815}, ifq_len = 3735928559, ifq_oactive =
3735928559, ifq_packets = 16045690984833335023, ifq_bytes =
16045690984833335023, ifq_qdrops = 16045690984833335023, ifq_errors =
16045690984833335023, ifq_mcasts = 16045690984833335023, ifq_oactives = 0,
ifq_kstat = (struct kstat *)0x0, ifq_task_mtx = {mtx_owner = (void *)0x0,
mtx_wantipl = 0, mtx_oldipl = 0}, ifq_task_list = {tqh_first = (struct task
*)0x0, tqh_last = 0x0}, ifq_serializer = (void *)0x0, ifq_bundle = {t_entry =
{tqe_next = (struct task *)0xfffffd8104530a38, tqe_prev = 0xdead007fdeadbeef},
t_func = 0xaead9a013d5b73a7, t_arg = (void *)0xdeadbeefdeadbeef, t_flags =
3735928559, t_process = (struct process *)0xdeadbeefdeadbeef}, ifq_start =
{t_entry = {tqe_next = (struct task *)0xdeadbeefdeadbeef, tqe_prev =
0xdeadbeefdeadbeef}, t_func = 0xdeadbeefdeadbeef, t_arg = (void *)0x0, t_flags
= 0, t_process = (struct process *)0x0}, ifq_restart = {t_entry = {tqe_next =
(struct task *)0x0, tqe_prev = 0x0}, t_func = 0x0, t_arg = (void *)0x0, t_flags
= 426984048, t_process = (struct process *)0xdead007fdeadbeef}, ifq_maxlen =
1029406631, ifq_idx = 2930612737}, if_ifqs = 0xdeadbeefdeadbeef, if_qstart =
0xdeadbeefdeadbeef, if_nifqs = 3735928559, if_txmit = 3735928559, if_rcv =
{ifiq_if = (struct ifnet *)0xdeadbeefdeadbeef, ifiq_softnet = (struct taskq
*)0xdeadbeefdeadbeef, _ifiq_ptr = {_ifiq_softc = (void *)0xdeadbeefdeadbeef,
_ifiq_ifiqs = [(struct ifiqueue *)0xdeadbeefdeadbeef]}, ifiq_mtx = {mtx_owner =
(void *)0x0, mtx_wantipl = 0, mtx_oldipl = 0}, ifiq_ml = {ml_head = (struct
mbuf *)0x0, ml_tail = (struct mbuf *)0x0, ml_len = 0}, ifiq_task = {t_entry =
{tqe_next = (struct task *)0x0, tqe_prev = 0x0}, t_func = 0xfffffd81041987b8,
t_arg = (void *)0xdead0062deadbeef, t_flags = 1029394599, t_process = (struct
process *)0xdeadbeefdeadbeef}, ifiq_pressure = 3735928559, ifiq_packets =
16045690984833335023, ifiq_bytes = 16045690984833335023, ifiq_fdrops =
16045690984833335023, ifiq_qdrops = 16045690984833335023, ifiq_errors = 0,
ifiq_mcasts = 0, ifiq_noproto = 0, ifiq_enqueues = 0, ifiq_dequeues = 0,
ifiq_kstat = (struct kstat *)0x0, ifiq_idx = 0}, if_iqs = 0xfffffd81054b9dc0,
if_niqs = 3735928559, if_sadl = (struct sockaddr_dl *)0xaead9a013d5b6727, if_nd
= (struct nd_ifinfo *)0xdeadbeefdeadbeef}, sc_dead = 3735928559, sc_ports =
[(struct tpmr_port *)0xdeadbeefdeadbeef,(struct tpmr_port
*)0xdeadbeefdeadbeef], sc_nports = 3735928559}, tq_worklist = {sle_next =
(struct vm_map_entry *)0xfffffd810b110e40}, tq_threads = {slh_first = (struct
taskq_thread *)0xffff8000359fc4f0}, tq_barriers = {tqe_next = (struct buf
*)0x5400000000, tqe_prev = 0x0}, tq_bgen = {tqe_next = (struct buf *)0x54,
tqe_prev = 0x1543900000000}, tq_bthreads = {tqe_next = (struct buf *)0x0,
tqe_prev = 0x15439}}
ddb{3}> show struct task 0xffff8000359fc4c0
struct task at 0xffff8000359fc4c0 (48 bytes) {t_entry = {tqe_next = (struct
task *)0xffffffffffffffff, tqe_prev = 0xffffffffffffffff}, t_func =
0xffffffff817e9c10, t_arg = (void *)0xffff800000f3b900, t_flags = 0, t_process
= (struct process *)0x0}
ddb{3}> show struct task_list 0xfffffd810b110e40
struct task_list at 0xfffffd810b110e40 (16 bytes) {tqh_first = (struct task
*)0xffffffffffffffff, tqh_last = 0xffffffffffffffff}
/home/bluhm/openbsd/cvs/src/sys/kern/kern_task.c:410
a4c: 48 8b 08 mov (%rax),%rcx
a4f: 48 8b 50 08 mov 0x8(%rax),%rdx
a53: 49 8d 76 28 lea 0x28(%r14),%rsi
a57: 48 85 c9 test %rcx,%rcx
a5a: 48 0f 45 f1 cmovne %rcx,%rsi
*a5e: 48 89 56 08 mov %rdx,0x8(%rsi)
a62: 48 8b 08 mov (%rax),%rcx
a65: 48 8b 50 08 mov 0x8(%rax),%rdx
a69: 48 89 0a mov %rcx,(%rdx)
a6c: 48 c7 40 08 ff ff ff movq $0xffffffffffffffff,0x8(%rax)
a73: ff
a74: 48 c7 00 ff ff ff ff movq $0xffffffffffffffff,(%rax)
/home/bluhm/openbsd/cvs/src/sys/kern/kern_task.c:411
395 int
396 taskq_next_work(struct taskq *tq, struct task *work)
397 {
398 struct task *next;
399
400 mtx_enter(&tq->tq_mtx);
401 while ((next = TAILQ_FIRST(&tq->tq_worklist)) == NULL) {
402 if (tq->tq_state != TQ_S_RUNNING) {
403 mtx_leave(&tq->tq_mtx);
404 return (0);
405 }
406
407 msleep_nsec(tq, &tq->tq_mtx, PWAIT, "bored", INFSLP);
408 }
409
* 410 TAILQ_REMOVE(&tq->tq_worklist, next, t_entry);
411 CLR(next->t_flags, TASK_ONQUEUE);
412
413 *work = *next; /* copy to caller to avoid races */
414
415 next = TAILQ_FIRST(&tq->tq_worklist);
416 mtx_leave(&tq->tq_mtx);
417
418 if (next != NULL && tq->tq_nthreads > 1)
419 wakeup_one(tq);
420
421 return (1);
422 }