Hi,
there is an issue with the admin queue of ixl(4) which leads to the
following panic when the link state changes:
uvm_fault(0xffffffff818005f8, 0x18, 0, 2) -> e
kernel: page fault trap, code=0
Stopped at ixl_intr0+0xca: movq %rdx,0x18(%rax)
TID PID UID PRFLAGS PFLAGS CPU COMMAND
392823 13219 0 0x100040 0 2 ifstated
444681 94950 90 0x1100010 0 6 ospf6d
428704 9496 90 0x1100010 0 9 ospf6d
106020 59273 85 0x1100010 0 1 ospfd
420435 72114 85 0x1100010 0 5 ospfd
295821 93368 73 0x1100010 0 3 syslogd
367116 56598 0 0x14000 0x200 7 zerothread
275385 57815 0 0x14000 0x200 4 softnet
ixl_intr0(ffff800004509000) at ixl_intr0+0xca
intr_handler(0,ffff8000044b0b80) at intr_handler+0x5b
Xintr_ioapic_edge25_untramp() at Xintr_ioapic_edge25_untramp+0x18f
acpicpu_idle() at acpicpu_idle+0x1f6
sched_idle(0) at sched_idle+0x280
end trace frame: 0x0, count: 10
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports. Insufficient info makes it difficult to find and fix bugs.
ddb{0}>
The queue is corrupted in such a way that slot->iaq_cookie is 0, which
causes the uvm fault when iatq is dereferenced.
The following diff uses a mutex to protect the admin queue and avoids
the issue above.
ok?
bye,
Jan
Index: dev/pci/if_ixl.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
retrieving revision 1.87
diff -u -p -r1.87 if_ixl.c
--- dev/pci/if_ixl.c 6 Feb 2023 20:27:45 -0000 1.87
+++ dev/pci/if_ixl.c 19 Jul 2023 07:05:40 -0000
@@ -1274,6 +1274,7 @@ struct ixl_softc {
unsigned int sc_atq_prod;
unsigned int sc_atq_cons;
+ struct mutex sc_atq_mtx;
struct ixl_dmamem sc_arq;
struct task sc_arq_task;
struct ixl_aq_bufs sc_arq_idle;
@@ -1723,6 +1724,8 @@ ixl_attach(struct device *parent, struct
/* initialise the adminq */
+ mtx_init(&sc->sc_atq_mtx, IPL_NET);
+
if (ixl_dmamem_alloc(sc, &sc->sc_atq,
sizeof(struct ixl_aq_desc) * IXL_AQ_NUM, IXL_AQ_ALIGN) != 0) {
printf("\n" "%s: unable to allocate atq\n", DEVNAME(sc));
@@ -3599,6 +3602,8 @@ ixl_atq_post(struct ixl_softc *sc, struc
struct ixl_aq_desc *atq, *slot;
unsigned int prod;
+ mtx_enter(&sc->sc_atq_mtx);
+
/* assert locked */
atq = IXL_DMA_KVA(&sc->sc_atq);
@@ -3618,6 +3623,8 @@ ixl_atq_post(struct ixl_softc *sc, struc
prod &= IXL_AQ_MASK;
sc->sc_atq_prod = prod;
ixl_wr(sc, sc->sc_aq_regs->atq_tail, prod);
+
+ mtx_leave(&sc->sc_atq_mtx);
}
static void
@@ -3628,11 +3635,15 @@ ixl_atq_done(struct ixl_softc *sc)
unsigned int cons;
unsigned int prod;
+ mtx_enter(&sc->sc_atq_mtx);
+
prod = sc->sc_atq_prod;
cons = sc->sc_atq_cons;
- if (prod == cons)
+ if (prod == cons) {
+ mtx_leave(&sc->sc_atq_mtx);
return;
+ }
atq = IXL_DMA_KVA(&sc->sc_atq);
@@ -3645,6 +3656,7 @@ ixl_atq_done(struct ixl_softc *sc)
if (!ISSET(slot->iaq_flags, htole16(IXL_AQ_DD)))
break;
+ KASSERT(slot->iaq_cookie != 0);
iatq = (struct ixl_atq *)slot->iaq_cookie;
iatq->iatq_desc = *slot;
@@ -3661,6 +3673,8 @@ ixl_atq_done(struct ixl_softc *sc)
BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
sc->sc_atq_cons = cons;
+
+ mtx_leave(&sc->sc_atq_mtx);
}
static void
@@ -3691,6 +3705,8 @@ ixl_atq_poll(struct ixl_softc *sc, struc
unsigned int prod;
unsigned int t = 0;
+ mtx_enter(&sc->sc_atq_mtx);
+
atq = IXL_DMA_KVA(&sc->sc_atq);
prod = sc->sc_atq_prod;
slot = atq + prod;
@@ -3712,8 +3728,10 @@ ixl_atq_poll(struct ixl_softc *sc, struc
while (ixl_rd(sc, sc->sc_aq_regs->atq_head) != prod) {
delaymsec(1);
- if (t++ > tm)
+ if (t++ > tm) {
+ mtx_leave(&sc->sc_atq_mtx);
return (ETIMEDOUT);
+ }
}
bus_dmamap_sync(sc->sc_dmat, IXL_DMA_MAP(&sc->sc_atq),
@@ -3724,6 +3742,7 @@ ixl_atq_poll(struct ixl_softc *sc, struc
sc->sc_atq_cons = prod;
+ mtx_leave(&sc->sc_atq_mtx);
return (0);
}