On Mon, Nov 18, 2024 at 03:26:21PM -0800, Andrew Hewus Fresh wrote:
> On Sun, Nov 17, 2024 at 04:16:17PM +1000, David Gwynne wrote:
> > On Sat, Nov 16, 2024 at 07:36:37PM -0800, Andrew Hewus Fresh wrote:
> > > I finally got around to fixing my alpha, which involved replacing the
> > > disk. That meant I have to scp some stuff over to to it and after a bit
> > > of time it panics:
> > >
> > > panic: mtx 0xfffffe000002a628: locking against myself
> > > Stopped at db_enter+0x8: lda sp,10(sp)
> > > TID PID UID PRFLAGS PFLAGS CPU COMMAND
> > > *204699 79774 0 0x14000 0x200 0 softnet0
> > > db_enter(0, 7ffffe00e0003f8, 1, 8, 3, 8) at db_enter+0x8
> > > panic(?, fffffe000002a628, 1b0, 10, a, 1) at panic+0xe8
> > > mtx_enter(?, ?, 1b0, 10, a, 1) at mtx_enter+0xb4
> > > ifq_set_oactive(?, ?, 1b0, 10, a, 1) at ifq_set_oactive+0x50
> >
> > this is from src/sys/net/ifq.c r1.50 where i added a counter for the
> > number of times oactive gets set. because there's checks and multiple
> > things being tweaked i used the ifq mutex to serialise the updates.
> >
> > de(4) uses ifq_deq_begin to try and shove an mbuf onto the hardware,
> > which takes but doesnt release the ifq mutex until ifq_deq_commit or
> > ifq_deq_rollback is called. so while it's holding the mutex is calls
> > ifq_set_oactive, which also tries to take the mutex.
> >
> > i honestly don't understand what de(4) is doing with the hardware and
> > packet setup, so i dont feel confident changing the driver to avoid
> > this. the least worst alternative i could think of is to provide an
> > alternative set_oactive it can call.
> >
> > the diff below should fix this.
>
> I mean, it "fixed" the locking against myself error :-)
nice work getting a kernel build going.
> de0 at pci0 dev 11 function 0 "DEC 21040" rev 0x23, DEC 21040 pass 2.3: isa
> irq 5, address ...
> panic: mutex 0xfffffe000002a628 not held in ifq_deq_set_oactive
> Stopped at db_enter+0x8: lda sp,10(sp)
> TID PID UID PRFLAGS PFLAGS CPU COMMAND
> * 0 0 0 0x10000 0x200 0 swapper
> db_enter(0, 7ffffe00e0003f8, 1, 8, 3, fffffc0000000008) at db_enter+0x8
> panic(?, fffffe000002a628, fffffc0000bc9718, 1, 5, 1) at panic+0xe8
> ifq_deq_set_oactive(?, ?, fffffc0000bc9718, 1, 5, 1) at
> ifq_deq_set_oactive+0x8
> 8
> tulip_txput(?, ?, fffffe000002a000, ?, 5, 1) at tulip_txput+0x5e8
hrm.
this might be better. or it might not even compile.
Index: net/ifq.c
===================================================================
RCS file: /cvs/src/sys/net/ifq.c,v
diff -u -p -r1.54 ifq.c
--- net/ifq.c 9 Nov 2024 04:09:56 -0000 1.54
+++ net/ifq.c 19 Nov 2024 01:58:05 -0000
@@ -156,6 +156,17 @@ ifq_set_oactive(struct ifqueue *ifq)
}
void
+ifq_deq_set_oactive(struct ifqueue *ifq)
+{
+ MUTEX_ASSERT_LOCKED(&ifq->ifq_mtx);
+
+ if (!ifq->ifq_oactive) {
+ ifq->ifq_oactive = 1;
+ ifq->ifq_oactives++;
+ }
+}
+
+void
ifq_restart_task(void *p)
{
struct ifqueue *ifq = p;
Index: net/ifq.h
===================================================================
RCS file: /cvs/src/sys/net/ifq.h,v
diff -u -p -r1.41 ifq.h
--- net/ifq.h 10 Nov 2023 15:51:24 -0000 1.41
+++ net/ifq.h 19 Nov 2024 01:58:05 -0000
@@ -444,6 +444,7 @@ void ifq_q_leave(struct ifqueue *, voi
void ifq_serialize(struct ifqueue *, struct task *);
void ifq_barrier(struct ifqueue *);
void ifq_set_oactive(struct ifqueue *);
+void ifq_deq_set_oactive(struct ifqueue *);
int ifq_deq_sleep(struct ifqueue *, struct mbuf **, int, int,
const char *, volatile unsigned int *,
Index: dev/pci/if_de.c
===================================================================
RCS file: /cvs/src/sys/dev/pci/if_de.c,v
diff -u -p -r1.143 if_de.c
--- dev/pci/if_de.c 24 May 2024 06:02:53 -0000 1.143
+++ dev/pci/if_de.c 19 Nov 2024 01:58:05 -0000
@@ -3728,6 +3728,8 @@ tulip_txput(tulip_softc_t * const sc, st
u_int32_t d_status;
bus_dmamap_t map;
struct ifnet *ifp = &sc->tulip_if;
+ void (*set_oactive)(struct ifqueue *) =
+ notonqueue ? ifq_set_oactive : ifq_deq_set_oactive;
#if defined(TULIP_DEBUG)
if ((sc->tulip_cmdmode & TULIP_CMD_TXRUN) == 0) {
@@ -3846,8 +3848,10 @@ tulip_txput(tulip_softc_t * const sc, st
* The descriptors have been filled in. Now get ready
* to transmit.
*/
- if (!notonqueue)
+ if (!notonqueue) {
ifq_deq_commit(&ifp->if_snd, m);
+ set_oactive = ifq_set_oactive;
+ }
TULIP_SETCTX(m, map);
map = NULL;
@@ -3897,7 +3901,7 @@ tulip_txput(tulip_softc_t * const sc, st
if (sc->tulip_flags & TULIP_TXPROBE_ACTIVE) {
TULIP_CSR_WRITE(sc, csr_txpoll, 1);
- ifq_set_oactive(&sc->tulip_if.if_snd);
+ (*set_oactive)(&sc->tulip_if.if_snd);
TULIP_PERFEND(txput);
return (NULL);
}
@@ -3926,7 +3930,7 @@ tulip_txput(tulip_softc_t * const sc, st
sc->tulip_dbg.dbg_txput_finishes[6]++;
#endif
if (sc->tulip_flags & (TULIP_WANTTXSTART|TULIP_DOINGSETUP)) {
- ifq_set_oactive(&sc->tulip_if.if_snd);
+ (*set_oactive)(&sc->tulip_if.if_snd);
if ((sc->tulip_intrmask & TULIP_STS_TXINTR) == 0) {
sc->tulip_intrmask |= TULIP_STS_TXINTR;
TULIP_CSR_WRITE(sc, csr_intr, sc->tulip_intrmask);