Mark Kettenis <[email protected]> writes:
> Anyway, I think you're right in thinking that nvme_intr() needs some
> belt and suspenders. In nvme_shutdown() we delete the "normal"
> command queue, but nvme_intr() inconditionally looks at both of them.
> Now nvme_shutdown() masks the interrupt and nvme_resume() unmasks it
> only after it re-creates the "normal" command queue. But I think
> there are scenarios where we can get a spurious interrupt and it would
> check a queue that isn't there. So I think something like the diff
> below would make sense.
>
> Greg, does this fix your crash?
I applied this on top of -current. Sadly I'm still getting the same
crashes in sd_buf_done.
>
> Index: dev/ic/nvme.c
> ===================================================================
> RCS file: /cvs/src/sys/dev/ic/nvme.c,v
> diff -u -p -r1.121 nvme.c
> --- dev/ic/nvme.c 13 Jul 2024 08:59:41 -0000 1.121
> +++ dev/ic/nvme.c 7 Aug 2024 09:31:14 -0000
> @@ -418,12 +418,14 @@ nvme_attach(struct nvme_softc *sc)
>
> free_q:
> nvme_q_free(sc, sc->sc_q);
> + sc->sc_q = NULL;
> disable:
> nvme_disable(sc);
> free_ccbs:
> nvme_ccbs_free(sc, nccbs);
> free_admin_q:
> nvme_q_free(sc, sc->sc_admin_q);
> + sc->sc_admin_q = NULL;
>
> return (1);
> }
> @@ -463,6 +465,7 @@ nvme_resume(struct nvme_softc *sc)
>
> free_q:
> nvme_q_free(sc, sc->sc_q);
> + sc->sc_q = NULL;
> disable:
> nvme_disable(sc);
>
> @@ -531,6 +534,7 @@ nvme_shutdown(struct nvme_softc *sc)
> printf("%s: unable to delete q, disabling\n", DEVNAME(sc));
> goto disable;
> }
> + sc->sc_q = NULL;
>
> cc = nvme_read4(sc, NVME_CC);
> CLR(cc, NVME_CC_SHN_MASK);
> @@ -1552,9 +1556,9 @@ nvme_intr(void *xsc)
> struct nvme_softc *sc = xsc;
> int rv = 0;
>
> - if (nvme_q_complete(sc, sc->sc_q))
> + if (sc->sc_q && nvme_q_complete(sc, sc->sc_q))
> rv = 1;
> - if (nvme_q_complete(sc, sc->sc_admin_q))
> + if (sc->sc_admin_q && nvme_q_complete(sc, sc->sc_admin_q))
> rv = 1;
>
> return (rv);