Module Name:    src
Committed By:   jdolecek
Date:           Tue Jul 14 10:37:30 UTC 2020

Modified Files:
        src/sys/dev/ic: ciss.c cissreg.h cissvar.h
        src/sys/dev/pci: ciss_pci.c

Log Message:
add support for PERFORMANT mode, and allow MSI/MSI-X (only) for adapters which
support it

code adapted from FreeBSD, but with a fix for setting the performant bit
and pull count on command submission, as seen in the Linux hpsa driver

tested with INTx and MSI-X on HP Smart Array 11

thanks to Andreas Gustafsson for initial testing, and for providing
access to a test machine


To generate a diff of this commit:
cvs rdiff -u -r1.43 -r1.44 src/sys/dev/ic/ciss.c
cvs rdiff -u -r1.5 -r1.6 src/sys/dev/ic/cissreg.h
cvs rdiff -u -r1.8 -r1.9 src/sys/dev/ic/cissvar.h
cvs rdiff -u -r1.17 -r1.18 src/sys/dev/pci/ciss_pci.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/dev/ic/ciss.c
diff -u src/sys/dev/ic/ciss.c:1.43 src/sys/dev/ic/ciss.c:1.44
--- src/sys/dev/ic/ciss.c:1.43	Fri Jul 10 14:23:56 2020
+++ src/sys/dev/ic/ciss.c	Tue Jul 14 10:37:30 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: ciss.c,v 1.43 2020/07/10 14:23:56 jdolecek Exp $	*/
+/*	$NetBSD: ciss.c,v 1.44 2020/07/14 10:37:30 jdolecek Exp $	*/
 /*	$OpenBSD: ciss.c,v 1.68 2013/05/30 16:15:02 deraadt Exp $	*/
 
 /*
@@ -19,7 +19,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ciss.c,v 1.43 2020/07/10 14:23:56 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ciss.c,v 1.44 2020/07/14 10:37:30 jdolecek Exp $");
 
 #include "bio.h"
 
@@ -128,6 +128,98 @@ ciss_put_ccb(struct ciss_ccb *ccb)
 	mutex_exit(&sc->sc_mutex);
 }
 
+static int
+ciss_init_perf(struct ciss_softc *sc)
+{
+	struct ciss_perf_config *pc = &sc->perfcfg;
+	int error, total, rseg;
+
+	if (sc->cfg.max_perfomant_mode_cmds)
+		sc->maxcmd = sc->cfg.max_perfomant_mode_cmds;
+
+	bus_space_read_region_4(sc->sc_iot, sc->cfg_ioh,
+	    sc->cfgoff + sc->cfg.troff,
+	    (u_int32_t *)pc, sizeof(*pc) / 4);
+
+	total = sizeof(uint64_t) * sc->maxcmd;
+
+	if ((error = bus_dmamem_alloc(sc->sc_dmat, total, PAGE_SIZE, 0,
+	    sc->replyseg, 1, &rseg, BUS_DMA_WAITOK))) {
+		aprint_error(": cannot allocate perf area (%d)\n", error);
+		return -1;
+	}
+
+	if ((error = bus_dmamem_map(sc->sc_dmat, sc->replyseg, rseg, total,
+	    (void **)&sc->perf_reply, BUS_DMA_WAITOK))) {
+		aprint_error(": cannot map perf area (%d)\n", error);
+		bus_dmamem_free(sc->sc_dmat, sc->replyseg, 1);
+		return -1;
+	}
+
+	if ((error = bus_dmamap_create(sc->sc_dmat, total, 1,
+	    total, 0, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, &sc->replymap))) {
+		aprint_error(": cannot create perf dmamap (%d)\n", error);
+		bus_dmamem_unmap(sc->sc_dmat, sc->perf_reply, total);
+		sc->perf_reply = NULL;
+		bus_dmamem_free(sc->sc_dmat, sc->replyseg, 1);
+		return -1;
+	}
+
+	if ((error = bus_dmamap_load(sc->sc_dmat, sc->replymap, sc->perf_reply,
+	    total, NULL, BUS_DMA_WAITOK))) {
+		aprint_error(": cannot load perf dmamap (%d)\n", error);
+		bus_dmamap_destroy(sc->sc_dmat, sc->replymap);
+		bus_dmamem_unmap(sc->sc_dmat, sc->perf_reply, total);
+		sc->perf_reply = NULL;
+		bus_dmamem_free(sc->sc_dmat, sc->replyseg, 1);
+		return -1;
+	}
+
+	memset(sc->perf_reply, 0, total);
+
+	sc->perf_cycle = 0x1;
+	sc->perf_rqidx = 0;
+
+	/*
+	* Preload the fetch table with common command sizes.  This allows the
+	* hardware to not waste bus cycles for typical i/o commands, but also
+	* not tax the driver to be too exact in choosing sizes.  The table
+	* is optimized for page-aligned i/o's, but since most i/o comes
+	* from the various pagers, it's a reasonable assumption to make.
+	*/
+#define CISS_FETCH_COUNT(x)	\
+    (sizeof(struct ciss_cmd) + sizeof(struct ciss_sg_entry) * (x - 1) + 15) / 16
+
+	pc->fetch_count[CISS_SG_FETCH_NONE] = CISS_FETCH_COUNT(0);
+	pc->fetch_count[CISS_SG_FETCH_1] = CISS_FETCH_COUNT(1);
+	pc->fetch_count[CISS_SG_FETCH_2] = CISS_FETCH_COUNT(2);
+	pc->fetch_count[CISS_SG_FETCH_4] = CISS_FETCH_COUNT(4);
+	pc->fetch_count[CISS_SG_FETCH_8] = CISS_FETCH_COUNT(8);
+	pc->fetch_count[CISS_SG_FETCH_16] = CISS_FETCH_COUNT(16);
+	pc->fetch_count[CISS_SG_FETCH_32] = CISS_FETCH_COUNT(32);
+	pc->fetch_count[CISS_SG_FETCH_MAX] = (sc->ccblen + 15) / 16;
+
+	pc->rq_size = sc->maxcmd;
+	pc->rq_count = 1;	/* Hardcode for a single queue */
+	pc->rq_bank_hi = 0;
+	pc->rq_bank_lo = 0;
+	pc->rq[0].rq_addr_hi = 0x0;
+	pc->rq[0].rq_addr_lo = sc->replymap->dm_segs[0].ds_addr;
+
+	/*
+	 * Write back the changed configuration. It will be picked up
+	 * by the controller together with the general configuration later on.
+	 */
+	bus_space_write_region_4(sc->sc_iot, sc->cfg_ioh,
+	    sc->cfgoff + sc->cfg.troff,
+	    (u_int32_t *)pc, sizeof(*pc) / 4);
+	bus_space_barrier(sc->sc_iot, sc->cfg_ioh,
+	    sc->cfgoff + sc->cfg.troff, sizeof(*pc),
+	    BUS_SPACE_BARRIER_READ|BUS_SPACE_BARRIER_WRITE);
+
+	return 0;
+}
+
 int
 ciss_attach(struct ciss_softc *sc)
 {
@@ -138,27 +230,41 @@ ciss_attach(struct ciss_softc *sc)
 	int error, i, total, rseg, maxfer;
 	paddr_t pa;
 
-	bus_space_read_region_4(sc->sc_iot, sc->cfg_ioh, sc->cfgoff,
-	    (u_int32_t *)&sc->cfg, sizeof(sc->cfg) / 4);
-
 	if (sc->cfg.signature != CISS_SIGNATURE) {
 		aprint_error(": bad sign 0x%08x\n", sc->cfg.signature);
 		return -1;
 	}
 
-	if (!(sc->cfg.methods & CISS_METH_SIMPL)) {
-		aprint_error(": not simple 0x%08x\n", sc->cfg.methods);
+	if (!(sc->cfg.methods & (CISS_METH_SIMPL|CISS_METH_PERF))) {
+		aprint_error(": no supported method 0x%08x\n", sc->cfg.methods);
 		return -1;
 	}
 
-	sc->cfg.rmethod = CISS_METH_SIMPL;
+	if (!sc->cfg.maxsg)
+		sc->cfg.maxsg = MAXPHYS / PAGE_SIZE + 1;
+
+	sc->maxcmd = sc->cfg.maxcmd;
+	sc->maxsg = sc->cfg.maxsg;
+	if (sc->maxsg > MAXPHYS / PAGE_SIZE + 1)
+		sc->maxsg = MAXPHYS / PAGE_SIZE + 1;
+	i = sizeof(struct ciss_ccb) +
+	    sizeof(ccb->ccb_cmd.sgl[0]) * (sc->maxsg - 1);
+	for (sc->ccblen = 0x10; sc->ccblen < i; sc->ccblen <<= 1);
+
 	sc->cfg.paddr_lim = 0;			/* 32bit addrs */
 	sc->cfg.int_delay = 0;			/* disable coalescing */
 	sc->cfg.int_count = 0;
 	strlcpy(sc->cfg.hostname, "HUMPPA", sizeof(sc->cfg.hostname));
 	sc->cfg.driverf |= CISS_DRV_PRF;	/* enable prefetch */
-	if (!sc->cfg.maxsg)
-		sc->cfg.maxsg = MAXPHYS / PAGE_SIZE + 1;
+	if (CISS_PERF_SUPPORTED(sc)) {
+		sc->cfg.rmethod = CISS_METH_PERF | CISS_METH_SHORT_TAG;
+		if (ciss_init_perf(sc) != 0) {
+			/* Don't try to fallback, just bail out */
+			return -1;
+		}
+	} else {
+		sc->cfg.rmethod = CISS_METH_SIMPL;
+	}
 
 	bus_space_write_region_4(sc->sc_iot, sc->cfg_ioh, sc->cfgoff,
 	    (u_int32_t *)&sc->cfg, sizeof(sc->cfg) / 4);
@@ -178,15 +284,15 @@ ciss_attach(struct ciss_softc *sc)
 	}
 
 	if (bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_IDB) & CISS_IDB_CFG) {
-		printf(": cannot set config\n");
+		aprint_error(": cannot set config\n");
 		return -1;
 	}
 
 	bus_space_read_region_4(sc->sc_iot, sc->cfg_ioh, sc->cfgoff,
 	    (u_int32_t *)&sc->cfg, sizeof(sc->cfg) / 4);
 
-	if (!(sc->cfg.amethod & CISS_METH_SIMPL)) {
-		printf(": cannot simplify 0x%08x\n", sc->cfg.amethod);
+	if (!(sc->cfg.amethod & (CISS_METH_SIMPL|CISS_METH_PERF))) {
+		aprint_error(": cannot set method 0x%08x\n", sc->cfg.amethod);
 		return -1;
 	}
 
@@ -210,13 +316,6 @@ ciss_attach(struct ciss_softc *sc)
 	mutex_init(&sc->sc_mutex, MUTEX_DEFAULT, IPL_VM);
 	mutex_init(&sc->sc_mutex_scratch, MUTEX_DEFAULT, IPL_VM);
 	cv_init(&sc->sc_condvar, "ciss_cmd");
-	sc->maxcmd = sc->cfg.maxcmd;
-	sc->maxsg = sc->cfg.maxsg;
-	if (sc->maxsg > MAXPHYS / PAGE_SIZE + 1)
-		sc->maxsg = MAXPHYS / PAGE_SIZE + 1;
-	i = sizeof(struct ciss_ccb) +
-	    sizeof(ccb->ccb_cmd.sgl[0]) * (sc->maxsg - 1);
-	for (sc->ccblen = 0x10; sc->ccblen < i; sc->ccblen <<= 1);
 
 	total = sc->ccblen * sc->maxcmd;
 	if ((error = bus_dmamem_alloc(sc->sc_dmat, total, PAGE_SIZE, 0,
@@ -329,6 +428,9 @@ ciss_attach(struct ciss_softc *sc)
 		aprint_normal(", 64bit fifo");
 	else if (sc->cfg.methods & CISS_METH_FIFO64_RRO)
 		aprint_normal(", 64bit fifo rro");
+	aprint_normal(", method %s %#x",
+	    CISS_IS_PERF(sc) ? "perf" : "simple",
+	    sc->cfg.amethod);
 	aprint_normal("\n");
 
 	mutex_exit(&sc->sc_mutex_scratch);
@@ -431,61 +533,110 @@ cissminphys(struct buf *bp)
 	minphys(bp);
 }
 
-static struct ciss_ccb *
-ciss_poll1(struct ciss_softc *sc)
+static void
+ciss_enqueue(struct ciss_softc *sc, ciss_queue_head *q, uint32_t id)
 {
 	struct ciss_ccb *ccb;
+
+	KASSERT(mutex_owned(&sc->sc_mutex));
+
+	KASSERT((id >> 2) <= sc->maxcmd);
+	ccb = (struct ciss_ccb *) ((char *)sc->ccbs + (id >> 2) * sc->ccblen);
+	ccb->ccb_cmd.id = htole32(id);
+	ccb->ccb_cmd.id_hi = htole32(0);
+	TAILQ_INSERT_TAIL(q, ccb, ccb_link);
+}
+
+static void
+ciss_completed_simple(struct ciss_softc *sc, ciss_queue_head *q)
+{
 	uint32_t id;
 
-	if (!(bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_ISR) & sc->iem)) {
-		CISS_DPRINTF(CISS_D_CMD, ("N"));
-		return NULL;
-	}
+	KASSERT(mutex_owned(&sc->sc_mutex));
 
-	if (sc->cfg.methods & CISS_METH_FIFO64) {
-		if (bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_OUTQ64_HI) ==
-		    0xffffffff) {
-			CISS_DPRINTF(CISS_D_CMD, ("Q"));
-			return NULL;
-		}
-		id = bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_OUTQ64_LO);
-	} else if (sc->cfg.methods & CISS_METH_FIFO64_RRO) {
-		id = bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_OUTQ64_LO);
-		if (id == 0xffffffff) {
-			CISS_DPRINTF(CISS_D_CMD, ("Q"));
-			return NULL;
-		}
-		(void)bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_OUTQ64_HI);
-	} else {
-		id = bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_OUTQ);
-		if (id == 0xffffffff) {
-			CISS_DPRINTF(CISS_D_CMD, ("Q"));
-			return NULL;
+	for (;;) {
+		if (sc->cfg.methods & CISS_METH_FIFO64) {
+			if (bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+			    CISS_OUTQ64_HI) == 0xffffffff) {
+				CISS_DPRINTF(CISS_D_CMD, ("Q"));
+				break;
+			}
+			id = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+			    CISS_OUTQ64_LO);
+		} else if (sc->cfg.methods & CISS_METH_FIFO64_RRO) {
+			id = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+			    CISS_OUTQ64_LO);
+			if (id == 0xffffffff) {
+				CISS_DPRINTF(CISS_D_CMD, ("Q"));
+				break;
+			}
+			(void)bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+			    CISS_OUTQ64_HI);
+		} else {
+			id = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
+			    CISS_OUTQ);
+			if (id == 0xffffffff) {
+				CISS_DPRINTF(CISS_D_CMD, ("Q"));
+				break;
+			}
 		}
+
+		CISS_DPRINTF(CISS_D_CMD, ("got=0x%x ", id));
+		ciss_enqueue(sc, q, id);
 	}
+}
 
-	CISS_DPRINTF(CISS_D_CMD, ("got=0x%x ", id));
-	ccb = (struct ciss_ccb *) ((char *)sc->ccbs + (id >> 2) * sc->ccblen);
-	ccb->ccb_cmd.id = htole32(id);
-	ccb->ccb_cmd.id_hi = htole32(0);
-	return ccb;
+static void
+ciss_completed_perf(struct ciss_softc *sc, ciss_queue_head *q)
+{
+	uint32_t id;
+
+	KASSERT(mutex_owned(&sc->sc_mutex));
+
+	for (;;) {
+		id = sc->perf_reply[sc->perf_rqidx];
+		if ((id & CISS_CYCLE_MASK) != sc->perf_cycle)
+			break;
+
+		if (++sc->perf_rqidx == sc->maxcmd) {
+			sc->perf_rqidx = 0;
+			sc->perf_cycle ^= 1;
+		}
+
+		CISS_DPRINTF(CISS_D_CMD, ("got=0x%x ", id));
+		ciss_enqueue(sc, q, id);
+	}
 }
 
 static int
 ciss_poll(struct ciss_softc *sc, struct ciss_ccb *ccb, int ms)
 {
+	ciss_queue_head q;
 	struct ciss_ccb *ccb1;
 
+	TAILQ_INIT(&q);
 	ms /= 10;
 
 	while (ms-- > 0) {
 		DELAY(10);
-		ccb1 = ciss_poll1(sc);
-		if (ccb1 == NULL)
-			continue;
-		ciss_done(ccb1);
-		if (ccb1 == ccb)
-			return 0;
+		mutex_enter(&sc->sc_mutex);
+		if (CISS_IS_PERF(sc))
+			ciss_completed_perf(sc, &q);
+		else
+			ciss_completed_simple(sc, &q);
+		mutex_exit(&sc->sc_mutex);
+
+		while (!TAILQ_EMPTY(&q)) {
+			ccb1 = TAILQ_FIRST(&q);
+			TAILQ_REMOVE(&q, ccb1, ccb_link);
+
+			KASSERT(ccb1->ccb_state == CISS_CCB_ONQ);
+			ciss_done(ccb1);
+			if (ccb1 == ccb) {
+				KASSERT(TAILQ_EMPTY(&q));
+				return 0;
+			}
+		}
 	}
 
 	return ETIMEDOUT;
@@ -502,7 +653,6 @@ ciss_wait(struct ciss_softc *sc, struct 
 	etick = getticks() + tohz;
 
 	for (;;) {
-		ccb->ccb_state = CISS_CCB_POLL;
 		CISS_DPRINTF(CISS_D_CMD, ("cv_timedwait(%d) ", tohz));
 		mutex_enter(&sc->sc_mutex);
 		if (cv_timedwait(&sc->sc_condvar, &sc->sc_mutex, tohz)
@@ -536,6 +686,8 @@ ciss_cmd(struct ciss_ccb *ccb, int flags
 	bus_dmamap_t dmap = ccb->ccb_dmamap;
 	u_int64_t addr;
 	int i, error = 0;
+	const bool pollsleep = ((wait & (XS_CTL_POLL|XS_CTL_NOSLEEP)) ==
+	    XS_CTL_POLL);
 
 	if (ccb->ccb_state != CISS_CCB_READY) {
 		printf("%s: ccb %d not ready state=0x%x\n", device_xname(sc->sc_dev),
@@ -578,8 +730,27 @@ ciss_cmd(struct ciss_ccb *ccb, int flags
 
 		bus_dmamap_sync(sc->sc_dmat, dmap, 0, dmap->dm_mapsize,
 		    BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE);
-	} else
+
+		if (dmap->dm_nsegs == 0)
+			ccb->ccb_sg_tag = CISS_SG_FETCH_NONE;
+		else if (dmap->dm_nsegs == 1)
+			ccb->ccb_sg_tag = CISS_SG_FETCH_1;
+		else if (dmap->dm_nsegs == 2)
+			ccb->ccb_sg_tag = CISS_SG_FETCH_2;
+		else if (dmap->dm_nsegs <= 4)
+			ccb->ccb_sg_tag = CISS_SG_FETCH_4;
+		else if (dmap->dm_nsegs <= 8)
+			ccb->ccb_sg_tag = CISS_SG_FETCH_8;
+		else if (dmap->dm_nsegs <= 16)
+			ccb->ccb_sg_tag = CISS_SG_FETCH_16;
+		else if (dmap->dm_nsegs <= 32)
+			ccb->ccb_sg_tag = CISS_SG_FETCH_32;
+		else
+			ccb->ccb_sg_tag = CISS_SG_FETCH_MAX;
+	} else {
+		ccb->ccb_sg_tag = CISS_SG_FETCH_NONE;
 		cmd->sgin = 0;
+	}
 	cmd->sglen = htole16((u_int16_t)cmd->sgin);
 	memset(&ccb->ccb_err, 0, sizeof(ccb->ccb_err));
 
@@ -592,31 +763,45 @@ ciss_cmd(struct ciss_ccb *ccb, int flags
 		    bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_IMR) | sc->iem);
 #endif
 
-	ccb->ccb_state = CISS_CCB_ONQ;
+	if (!pollsleep)
+		ccb->ccb_state = CISS_CCB_ONQ;
+	else
+		ccb->ccb_state = CISS_CCB_POLL;
 	CISS_DPRINTF(CISS_D_CMD, ("submit=0x%x ", cmd->id));
+
+	addr = (u_int64_t)ccb->ccb_cmdpa;
+	if (CISS_IS_PERF(sc)) {
+		KASSERT((addr & 0xf) == 0);
+		/*
+		 * The bits in addr in performant mode mean:
+		 * - performant mode bit (bit 0)
+		 * - pull count (bits 1-3)
+		 * There is no support for ioaccel mode
+		 */
+		addr |= 1 | (ccb->ccb_sg_tag << 1);
+	}
 	if (sc->cfg.methods & (CISS_METH_FIFO64|CISS_METH_FIFO64_RRO)) {
 		/*
 		 * Write the upper 32bits immediately before the lower
 		 * 32bits and set bit 63 to indicate 64bit FIFO mode.
 		 */
-		addr = (u_int64_t)ccb->ccb_cmdpa;
 		bus_space_write_4(sc->sc_iot, sc->sc_ioh, CISS_INQ64_HI,
 		    (addr >> 32) | 0x80000000);
 		bus_space_write_4(sc->sc_iot, sc->sc_ioh, CISS_INQ64_LO,
 		    addr & 0x00000000ffffffffULL);
 	} else
 		bus_space_write_4(sc->sc_iot, sc->sc_ioh, CISS_INQ,
-		    ccb->ccb_cmdpa);
+		    (uint32_t)addr);
 
 	if (wait & XS_CTL_POLL) {
 		int ms;
 		CISS_DPRINTF(CISS_D_CMD, ("waiting "));
 
 		ms = ccb->ccb_xs ? ccb->ccb_xs->timeout : 60000;
-		if (wait & XS_CTL_NOSLEEP)
-			error = ciss_poll(sc, ccb, ms);
-		else
+		if (pollsleep)
 			error = ciss_wait(sc, ccb, ms);
+		else
+			error = ciss_poll(sc, ccb, ms);
 
 		/* if never got a chance to be done above... */
 		if (ccb->ccb_state != CISS_CCB_FREE) {
@@ -1152,38 +1337,15 @@ ciss_scsi_cmd(struct scsipi_channel *cha
 	}
 }
 
-int
-ciss_intr(void *v)
+static void
+ciss_completed_process(struct ciss_softc *sc, ciss_queue_head *q)
 {
-	struct ciss_softc *sc = v;
 	struct ciss_ccb *ccb;
-	u_int32_t id;
-	bus_size_t reg;
-	int hit = 0;
 
-	CISS_DPRINTF(CISS_D_INTR, ("intr "));
+	while (!TAILQ_EMPTY(q)) {
+		ccb = TAILQ_FIRST(q);
+		TAILQ_REMOVE(q, ccb, ccb_link);
 
-	if (!(bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_ISR) & sc->iem))
-		return 0;
-
-	if (sc->cfg.methods & CISS_METH_FIFO64)
-		reg = CISS_OUTQ64_HI;
-	else if (sc->cfg.methods & CISS_METH_FIFO64_RRO)
-		reg = CISS_OUTQ64_LO;
-	else
-		reg = CISS_OUTQ;
-	while ((id = bus_space_read_4(sc->sc_iot, sc->sc_ioh, reg)) !=
-	    0xffffffff) {
-		if (reg == CISS_OUTQ64_HI)
-			id = bus_space_read_4(sc->sc_iot, sc->sc_ioh,
-			    CISS_OUTQ64_LO);
-		else if (reg == CISS_OUTQ64_LO)
-			(void)bus_space_read_4(sc->sc_iot, sc->sc_ioh,
-			    CISS_OUTQ64_HI);
-
-		ccb = (struct ciss_ccb *) ((char *)sc->ccbs + (id >> 2) * sc->ccblen);
-		ccb->ccb_cmd.id = htole32(id);
-		ccb->ccb_cmd.id_hi = htole32(0); /* ignore the upper 32bits */
 		if (ccb->ccb_state == CISS_CCB_POLL) {
 			ccb->ccb_state = CISS_CCB_ONQ;
 			mutex_enter(&sc->sc_mutex);
@@ -1191,14 +1353,73 @@ ciss_intr(void *v)
 			mutex_exit(&sc->sc_mutex);
 		} else
 			ciss_done(ccb);
-
-		hit = 1;
 	}
+}
+
+int
+ciss_intr_simple_intx(void *v)
+{
+	struct ciss_softc *sc = v;
+	ciss_queue_head q;
+	int hit = 0;
+
+	CISS_DPRINTF(CISS_D_INTR, ("intr "));
+
+	/* XXX shouldn't be necessary, intr triggers only if enabled */
+	if (!(bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_ISR) & sc->iem))
+		return 0;
 
+	TAILQ_INIT(&q);
+	mutex_enter(&sc->sc_mutex);
+	ciss_completed_simple(sc, &q);
+	mutex_exit(&sc->sc_mutex);
+
+	hit = (!TAILQ_EMPTY(&q));
+	ciss_completed_process(sc, &q);
+
+	KASSERT(TAILQ_EMPTY(&q));
 	CISS_DPRINTF(CISS_D_INTR, ("exit\n"));
+
 	return hit;
 }
 
+int
+ciss_intr_perf_intx(void *v)
+{
+	struct ciss_softc *sc = v;
+
+	CISS_DPRINTF(CISS_D_INTR, ("intr "));
+
+	/* Clear the interrupt and flush the bridges.  Docs say that the flush
+	 * needs to be done twice, which doesn't seem right.
+	 */
+	bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_OSR);
+	bus_space_write_4(sc->sc_iot, sc->sc_ioh, CISS_ODC, CISS_ODC_CLEAR);
+
+	return ciss_intr_perf_msi(sc);
+}
+
+int
+ciss_intr_perf_msi(void *v)
+{
+	struct ciss_softc *sc = v;
+	ciss_queue_head q;
+
+	CISS_DPRINTF(CISS_D_INTR, ("intr "));
+
+	TAILQ_INIT(&q);
+	mutex_enter(&sc->sc_mutex);
+	ciss_completed_perf(sc, &q);
+	mutex_exit(&sc->sc_mutex);
+
+	ciss_completed_process(sc, &q);
+
+	KASSERT(TAILQ_EMPTY(&q));
+	CISS_DPRINTF(CISS_D_INTR, ("exit"));
+
+	return 1;
+}
+
 static void
 ciss_heartbeat(void *v)
 {

Index: src/sys/dev/ic/cissreg.h
diff -u src/sys/dev/ic/cissreg.h:1.5 src/sys/dev/ic/cissreg.h:1.6
--- src/sys/dev/ic/cissreg.h:1.5	Fri Jun 17 17:05:04 2016
+++ src/sys/dev/ic/cissreg.h	Tue Jul 14 10:37:30 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: cissreg.h,v 1.5 2016/06/17 17:05:04 christos Exp $	*/
+/*	$NetBSD: cissreg.h,v 1.6 2020/07/14 10:37:30 jdolecek Exp $	*/
 /*	$OpenBSD: cissreg.h,v 1.11 2010/06/03 01:02:13 dlg Exp $	*/
 
 /*
@@ -30,6 +30,9 @@
 #define	CISS_INTR_MSI		(1<<0)
 #define	CISS_INQ	0x40
 #define	CISS_OUTQ	0x44
+#define CISS_OSR	0x9c    /* outbound status register */
+#define CISS_ODC	0xa0    /* outbound doorbell clear register */
+#define CISS_ODC_CLEAR		(0x1)
 #define	CISS_CFG_BAR	0xb4
 #define	CISS_CFG_OFF	0xb8
 
@@ -107,8 +110,42 @@ struct ciss_config {
 #define	CISS_DRV_DBRD	0x0100
 #define	CISS_DRV_PRF	0x0200
 	u_int32_t	maxsg;
+/*
+ * these fields appear in OpenCISS Spec 1.06
+ * http://cciss.sourceforge.net/#docs
+ */
+	u_int32_t	max_logical_supported;
+	u_int32_t	max_physical_supported;
+	u_int32_t	max_physical_per_logical;
+	u_int32_t	max_perfomant_mode_cmds;
+	u_int32_t	max_block_fetch_count;
 } __packed;
 
+/*
+ * Configuration table for the Performant transport.  Only 4 request queues
+ * are mentioned in this table, though apparently up to 256 can exist.
+ */
+struct ciss_perf_config {
+	uint32_t	fetch_count[8];
+#define CISS_SG_FETCH_MAX	0
+#define CISS_SG_FETCH_1		1
+#define CISS_SG_FETCH_2		2
+#define CISS_SG_FETCH_4		3
+#define CISS_SG_FETCH_8		4
+#define CISS_SG_FETCH_16	5
+#define CISS_SG_FETCH_32	6
+#define CISS_SG_FETCH_NONE	7
+	uint32_t	rq_size;
+	uint32_t	rq_count;
+	uint32_t	rq_bank_lo;
+	uint32_t	rq_bank_hi;
+	struct {
+		uint32_t	rq_addr_lo;
+		uint32_t	rq_addr_hi;
+	} __packed rq[4];
+} __packed;
+#define	CISS_CYCLE_MASK	0x00000001
+
 struct ciss_inquiry {
 	u_int8_t	numld;
 	u_int8_t	sign[4];
@@ -411,6 +448,14 @@ struct ciss_evctrlstat { /* details poin
 	u_int8_t	prevfail;
 } __packed;
 
+struct ciss_sg_entry {
+	u_int32_t	addr_lo;
+	u_int32_t	addr_hi;
+	u_int32_t	len;
+	u_int32_t	flags;
+#define	CISS_SG_EXT	0x0001
+} __packed;
+
 struct ciss_cmd {
 	u_int8_t	resv0;	/* 00 */
 	u_int8_t	sgin;	/* 01: #sg in the cmd */
@@ -444,13 +489,7 @@ struct ciss_cmd {
 	u_int64_t	err_pa;	/* 28: pa(struct ciss_error *) */
 	u_int32_t	err_len;/* 30 */
 
-	struct {		/* 34 */
-		u_int32_t	addr_lo;
-		u_int32_t	addr_hi;
-		u_int32_t	len;
-		u_int32_t	flags;
-#define	CISS_SG_EXT	0x0001
-	} sgl[1];
+	struct ciss_sg_entry sgl[1];		/* 34 */
 } __packed;
 
 struct ciss_error {
@@ -494,9 +533,10 @@ struct ciss_ccb {
 	size_t			ccb_len;
 	void			*ccb_data;
 	bus_dmamap_t		ccb_dmamap;
+	uint8_t			ccb_sg_tag;
 
 	struct ciss_error	ccb_err;
-	struct ciss_cmd		ccb_cmd;	/* followed by sgl */
+	struct ciss_cmd		ccb_cmd __aligned(8);	/* followed by sgl */
 };
 
 typedef TAILQ_HEAD(ciss_queue_head, ciss_ccb)     ciss_queue_head;

Index: src/sys/dev/ic/cissvar.h
diff -u src/sys/dev/ic/cissvar.h:1.8 src/sys/dev/ic/cissvar.h:1.9
--- src/sys/dev/ic/cissvar.h:1.8	Fri Jul 10 14:24:14 2020
+++ src/sys/dev/ic/cissvar.h	Tue Jul 14 10:37:30 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: cissvar.h,v 1.8 2020/07/10 14:24:14 jdolecek Exp $	*/
+/*	$NetBSD: cissvar.h,v 1.9 2020/07/14 10:37:30 jdolecek Exp $	*/
 /*	$OpenBSD: cissvar.h,v 1.15 2013/05/30 16:15:02 deraadt Exp $	*/
 
 /*
@@ -68,8 +68,16 @@ struct ciss_softc {
 
 	bus_space_handle_t	cfg_ioh;
 
+	struct ciss_perf_config	perfcfg;
+	bus_dmamap_t		replymap;
+	bus_dma_segment_t	replyseg[1];
+	uint64_t		*perf_reply;
+	int			perf_rqidx, perf_cycle;
+#define	CISS_IS_PERF(sc)	((sc)->perf_reply != NULL)
+
 	int fibrillation;
 	struct ciss_config cfg;
+#define CISS_PERF_SUPPORTED(sc)	((sc)->cfg.methods & CISS_METH_PERF)
 	int cfgoff;
 	u_int32_t iem;
 	u_int32_t heartbeat;
@@ -88,4 +96,6 @@ struct ciss_rawsoftc {
 };
 
 int	ciss_attach(struct ciss_softc *sc);
-int	ciss_intr(void *v);
+int	ciss_intr_simple_intx(void *v);
+int	ciss_intr_perf_intx(void *v);
+int	ciss_intr_perf_msi(void *v);

Index: src/sys/dev/pci/ciss_pci.c
diff -u src/sys/dev/pci/ciss_pci.c:1.17 src/sys/dev/pci/ciss_pci.c:1.18
--- src/sys/dev/pci/ciss_pci.c:1.17	Sun Jul  5 19:28:37 2020
+++ src/sys/dev/pci/ciss_pci.c	Tue Jul 14 10:37:30 2020
@@ -1,4 +1,4 @@
-/*	$NetBSD: ciss_pci.c,v 1.17 2020/07/05 19:28:37 jdolecek Exp $	*/
+/*	$NetBSD: ciss_pci.c,v 1.18 2020/07/14 10:37:30 jdolecek Exp $	*/
 /*	$OpenBSD: ciss_pci.c,v 1.9 2005/12/13 15:56:01 brad Exp $	*/
 
 /*
@@ -19,7 +19,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: ciss_pci.c,v 1.17 2020/07/05 19:28:37 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: ciss_pci.c,v 1.18 2020/07/14 10:37:30 jdolecek Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -263,12 +263,13 @@ ciss_pci_attach(device_t parent, device_
 	struct ciss_softc *sc = device_private(self);
 	struct pci_attach_args *pa = aux;
 	bus_size_t size, cfgsz;
-	pci_intr_handle_t ih;
+	pci_intr_handle_t *ih;
 	const char *intrstr;
 	int cfg_bar, memtype;
 	pcireg_t reg = pci_conf_read(pa->pa_pc, pa->pa_tag, PCI_SUBSYS_ID_REG);
 	int i;
 	char intrbuf[PCI_INTRSTR_LEN];
+	int (*intr_handler)(void *);
 
 #ifdef CISS_NO_INTERRUPT_HACK
 	callout_init(&sc->sc_interrupt_hack, 0);
@@ -333,35 +334,72 @@ ciss_pci_attach(device_t parent, device_
 		return;
 	}
 
+	/* Read the configuration */
+	bus_space_read_region_4(sc->sc_iot, sc->cfg_ioh, sc->cfgoff,
+	    (u_int32_t *)&sc->cfg, sizeof(sc->cfg) / 4);
+
 	/* disable interrupts until ready */
 #ifndef CISS_NO_INTERRUPT_HACK
 	bus_space_write_4(sc->sc_iot, sc->sc_ioh, CISS_IMR,
-	    bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_IMR) | sc->iem);
+	    bus_space_read_4(sc->sc_iot, sc->sc_ioh, CISS_IMR) |
+		sc->iem | CISS_INTR_OPQ | CISS_INTR_MSI);
 #endif
 
-	if (pci_intr_map(pa, &ih)) {
+	int counts[PCI_INTR_TYPE_SIZE] = {
+		[PCI_INTR_TYPE_INTX] = 1,
+		[PCI_INTR_TYPE_MSI] = 0,
+		[PCI_INTR_TYPE_MSIX] = 0,
+	};
+	int max_type = PCI_INTR_TYPE_INTX;
+
+	/*
+	 * Allow MSI/MSI-X only if PERFORMANT method is supported, SIMPLE
+	 * doesn't seem to work with MSI.
+	 */
+	if (CISS_PERF_SUPPORTED(sc)) {
+#if 1
+		counts[PCI_INTR_TYPE_MSI] = counts[PCI_INTR_TYPE_MSIX] = 1;
+		max_type = PCI_INTR_TYPE_MSIX;
+#endif
+		sc->iem |= CISS_INTR_OPQ | CISS_INTR_MSI;
+	}
+
+	if (pci_intr_alloc(pa, &ih, counts, max_type)) {
 		aprint_error_dev(self, "can't map interrupt\n");
 		bus_space_unmap(sc->sc_iot, sc->sc_ioh, size);
 		if (cfg_bar != CISS_BAR)
 			bus_space_unmap(sc->sc_iot, sc->cfg_ioh, cfgsz);
 		return;
 	}
-	intrstr = pci_intr_string(pa->pa_pc, ih, intrbuf, sizeof(intrbuf));
-	sc->sc_ih = pci_intr_establish_xname(pa->pa_pc, ih, IPL_BIO, ciss_intr,
-	    sc, device_xname(self));
+	intrstr = pci_intr_string(pa->pa_pc, ih[0], intrbuf, sizeof(intrbuf));
+
+	switch (pci_intr_type(pa->pa_pc, ih[0])) {
+	case PCI_INTR_TYPE_INTX:
+		intr_handler = CISS_PERF_SUPPORTED(sc)
+		    ? ciss_intr_perf_intx : ciss_intr_simple_intx;
+		break;
+	default:
+		KASSERT(CISS_PERF_SUPPORTED(sc));
+		intr_handler = ciss_intr_perf_msi;
+		break;
+	}
+
+	sc->sc_ih = pci_intr_establish_xname(pa->pa_pc, ih[0], IPL_BIO,
+	    intr_handler, sc, device_xname(self));
 	if (!sc->sc_ih) {
 		aprint_error_dev(sc->sc_dev, "can't establish interrupt");
 		if (intrstr)
 			aprint_error(" at %s", intrstr);
 		aprint_error("\n");
+		pci_intr_release(pa->pa_pc, ih, 1);
 		bus_space_unmap(sc->sc_iot, sc->sc_ioh, size);
 		if (cfg_bar != CISS_BAR)
 			bus_space_unmap(sc->sc_iot, sc->cfg_ioh, cfgsz);
+		return;
 	}
+	aprint_normal_dev(self, "interrupting at %s\n", intrstr);
 
-	aprint_normal_dev(self, "interrupting at %s\n%s", intrstr,
-	       device_xname(sc->sc_dev));
-
+	aprint_normal("%s", device_xname(sc->sc_dev));
 	if (ciss_attach(sc)) {
 		pci_intr_disestablish(pa->pa_pc, sc->sc_ih);
 		sc->sc_ih = NULL;

Reply via email to