Module Name:    src
Committed By:   jdolecek
Date:           Wed Apr 15 10:16:47 UTC 2020

Modified Files:
        src/sys/arch/xen/xen: xbd_xenbus.c

Log Message:
make xbd(4) accept 64k (aka regular MAXPHYS) I/O, using two linked requests
per transfer, so that it no longer requires a MAXPHYS override in the
kernel config

this is useful even if indirect segment support is added one day, for
compatibility with Dom0s that do not support it, as NetBSD's currently
does not
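As a rough illustration of the splitting scheme (not the driver's actual
code; submit_one() and the layout below are made up for this sketch, only
the 32k XBD_MAX_CHUNK limit is taken from the diff), a transfer of up to
MAXPHYS is covered by at most two ring requests:

/*
 * Illustrative sketch only: a buffer of up to MAXPHYS (64k) is split
 * into at most two chunks of XBD_MAX_CHUNK (32k) each, one ring request
 * per chunk.  submit_one() stands in for the real submission path.
 */
#include <stdio.h>

#define XBD_MAX_CHUNK	(32 * 1024)
#define MAXPHYS		(64 * 1024)

static void
submit_one(int id, size_t start, size_t len)
{
	/* One blkif request covering bytes [start, start + len) */
	printf("req %d: offset %zu, length %zu\n", id, start, len);
}

int
main(void)
{
	size_t bcount = MAXPHYS;	/* example: a full 64k I/O */

	/* The first (parent) request covers at most the first chunk. */
	submit_one(0, 0, bcount > XBD_MAX_CHUNK ? XBD_MAX_CHUNK : bcount);

	/* A second (child) request is needed only past 32k. */
	if (bcount > XBD_MAX_CHUNK)
		submit_one(1, XBD_MAX_CHUNK, bcount - XBD_MAX_CHUNK);

	return 0;
}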


To generate a diff of this commit:
cvs rdiff -u -r1.113 -r1.114 src/sys/arch/xen/xen/xbd_xenbus.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/xen/xen/xbd_xenbus.c
diff -u src/sys/arch/xen/xen/xbd_xenbus.c:1.113 src/sys/arch/xen/xen/xbd_xenbus.c:1.114
--- src/sys/arch/xen/xen/xbd_xenbus.c:1.113	Tue Apr 14 15:16:06 2020
+++ src/sys/arch/xen/xen/xbd_xenbus.c	Wed Apr 15 10:16:47 2020
@@ -1,4 +1,4 @@
-/*      $NetBSD: xbd_xenbus.c,v 1.113 2020/04/14 15:16:06 jdolecek Exp $      */
+/*      $NetBSD: xbd_xenbus.c,v 1.114 2020/04/15 10:16:47 jdolecek Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -50,7 +50,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.113 2020/04/14 15:16:06 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.114 2020/04/15 10:16:47 jdolecek Exp $");
 
 #include "opt_xen.h"
 
@@ -94,17 +94,24 @@ __KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c
 
 #define XBD_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
 #define XBD_MAX_XFER (PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST)
+#define XBD_MAX_CHUNK	32*1024		/* max I/O size we process in 1 req */
+#define XBD_XFER_LIMIT	(2*XBD_MAX_XFER)
 
 #define XEN_BSHIFT      9               /* log2(XEN_BSIZE) */
 #define XEN_BSIZE       (1 << XEN_BSHIFT) 
 
+CTASSERT((MAXPHYS <= 2*XBD_MAX_CHUNK));
+CTASSERT(XEN_BSIZE == DEV_BSIZE);
+
 struct xbd_req {
 	SLIST_ENTRY(xbd_req) req_next;
 	uint16_t req_id; /* ID passed to backend */
 	bus_dmamap_t req_dmamap;
+	struct xbd_req *req_parent, *req_child;
+	bool req_parent_done;
 	union {
 	    struct {
-		grant_ref_t req_gntref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+		grant_ref_t req_gntref[XBD_XFER_LIMIT >> PAGE_SHIFT];
 		struct buf *req_bp; /* buffer associated with this request */
 		void *req_data; /* pointer to the data buffer */
 	    } req_rw;
@@ -138,7 +145,7 @@ struct xbd_xenbus_softc {
 	SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */
 
 	vmem_addr_t sc_unalign_buffer;
-	bool sc_unalign_free;
+	struct xbd_req *sc_unalign_used;
 
 	int sc_backend_status; /* our status with backend */
 #define BLKIF_STATE_DISCONNECTED 0
@@ -185,6 +192,8 @@ static void xbd_iosize(device_t, int *);
 static void xbd_backend_changed(void *, XenbusState);
 static void xbd_connect(struct xbd_xenbus_softc *);
 
+static void xbd_diskstart_submit(struct xbd_xenbus_softc *, int,
+	struct buf *bp, int, bus_dmamap_t, grant_ref_t *);
 static int  xbd_map_align(struct xbd_xenbus_softc *, struct xbd_req *);
 static void xbd_unmap_align(struct xbd_xenbus_softc *, struct xbd_req *, bool);
 
@@ -309,7 +318,7 @@ xbd_xenbus_attach(device_t parent, devic
 
 	for (i = 0; i < XBD_RING_SIZE; i++) {
 		if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat,
-		    XBD_MAX_XFER, BLKIF_MAX_SEGMENTS_PER_REQUEST,
+		    MAXPHYS, XBD_XFER_LIMIT >> PAGE_SHIFT,
 		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
 		    &sc->sc_reqs[i].req_dmamap) != 0) {
 			aprint_error_dev(self, "can't alloc dma maps\n");
@@ -322,7 +331,6 @@ xbd_xenbus_attach(device_t parent, devic
 		aprint_error_dev(self, "can't alloc align buffer\n");
 		return;
 	}
-	sc->sc_unalign_free = true;
 
 	/* resume shared structures and tell backend that we are ready */
 	if (xbd_xenbus_resume(self, PMF_Q_NONE) == false) {
@@ -750,18 +758,37 @@ again:
 		DPRINTF(("%s(%p): b_bcount = %ld\n", __func__,
 		    bp, (long)bp->b_bcount));
 
-		if (rep->status != BLKIF_RSP_OKAY) {
+		if (bp->b_error != 0 || rep->status != BLKIF_RSP_OKAY) {
 			bp->b_error = EIO;
 			bp->b_resid = bp->b_bcount;
-		} else {
-			KASSERTMSG(xbdreq->req_dmamap->dm_mapsize <=
-			    bp->b_resid, "mapsize %d > b_resid %d",
-			    (int)xbdreq->req_dmamap->dm_mapsize,
-			    (int)bp->b_resid);
-			bp->b_resid -= xbdreq->req_dmamap->dm_mapsize;
-			KASSERT(bp->b_resid == 0);
 		}
 
+		if (xbdreq->req_parent) {
+			struct xbd_req *req_parent = xbdreq->req_parent;
+
+			/* Unhook and recycle child */
+			xbdreq->req_parent = NULL;
+			req_parent->req_child = NULL;
+			SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq,
+				    req_next);
+
+			if (!req_parent->req_parent_done) {
+				/* Finished before parent, nothing else to do */
+				continue;
+			}
+
+			/* Must do the cleanup now */
+			xbdreq = req_parent;
+		}
+		if (xbdreq->req_child) {
+			/* Finished before child, child will cleanup */
+			xbdreq->req_parent_done = true;
+			continue;
+		}
+
+		if (bp->b_error == 0)
+			bp->b_resid = 0;
+
 		for (seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) {
 			/*
 			 * We are not allowing persistent mappings, so
@@ -801,8 +828,8 @@ again:
 static void
 xbdminphys(struct buf *bp)
 {
-	if (bp->b_bcount > XBD_MAX_XFER) {
-		bp->b_bcount = XBD_MAX_XFER;
+	if (bp->b_bcount > XBD_XFER_LIMIT) {
+		bp->b_bcount = XBD_XFER_LIMIT;
 	}
 	minphys(bp);
 }
@@ -999,17 +1026,14 @@ xbd_diskstart(device_t self, struct buf 
 {
 	struct xbd_xenbus_softc *sc = device_private(self);
 	struct xbd_req *xbdreq;
-	blkif_request_t *req;
-	size_t off;
-	paddr_t ma;
-	int nsects, nbytes, seg;
-	int notify, error = 0;
+	int error = 0;
+	int notify;
+
+	KASSERT(bp->b_bcount <= MAXPHYS);
 
 	DPRINTF(("xbd_diskstart(%p): b_bcount = %ld\n",
 	    bp, (long)bp->b_bcount));
 
-	KASSERT(bp->b_bcount <= XBD_MAX_XFER);
-
 	mutex_enter(&sc->sc_lock);
 
 	if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
@@ -1040,14 +1064,18 @@ xbd_diskstart(device_t self, struct buf 
 		goto out;
 	}
 
+	if (bp->b_bcount > XBD_MAX_CHUNK) {
+		if (!SLIST_NEXT(xbdreq, req_next)) {
+			DPRINTF(("%s: need extra req\n", __func__));
+			error = EAGAIN;
+			goto out;
+		}
+	}
+
+	bp->b_resid = bp->b_bcount;
 	xbdreq->req_bp = bp;
 	xbdreq->req_data = bp->b_data;
 	if (__predict_false((vaddr_t)bp->b_data & (XEN_BSIZE - 1))) {
-		/* Only can get here if this is physio() request */
-		KASSERT(bp->b_saveaddr != NULL);
-
-		sc->sc_cnt_map_unalign.ev_count++;
-
 		if (__predict_false(xbd_map_align(sc, xbdreq) != 0)) {
 			DPRINTF(("xbd_diskstart: no align\n"));
 			error = EAGAIN;
@@ -1058,40 +1086,23 @@ xbd_diskstart(device_t self, struct buf 
 	if (__predict_false(bus_dmamap_load(sc->sc_xbusd->xbusd_dmat,
 	    xbdreq->req_dmamap, xbdreq->req_data, bp->b_bcount, NULL,
 	    BUS_DMA_NOWAIT) != 0)) {
-		printf("%s: %s: bus_dmamap_load failed",
+		printf("%s: %s: bus_dmamap_load failed\n",
 		    device_xname(sc->sc_dksc.sc_dev), __func__);
+		if (__predict_false(bp->b_data != xbdreq->req_data))
+			xbd_unmap_align(sc, xbdreq, false);
 		error = EINVAL;
 		goto out;
 	}
 
-	/* We are now committed to the transfer */
-	SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
-	req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
-	req->id = xbdreq->req_id;
-	req->operation =
-	    bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
-	req->sector_number = bp->b_rawblkno;
-	req->handle = sc->sc_handle;
-
-	bp->b_resid = bp->b_bcount;
-	for (seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) {
-		bus_dma_segment_t *dmaseg = &xbdreq->req_dmamap->dm_segs[seg];
-
-		ma = dmaseg->ds_addr;
-		off = ma & PAGE_MASK;
-		nbytes = dmaseg->ds_len;
-		nsects = nbytes >> XEN_BSHIFT;
-
-		req->seg[seg].first_sect = off >> XEN_BSHIFT;
-		req->seg[seg].last_sect = (off >> XEN_BSHIFT) + nsects - 1;
-		KASSERT(req->seg[seg].first_sect <= req->seg[seg].last_sect);
-		KASSERT(req->seg[seg].last_sect < (PAGE_SIZE / XEN_BSIZE));
+	for (int seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) {
+		KASSERT(seg < __arraycount(xbdreq->req_gntref));
 
+		paddr_t ma = xbdreq->req_dmamap->dm_segs[seg].ds_addr;
 		if (__predict_false(xengnt_grant_access(
 		    sc->sc_xbusd->xbusd_otherend_id,
 		    (ma & ~PAGE_MASK), (bp->b_flags & B_READ) == 0,
 		    &xbdreq->req_gntref[seg]))) {
-			printf("%s: %s: xengnt_grant_access failed",
+			printf("%s: %s: xengnt_grant_access failed\n",
 			    device_xname(sc->sc_dksc.sc_dev), __func__);
 			if (seg > 0) {
 				for (; --seg >= 0; ) {
@@ -1103,34 +1114,109 @@ xbd_diskstart(device_t self, struct buf 
 			    xbdreq->req_dmamap);
 			if (__predict_false(bp->b_data != xbdreq->req_data))
 				xbd_unmap_align(sc, xbdreq, false);
-			SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq,
-			    req_next);
-			error = EFAULT;
+			error = EAGAIN;
 			goto out;
 		}
+	}
+
+	KASSERT(xbdreq->req_parent == NULL);
+	KASSERT(xbdreq->req_child == NULL);
 
-		req->seg[seg].gref = xbdreq->req_gntref[seg];
+	/* We are now committed to the transfer */
+	SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
+	xbd_diskstart_submit(sc, xbdreq->req_id,
+	    bp, 0, xbdreq->req_dmamap, xbdreq->req_gntref);
+
+	if (bp->b_bcount > XBD_MAX_CHUNK) {
+		struct xbd_req *xbdreq2 = SLIST_FIRST(&sc->sc_xbdreq_head);
+		KASSERT(xbdreq2 != NULL); /* Checked earlier */
+		SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
+		xbdreq->req_child = xbdreq2;
+		xbdreq->req_parent_done = false;
+		xbdreq2->req_parent = xbdreq;
+		xbdreq2->req_bp = bp;
+		xbdreq2->req_data = NULL;
+		xbd_diskstart_submit(sc, xbdreq2->req_id,
+		    bp, XBD_MAX_CHUNK, xbdreq->req_dmamap,
+		    xbdreq->req_gntref);
 	}
-	req->nr_segments = seg;
-	sc->sc_ring.req_prod_pvt++;
 
 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
 	if (notify)
 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
-
 out:
 	mutex_exit(&sc->sc_lock);
 	return error;
 }
 
+static void
+xbd_diskstart_submit(struct xbd_xenbus_softc *sc,
+    int req_id, struct buf *bp, int start, bus_dmamap_t dmamap,
+    grant_ref_t *gntref)
+{
+	blkif_request_t *req;
+	paddr_t ma;
+	int nsects, nbytes, dmaseg, first_sect, size, segidx = 0;
+	struct blkif_request_segment *reqseg;
+
+	KASSERT(mutex_owned(&sc->sc_lock));
+
+	req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
+	req->id = req_id;
+	req->operation =
+	    bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
+	req->sector_number = bp->b_rawblkno + (start >> XEN_BSHIFT);
+	req->handle = sc->sc_handle;
+
+	size = uimin(bp->b_bcount - start, XBD_MAX_CHUNK); 
+	for (dmaseg = 0; dmaseg < dmamap->dm_nsegs && size > 0; dmaseg++) {
+		bus_dma_segment_t *ds = &dmamap->dm_segs[dmaseg];
+
+		ma = ds->ds_addr;
+		nbytes = imin(ds->ds_len, size);
+
+		if (start > 0) {
+			if (start >= nbytes) {
+				start -= nbytes;
+				continue;
+			}
+			ma += start;
+			nbytes -= start;
+			start = 0;
+		}
+		size -= nbytes;
+
+		KASSERT(((ma & PAGE_MASK) & (XEN_BSIZE - 1)) == 0);
+		KASSERT((nbytes & (XEN_BSIZE - 1)) == 0);
+		KASSERT((size & (XEN_BSIZE - 1)) == 0);
+		first_sect = (ma & PAGE_MASK) >> XEN_BSHIFT;
+		nsects = nbytes >> XEN_BSHIFT;
+
+		reqseg = &req->seg[segidx++];
+		reqseg->first_sect = first_sect;
+		reqseg->last_sect = first_sect + nsects - 1;
+		KASSERT(reqseg->first_sect <= reqseg->last_sect);
+		KASSERT(reqseg->last_sect < (PAGE_SIZE / XEN_BSIZE));
+
+		reqseg->gref = gntref[dmaseg];
+	}
+	req->nr_segments = segidx;
+	sc->sc_ring.req_prod_pvt++;
+}
+
 static int
 xbd_map_align(struct xbd_xenbus_softc *sc, struct xbd_req *req)
 {
-	if (!sc->sc_unalign_free) {
+	/* Only can get here if this is physio() request */
+	KASSERT(req->req_bp->b_saveaddr != NULL);
+
+	sc->sc_cnt_map_unalign.ev_count++;
+
+	if (sc->sc_unalign_used) {
 		sc->sc_cnt_unalign_busy.ev_count++;
 		return EAGAIN;
 	}
-	sc->sc_unalign_free = false;
+	sc->sc_unalign_used = req;
 
 	KASSERT(req->req_bp->b_bcount <= MAXPHYS);
 	req->req_data = (void *)sc->sc_unalign_buffer;
@@ -1143,8 +1229,9 @@ xbd_map_align(struct xbd_xenbus_softc *s
 static void
 xbd_unmap_align(struct xbd_xenbus_softc *sc, struct xbd_req *req, bool sync)
 {
+	KASSERT(sc->sc_unalign_used == req);
 	if (sync && req->req_bp->b_flags & B_READ)
 		memcpy(req->req_bp->b_data, req->req_data,
 		    req->req_bp->b_bcount);
-	sc->sc_unalign_free = true;
+	sc->sc_unalign_used = NULL;
 }
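
For context, here is a minimal, self-contained sketch of the parent/child
completion pairing the diff introduces: whichever of the two linked
requests completes last performs the final cleanup.  The names below are
invented for illustration and there is no locking; only the scheme itself
mirrors the req_parent/req_child/req_parent_done handling above.

/*
 * Illustrative sketch of the "linked request" completion scheme:
 * the request that completes last finishes the transfer.
 */
#include <stdbool.h>
#include <stdio.h>

struct fake_req {
	struct fake_req *parent, *child;
	bool parent_done;
};

static void
complete_one(struct fake_req *r)
{
	if (r->parent != NULL) {
		struct fake_req *p = r->parent;

		/* Unhook the child; it can be recycled immediately. */
		r->parent = NULL;
		p->child = NULL;
		if (!p->parent_done) {
			/* Child finished first; parent will clean up. */
			return;
		}
		r = p;	/* Parent finished already; clean up now. */
	} else if (r->child != NULL) {
		/* Parent finished first; the child will clean up later. */
		r->parent_done = true;
		return;
	}
	printf("final cleanup for transfer\n");
}

int
main(void)
{
	struct fake_req parent = { 0 }, child = { 0 };

	parent.child = &child;
	child.parent = &parent;

	complete_one(&parent);	/* parent completes first: defers */
	complete_one(&child);	/* child completes last: final cleanup */
	return 0;
}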
