Module Name: src
Committed By: jym
Date: Sun Jul 24 23:56:34 UTC 2011
Modified Files:
src/sys/arch/xen/xen: xbdback_xenbus.c
Log Message:
Add more comments to xbdback(4) code. These make the continuations a bit
easier to follow (and understand). Helped tracking down a regression
between save/restore xbdback(4) states.
A few minor fixes, which are merely cosmetic:
- call graph is (somewhat) more readable
- rework the xbdback_do_io routine with a switch statement, so as to
trigger a panic() in case an invalid operation passed through the sanity
checks. panic might be overkill here, but I am sure to catch errrors in
case it happens.
To generate a diff of this commit:
cvs rdiff -u -r1.40 -r1.41 src/sys/arch/xen/xen/xbdback_xenbus.c
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/sys/arch/xen/xen/xbdback_xenbus.c
diff -u src/sys/arch/xen/xen/xbdback_xenbus.c:1.40 src/sys/arch/xen/xen/xbdback_xenbus.c:1.41
--- src/sys/arch/xen/xen/xbdback_xenbus.c:1.40 Sun Jun 12 03:35:50 2011
+++ src/sys/arch/xen/xen/xbdback_xenbus.c Sun Jul 24 23:56:34 2011
@@ -1,4 +1,4 @@
-/* $NetBSD: xbdback_xenbus.c,v 1.40 2011/06/12 03:35:50 rmind Exp $ */
+/* $NetBSD: xbdback_xenbus.c,v 1.41 2011/07/24 23:56:34 jym Exp $ */
/*
* Copyright (c) 2006 Manuel Bouyer.
@@ -26,7 +26,7 @@
*/
#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.40 2011/06/12 03:35:50 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.41 2011/07/24 23:56:34 jym Exp $");
#include <sys/types.h>
#include <sys/param.h>
@@ -87,31 +87,41 @@
* it's finished, set xbdi->xbdi_cont (see below) to NULL and the return
* doesn't matter. Otherwise it's passed as the second parameter to
* the new value of xbdi->xbdi_cont.
+ *
* Here's how the call graph is supposed to be for a single I/O:
- * xbdback_co_main()
- * | |-> xbdback_co_cache_doflush() -> stall
- * | xbdback_co_cache_flush2() <- xbdback_co_flush_done() <-
- * | | |
- * | |-> xbdback_co_cache_flush() -> xbdback_co_flush() --
- * xbdback_co_main_loop() -> xbdback_co_main_done() -> xbdback_co_flush()
- * | | |
- * | xbdback_co_main_done2() <- xbdback_co_flush_done()
- * | |
- * | xbdback_co_main() or NULL
- * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
+ * xbdback_co_main()
+ * |
+ * | --> xbdback_co_cache_doflush() or NULL
+ * | |
+ * | -- xbdback_co_cache_flush2() <- xbdback_co_flush_done() <--
+ * | | |
+ * | |-> xbdback_co_cache_flush() -> xbdback_co_flush() --
+ * xbdback_co_main_loop()-|
+ * | |-> xbdback_co_main_done() -> xbdback_co_flush() --
+ * | | |
+ * | -- xbdback_co_main_done2() <- xbdback_co_flush_done() <--
+ * | |
+ * | --> xbdback_co_main() or NULL
+ * |
+ * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
* |
- * xbdback_co_io_gotreq() -> xbdback_co_flush() -> xbdback_co_flush()
- * | | |
- * xbdback_co_io_loop() --- <---------------- xbdback_co_flush_done()
- * | |
- * xbdback_co_io_gotio() |
- * | |
- * xbdback_co_io_gotio2()<-
- * | |--------> xbdback_co_io_gotfrag
- * | |
- * xbdback_co_io_gotfrag2() <----------|
- * | |--> xbdback_co_io_loop()
- * xbdback_co_main_incr()
+ * xbdback_co_io_gotreq()--+---------> xbdback_co_flush() --
+ * | | |
+ * -> xbdback_co_io_loop()----| <- xbdback_co_flush_done() <--
+ * | | | |
+ * | | | |----------> xbdback_co_io_gotio()
+ * | | | |
+ * | | xbdback_co_main_incr() |
+ * | | | |
+ * | | xbdback_co_main_loop() |
+ * | | |
+ * | xbdback_co_io_gotio2() <-----------|
+ * | | |
+ * | | |----------> xbdback_co_io_gotfrag()
+ * | | |
+ * -- xbdback_co_io_gotfrag2() <---------|
+ * |
+ * xbdback_co_main_incr() -> xbdback_co_main_loop()
*/
typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);
@@ -153,11 +163,11 @@
RING_IDX xbdi_req_prod; /* limit on request indices */
xbdback_cont_t xbdi_cont, xbdi_cont_aux;
SIMPLEQ_ENTRY(xbdback_instance) xbdi_on_hold; /* waiting on resources */
- /* _request state */
+ /* _request state: track requests fetched from ring */
struct xbdback_request *xbdi_req; /* if NULL, ignore following */
blkif_request_t xbdi_xen_req;
int xbdi_segno;
- /* _io state */
+ /* _io state: I/O associated to this instance */
struct xbdback_io *xbdi_io; /* if NULL, ignore next field */
daddr_t xbdi_next_sector;
uint8_t xbdi_last_fs, xbdi_this_fs; /* first sectors */
@@ -214,7 +224,7 @@
/* grants release */
grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];
uint16_t xio_nrma; /* number of guest pages */
- uint16_t xio_mapped;
+ uint16_t xio_mapped; /* == 1: grants are mapped */
} xio_rw;
uint64_t xio_flush_id;
} u;
@@ -230,7 +240,7 @@
#define xio_flush_id u.xio_flush_id
/*
- * Rather than have the xbdback_io keep an array of the
+ * Rather than having the xbdback_io keep an array of the
* xbdback_requests involved, since the actual number will probably be
* small but might be as large as BLKIF_RING_SIZE, use a list. This
* would be threaded through xbdback_request, but one of them might be
@@ -850,6 +860,7 @@
xbdi->xbdi_cont = xbdback_co_main;
xbdback_trampoline(xbdi, xbdi);
}
+
return 1;
}
@@ -867,6 +878,10 @@
return xbdi;
}
+/*
+ * Fetch a blkif request from the ring, and pass control to the appropriate
+ * continuation.
+ */
static void *
xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj)
{
@@ -931,6 +946,9 @@
return xbdi;
}
+/*
+ * Increment consumer index and move on to the next request.
+ */
static void *
xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj)
{
@@ -940,6 +958,10 @@
return xbdi;
}
+/*
+ * Ring processing is over. If there are any I/O still present for this
+ * instance, handle them first.
+ */
static void *
xbdback_co_main_done(struct xbdback_instance *xbdi, void *obj)
{
@@ -953,6 +975,10 @@
return xbdi;
}
+/*
+ * Check for requests in the instance's ring. In case there are, start again
+ * from the beginning. If not, stall.
+ */
static void *
xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj)
{
@@ -966,12 +992,16 @@
return xbdi;
}
+/*
+ * Frontend requested a cache flush operation.
+ */
static void *
xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj)
{
(void)obj;
XENPRINTF(("xbdback_co_cache_flush %p %p\n", xbdi, obj));
if (xbdi->xbdi_io != NULL) {
+ /* Some I/Os are required for this instance. Process them. */
xbdi->xbdi_cont = xbdback_co_flush;
xbdi->xbdi_cont_aux = xbdback_co_cache_flush2;
} else {
@@ -986,7 +1016,8 @@
(void)obj;
XENPRINTF(("xbdback_co_cache_flush2 %p %p\n", xbdi, obj));
if (xbdi->xbdi_pendingreqs > 0) {
- /* event or iodone will restart processing */
+ /* There are pending requests.
+ * Event or iodone() will restart processing */
xbdi->xbdi_cont = NULL;
xbdi_put(xbdi);
return NULL;
@@ -995,6 +1026,7 @@
return xbdback_pool_get(&xbdback_io_pool, xbdi);
}
+/* Enqueue the flush work */
static void *
xbdback_co_cache_doflush(struct xbdback_instance *xbdi, void *obj)
{
@@ -1007,10 +1039,14 @@
xbd_io->xio_flush_id = xbdi->xbdi_xen_req.id;
workqueue_enqueue(xbdback_workqueue, &xbdi->xbdi_io->xio_work, NULL);
/* xbdback_do_io() will advance req pointer and restart processing */
- xbdi->xbdi_cont = xbdback_co_cache_doflush;
+ xbdi->xbdi_cont = NULL;
return NULL;
}
+/*
+ * A read or write I/O request must be processed. Do some checks first,
+ * then get the segment information directly from the ring request.
+ */
static void *
xbdback_co_io(struct xbdback_instance *xbdi, void *obj)
{
@@ -1061,6 +1097,7 @@
xbdi->xbdi_cont = xbdback_co_io_gotreq;
return xbdback_pool_get(&xbdback_request_pool, xbdi);
+
end:
xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id,
xbdi->xbdi_xen_req.operation, error);
@@ -1068,6 +1105,11 @@
return xbdi;
}
+/*
+ * We have fetched segment requests from the ring. In case there are already
+ * I/Os prepared for this instance, we can try coalescing the requests
+ * with these I/Os.
+ */
static void *
xbdback_co_io_gotreq(struct xbdback_instance *xbdi, void *obj)
{
@@ -1110,7 +1152,7 @@
return xbdi;
}
-
+/* Handle coalescing of multiple segment requests into one I/O work */
static void *
xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj)
{
@@ -1189,10 +1231,9 @@
return xbdi;
}
-
+/* Prepare an I/O buffer for a xbdback instance */
static void *
xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
-
{
struct xbdback_io *xbd_io;
vaddr_t start_offset; /* start offset in vm area */
@@ -1234,7 +1275,7 @@
return xbdi;
}
-
+/* Manage fragments */
static void *
xbdback_co_io_gotio2(struct xbdback_instance *xbdi, void *obj)
{
@@ -1249,7 +1290,7 @@
return xbdi;
}
-
+/* Prepare the instance for its first fragment */
static void *
xbdback_co_io_gotfrag(struct xbdback_instance *xbdi, void *obj)
{
@@ -1264,6 +1305,7 @@
return xbdi;
}
+/* Last routine to manage segments fragments for one I/O */
static void *
xbdback_co_io_gotfrag2(struct xbdback_instance *xbdi, void *obj)
{
@@ -1302,7 +1344,10 @@
return xbdi;
}
-
+/*
+ * Map the different I/O requests in backend's VA space, then schedule
+ * the I/O work.
+ */
static void *
xbdback_co_flush(struct xbdback_instance *xbdi, void *obj)
{
@@ -1314,6 +1359,7 @@
return xbdback_map_shm(xbdi->xbdi_io);
}
+/* Transfer all I/O work to the workqueue */
static void *
xbdback_co_flush_done(struct xbdback_instance *xbdi, void *obj)
{
@@ -1331,13 +1377,19 @@
xbdback_iodone(&xbd_io->xio_buf);
}
+/*
+ * Main xbdback workqueue routine: performs I/O on behalf of backend. Has
+ * thread context.
+ */
static void
xbdback_do_io(struct work *wk, void *dummy)
{
struct xbdback_io *xbd_io = (void *)wk;
KASSERT(&xbd_io->xio_work == wk);
- if (xbd_io->xio_operation == BLKIF_OP_FLUSH_DISKCACHE) {
+ switch (xbd_io->xio_operation) {
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ {
int error;
int force = 1;
struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
@@ -1361,37 +1413,44 @@
xbdi->xbdi_io = NULL;
xbdi->xbdi_cont = xbdback_co_main_incr;
xbdback_trampoline(xbdi, xbdi);
- return;
+ break;
}
-
- /* should be read or write */
- xbd_io->xio_buf.b_data =
- (void *)((vaddr_t)xbd_io->xio_buf.b_data + xbd_io->xio_vaddr);
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ xbd_io->xio_buf.b_data = (void *)
+ ((vaddr_t)xbd_io->xio_buf.b_data + xbd_io->xio_vaddr);
#ifdef DIAGNOSTIC
- {
- vaddr_t bdata = (vaddr_t)xbd_io->xio_buf.b_data;
- int nsegs =
- ((((bdata + xbd_io->xio_buf.b_bcount - 1) & ~PAGE_MASK) -
- (bdata & ~PAGE_MASK)) >> PAGE_SHIFT) + 1;
- if ((bdata & ~PAGE_MASK) != (xbd_io->xio_vaddr & ~PAGE_MASK)) {
- printf("xbdback_do_io vaddr 0x%lx bdata 0x%lx\n",
- xbd_io->xio_vaddr, bdata);
- panic("xbdback_do_io: bdata page change");
- }
- if (nsegs > xbd_io->xio_nrma) {
- printf("xbdback_do_io vaddr 0x%lx bcount 0x%x doesn't fit in "
- " %d pages\n", bdata, xbd_io->xio_buf.b_bcount,
- xbd_io->xio_nrma);
- panic("xbdback_do_io: not enough pages");
- }
- }
+ {
+ vaddr_t bdata = (vaddr_t)xbd_io->xio_buf.b_data;
+ int nsegs =
+ ((((bdata + xbd_io->xio_buf.b_bcount - 1) & ~PAGE_MASK) -
+ (bdata & ~PAGE_MASK)) >> PAGE_SHIFT) + 1;
+ if ((bdata & ~PAGE_MASK) != (xbd_io->xio_vaddr & ~PAGE_MASK)) {
+ printf("xbdback_do_io: vaddr %#" PRIxVADDR
+ " bdata %#" PRIxVADDR "\n",
+ xbd_io->xio_vaddr, bdata);
+ panic("xbdback_do_io: bdata page change");
+ }
+ if (nsegs > xbd_io->xio_nrma) {
+ printf("xbdback_do_io: vaddr %#" PRIxVADDR
+ " bcount %#x doesn't fit in %d pages\n",
+ bdata, xbd_io->xio_buf.b_bcount, xbd_io->xio_nrma);
+ panic("xbdback_do_io: not enough pages");
+ }
+ }
#endif
- if ((xbd_io->xio_buf.b_flags & B_READ) == 0) {
- mutex_enter(xbd_io->xio_buf.b_vp->v_interlock);
- xbd_io->xio_buf.b_vp->v_numoutput++;
- mutex_exit(xbd_io->xio_buf.b_vp->v_interlock);
+ if ((xbd_io->xio_buf.b_flags & B_READ) == 0) {
+ mutex_enter(xbd_io->xio_buf.b_vp->v_interlock);
+ xbd_io->xio_buf.b_vp->v_numoutput++;
+ mutex_exit(xbd_io->xio_buf.b_vp->v_interlock);
+ }
+ bdev_strategy(&xbd_io->xio_buf);
+ break;
+ default:
+ /* Should never happen */
+ panic("xbdback_do_io: unsupported operation %d",
+ xbd_io->xio_operation);
}
- bdev_strategy(&xbd_io->xio_buf);
}
/* This gets reused by xbdback_io_error to report errors from other sources. */
@@ -1409,7 +1468,7 @@
XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n",
xbdi->xbdi_domid, (long)xbd_io));
- if (xbd_io->xio_mapped)
+ if (xbd_io->xio_mapped == 1)
xbdback_unmap_shm(xbd_io);
if (bp->b_error != 0) {
@@ -1418,7 +1477,6 @@
errp = 1;
} else
errp = 0;
-
/* for each constituent xbd request */
while(!SLIST_EMPTY(&xbd_io->xio_rq)) {
@@ -1510,8 +1568,8 @@
}
/*
- * Map a request into our virtual address space. The xbd_req->rq_ma
- * array is to be filled out by the caller.
+ * Map multiple entries of an I/O request into backend's VA space.
+ * The xbd_io->xio_gref array has to be filled out by the caller.
*/
static void *
xbdback_map_shm(struct xbdback_io *xbd_io)
@@ -1679,6 +1737,10 @@
}
}
+/*
+ * Trampoline routine. Calls continuations in a loop and only exits when
+ * either the returned object or the next callback is NULL.
+ */
static void
xbdback_trampoline(struct xbdback_instance *xbdi, void *obj)
{
@@ -1693,7 +1755,7 @@
#ifdef DIAGNOSTIC
if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) {
printf("xbdback_trampoline: 0x%lx didn't set "
- "xbdi->xbdi_cont!\n2", (long)cont);
+ "xbdi->xbdi_cont!\n", (long)cont);
panic("xbdback_trampoline: bad continuation");
}
#endif