Module Name:    src
Committed By:   riz
Date:           Fri Aug 12 20:48:48 UTC 2011

Modified Files:
        src/sys/arch/xen/xen [netbsd-5]: xbdback_xenbus.c

Log Message:
Pull up following revision(s) (requested by bouyer in ticket #1654):
        sys/arch/xen/xen/xbdback_xenbus.c: revision 1.42
        sys/arch/xen/xen/xbdback_xenbus.c: revision 1.43
        sys/arch/xen/xen/xbdback_xenbus.c: revision 1.44
Make sure to call xbdback_trampoline() at splbio()
Several fixes to the continuation engine:
- make sure to enter the continuation loop at splbio(), and add some
  KASSERT()s to check this.
- When a flush operation is enqueued to the workqueue, make sure the
  continuation loop can't be restarted by a previous workqueue
  completion or by an event. We can't restart it at this point because
  the flush is still recorded as the current I/O.
  For this, add xbdback_co_cache_doflush_wait(), which acts as a no-op;
  the workqueue callback will restart the loop once the flush is
  complete (a minimal sketch of this pattern follows the log message).
Should fix "kernel diagnostic assertion xbd_io->xio_mapped == 0" panics
reported by Jeff Rizzo on port-xen@.
Add a comment explaining why a flush workqueue request is handled
differently from read/write workqueue requests.
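
For reference, the following is a minimal, self-contained sketch of the
pattern described above: a trampoline-style continuation loop that must
run at splbio(), parked during a cache flush by a no-op continuation
that re-installs itself until the workqueue callback restarts
processing.  All demo_* names are hypothetical and the header list is
approximate; this is an illustration of the technique, not the xbdback
code itself.

/*
 * Illustrative sketch only -- hypothetical demo_* names, not the
 * actual xbdback code.  The trampoline must run at splbio(); a flush
 * parks the loop with a no-op continuation that re-installs itself,
 * so a stray event cannot restart processing until the workqueue
 * callback does so explicitly.
 */
#include <sys/param.h>
#include <sys/systm.h>	/* KASSERT */
#include <sys/cpu.h>	/* curcpu() */
#include <sys/intr.h>	/* splbio(), splx(), IPL_BIO */

struct demo_instance;
typedef void *(*demo_cont_t)(struct demo_instance *, void *);

struct demo_instance {
	demo_cont_t di_cont;	/* next continuation to run, or NULL */
};

/*
 * Park the loop: re-install ourselves and return NULL so the
 * trampoline exits.  Only the flush completion path replaces di_cont.
 */
static void *
demo_co_flush_wait(struct demo_instance *di, void *obj)
{
	(void)obj;
	di->di_cont = demo_co_flush_wait;
	return NULL;
}

/*
 * The continuation loop; callers must already be at splbio().
 * Each continuation installs its successor in di_cont, or returns
 * NULL to stop the loop.
 */
static void
demo_trampoline(struct demo_instance *di, void *obj)
{
	demo_cont_t cont;

	KASSERT(curcpu()->ci_ilevel >= IPL_BIO);
	while (obj != NULL && di->di_cont != NULL) {
		cont = di->di_cont;
		obj = (*cont)(di, obj);
	}
}

/* Enqueue side: park the loop before the flush work runs. */
static void
demo_start_flush(struct demo_instance *di)
{
	int s;

	s = splbio();
	di->di_cont = demo_co_flush_wait;
	splx(s);
	/* ... hand the flush off to the workqueue here ... */
}

/* Workqueue callback: restart the loop at splbio() once the flush is done. */
static void
demo_flush_done(struct demo_instance *di, demo_cont_t next)
{
	int s;

	s = splbio();
	di->di_cont = next;
	demo_trampoline(di, di);
	splx(s);
}

The key point mirrors revisions 1.42-1.44 above: every entry into the
trampoline happens at splbio(), and while a flush is outstanding the
only installed continuation is the self-parking no-op, so a stray event
or an earlier I/O completion cannot advance the request pointer past
the flush; only the workqueue callback restarts the loop.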


To generate a diff of this commit:
cvs rdiff -u -r1.20.4.5 -r1.20.4.6 src/sys/arch/xen/xen/xbdback_xenbus.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sys/arch/xen/xen/xbdback_xenbus.c
diff -u src/sys/arch/xen/xen/xbdback_xenbus.c:1.20.4.5 src/sys/arch/xen/xen/xbdback_xenbus.c:1.20.4.6
--- src/sys/arch/xen/xen/xbdback_xenbus.c:1.20.4.5	Sat Jun 18 16:38:26 2011
+++ src/sys/arch/xen/xen/xbdback_xenbus.c	Fri Aug 12 20:48:47 2011
@@ -1,4 +1,4 @@
-/*      $NetBSD: xbdback_xenbus.c,v 1.20.4.5 2011/06/18 16:38:26 bouyer Exp $      */
+/*      $NetBSD: xbdback_xenbus.c,v 1.20.4.6 2011/08/12 20:48:47 riz Exp $      */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
@@ -31,7 +31,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.20.4.5 2011/06/18 16:38:26 bouyer Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.20.4.6 2011/08/12 20:48:47 riz Exp $");
 
 #include <sys/types.h>
 #include <sys/param.h>
@@ -283,6 +283,7 @@
 static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
 static void *xbdback_co_cache_flush2(struct xbdback_instance *, void *);
 static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *);
+static void *xbdback_co_cache_doflush_wait(struct xbdback_instance *, void *);
 
 static void *xbdback_co_io(struct xbdback_instance *, void *);
 static void *xbdback_co_io_gotreq(struct xbdback_instance *, void *);
@@ -941,6 +942,8 @@
 {
 	(void)obj;
 	if (xbdi->xbdi_io != NULL) {
+		KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
+		    xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
 		xbdi->xbdi_cont = xbdback_co_flush;
 		xbdi->xbdi_cont_aux = xbdback_co_main_done2;
 	} else {
@@ -966,8 +969,13 @@
 xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj)
 {
 	(void)obj;
+	KASSERT(curcpu()->ci_ilevel >= IPL_BIO);
 	XENPRINTF(("xbdback_co_cache_flush %p %p\n", xbdi, obj));
 	if (xbdi->xbdi_io != NULL) {
+		/* Some I/Os are required for this instance. Process them. */
+		KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
+		    xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
+		KASSERT(xbdi->xbdi_pendingreqs == 0);
 		xbdi->xbdi_cont = xbdback_co_flush;
 		xbdi->xbdi_cont_aux = xbdback_co_cache_flush2;
 	} else {
@@ -982,7 +990,10 @@
 	(void)obj;
 	XENPRINTF(("xbdback_co_cache_flush2 %p %p\n", xbdi, obj));
 	if (xbdi->xbdi_pendingreqs > 0) {
-		/* event or iodone will restart processing */
+		/*
+		 * There are pending requests.
+		 * Event or iodone() will restart processing
+		 */
 		xbdi->xbdi_cont = NULL;
 		xbdi_put(xbdi);
 		return NULL;
@@ -1002,8 +1013,23 @@
 	xbd_io->xio_operation = xbdi->xbdi_xen_req.operation;
 	xbd_io->xio_flush_id = xbdi->xbdi_xen_req.id;
 	workqueue_enqueue(xbdback_workqueue, &xbdi->xbdi_io->xio_work, NULL);
-	/* xbdback_do_io() will advance req pointer and restart processing */
-	xbdi->xbdi_cont = xbdback_co_cache_doflush;
+	/*
+	 * xbdback_do_io() will advance req pointer and restart processing.
+	 * Note that we could probably set xbdi->xbdi_io to NULL and
+	 * let the processing continue, but we really want to wait
+	 * for the flush to complete before doing any more work.
+	 */
+	xbdi->xbdi_cont = xbdback_co_cache_doflush_wait;
+	return NULL;
+}
+
+/* wait for the flush work to complete */
+static void *
+xbdback_co_cache_doflush_wait(struct xbdback_instance *xbdi, void *obj)
+{
+	(void)obj;
+	/* abort the continuation loop; xbdback_do_io() will restart it */
+	xbdi->xbdi_cont = xbdback_co_cache_doflush_wait;
 	return NULL;
 }
 
@@ -1027,7 +1053,9 @@
 		goto end;
 	}
 
-	if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) {
+	KASSERT(req->operation == BLKIF_OP_READ ||
+	    req->operation == BLKIF_OP_WRITE);
+	if (req->operation == BLKIF_OP_WRITE) {
 		if (xbdi->xbdi_ro) {
 			error = EROFS;
 			goto end;
@@ -1076,6 +1104,8 @@
 	xrq->rq_ioerrs = 0;
 	xrq->rq_id = xbdi->xbdi_xen_req.id;
 	xrq->rq_operation = xbdi->xbdi_xen_req.operation;
+	KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ ||
+	    xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE);
 
 	/* 
 	 * Request-level reasons not to coalesce: different device,
@@ -1098,6 +1128,8 @@
 			xbdi->xbdi_next_sector =
 			    xbdi->xbdi_xen_req.sector_number;
 			xbdi->xbdi_cont_aux = xbdi->xbdi_cont; 
+			KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
+			    xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
 			xbdi->xbdi_cont = xbdback_co_flush;
 		}
 	} else {
@@ -1113,6 +1145,8 @@
 	struct xbdback_io *xio;
 
 	(void)obj;
+	KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ ||
+	    xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE);
 	if (xbdi->xbdi_segno < xbdi->xbdi_xen_req.nr_segments) {
 		uint8_t this_fs, this_ls, last_fs, last_ls;
 		grant_ref_t thisgrt, lastgrt;
@@ -1165,6 +1199,10 @@
 				xbdi->xbdi_same_page = 1;
 			} else {
 				xbdi->xbdi_cont_aux = xbdback_co_io_loop;
+				KASSERT(xbdi->xbdi_io->xio_operation ==
+				     BLKIF_OP_READ ||
+				    xbdi->xbdi_io->xio_operation ==
+				     BLKIF_OP_WRITE);
 				xbdi->xbdi_cont = xbdback_co_flush;
 				return xbdi;
 			}
@@ -1194,6 +1232,7 @@
 	vaddr_t start_offset; /* start offset in vm area */
 	int buf_flags;
 
+	KASSERT(curcpu()->ci_ilevel >= IPL_BIO);
 	xbdi_get(xbdi);
 	atomic_inc_uint(&xbdi->xbdi_pendingreqs);
 	
@@ -1331,6 +1370,7 @@
 xbdback_do_io(struct work *wk, void *dummy)
 {
 	struct xbdback_io *xbd_io = (void *)wk;
+	int s;
 	KASSERT(&xbd_io->xio_work == wk);
 
 	if (xbd_io->xio_operation == BLKIF_OP_FLUSH_DISKCACHE) {
@@ -1354,9 +1394,11 @@
 		xbdback_pool_put(&xbdback_io_pool, xbd_io);
 		xbdi_put(xbdi);
 		/* handle next IO */
+		s = splbio();
 		xbdi->xbdi_io = NULL;
 		xbdi->xbdi_cont = xbdback_co_main_incr;
 		xbdback_trampoline(xbdi, xbdi);
+		splx(s);
 		return;
 	}
 
@@ -1595,19 +1637,17 @@
 		case 0:
 			xbd_io->xio_mapped = 1;
 			SIMPLEQ_REMOVE_HEAD(&xbdback_shmq, xbdi_on_hold);
-			splx(s);
+			(void)splbio();
 			xbdback_trampoline(xbdi, xbdi);
-			s = splvm();
 			break;
 		default:
 			SIMPLEQ_REMOVE_HEAD(&xbdback_shmq, xbdi_on_hold);
-			splx(s);
+			(void)splbio();
 			printf("xbdback_shm_callback: xen_shm error %d\n",
 			       error);
 			xbdi->xbdi_cont = xbdi->xbdi_cont_aux;
 			xbdback_io_error(xbd_io, error);
 			xbdback_trampoline(xbdi, xbdi);
-			s = splvm();
 			break;
 		}
 	}
@@ -1670,8 +1710,9 @@
 	} else {
 		struct xbdback_instance *xbdi = SIMPLEQ_FIRST(&pp->q);
 		SIMPLEQ_REMOVE_HEAD(&pp->q, xbdi_on_hold);
-		splx(s);
+		(void)splbio();
 		xbdback_trampoline(xbdi, item);
+		splx(s);
 	}
 }
 
@@ -1679,6 +1720,7 @@
 xbdback_trampoline(struct xbdback_instance *xbdi, void *obj)
 {
 	xbdback_cont_t cont;
+	KASSERT(curcpu()->ci_ilevel >= IPL_BIO);
 
 	while(obj != NULL && xbdi->xbdi_cont != NULL) {
 		cont = xbdi->xbdi_cont;
