Module Name: src Committed By: riz Date: Fri Aug 12 20:48:48 UTC 2011
Modified Files: src/sys/arch/xen/xen [netbsd-5]: xbdback_xenbus.c Log Message: Pull up following revision(s) (requested by bouyer in ticket #1654): sys/arch/xen/xen/xbdback_xenbus.c: revision 1.42 sys/arch/xen/xen/xbdback_xenbus.c: revision 1.43 sys/arch/xen/xen/xbdback_xenbus.c: revision 1.44 Make sure to call xbdback_trampoline() at splbio() Several fixes to the continuation engine: - make sure to enter the continuation loop at splbio(), and add some KASSERT() for this. - When a flush operation is enqueued to the workqueue, make sure the continuation loop can't be restarted by a previous workqueue completion or an event. We can't restart it at this point because the flush event is still recorded as the current I/O. For this add an xbdback_co_cache_doflush_wait() which acts as a noop; the workqueue callback will restart the loop once the flush is complete. Should fix "kernel diagnostic assertion xbd_io->xio_mapped == 0" panics reported by Jeff Rizzo on port-xen@. Add a comment explaining why a flush workqueue is handled differently from read/write workqueue requests. To generate a diff of this commit: cvs rdiff -u -r1.20.4.5 -r1.20.4.6 src/sys/arch/xen/xen/xbdback_xenbus.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/sys/arch/xen/xen/xbdback_xenbus.c diff -u src/sys/arch/xen/xen/xbdback_xenbus.c:1.20.4.5 src/sys/arch/xen/xen/xbdback_xenbus.c:1.20.4.6 --- src/sys/arch/xen/xen/xbdback_xenbus.c:1.20.4.5 Sat Jun 18 16:38:26 2011 +++ src/sys/arch/xen/xen/xbdback_xenbus.c Fri Aug 12 20:48:47 2011 @@ -1,4 +1,4 @@ -/* $NetBSD: xbdback_xenbus.c,v 1.20.4.5 2011/06/18 16:38:26 bouyer Exp $ */ +/* $NetBSD: xbdback_xenbus.c,v 1.20.4.6 2011/08/12 20:48:47 riz Exp $ */ /* * Copyright (c) 2006 Manuel Bouyer. @@ -31,7 +31,7 @@ */ #include <sys/cdefs.h> -__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.20.4.5 2011/06/18 16:38:26 bouyer Exp $"); +__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.20.4.6 2011/08/12 20:48:47 riz Exp $"); #include <sys/types.h> #include <sys/param.h> @@ -283,6 +283,7 @@ static void *xbdback_co_cache_flush(struct xbdback_instance *, void *); static void *xbdback_co_cache_flush2(struct xbdback_instance *, void *); static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *); +static void *xbdback_co_cache_doflush_wait(struct xbdback_instance *, void *); static void *xbdback_co_io(struct xbdback_instance *, void *); static void *xbdback_co_io_gotreq(struct xbdback_instance *, void *); @@ -941,6 +942,8 @@ { (void)obj; if (xbdi->xbdi_io != NULL) { + KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ || + xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE); xbdi->xbdi_cont = xbdback_co_flush; xbdi->xbdi_cont_aux = xbdback_co_main_done2; } else { @@ -966,8 +969,13 @@ xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj) { (void)obj; + KASSERT(curcpu()->ci_ilevel >= IPL_BIO); XENPRINTF(("xbdback_co_cache_flush %p %p\n", xbdi, obj)); if (xbdi->xbdi_io != NULL) { + /* Some I/Os are required for this instance. Process them. 
*/ + KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ || + xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE); + KASSERT(xbdi->xbdi_pendingreqs == 0); xbdi->xbdi_cont = xbdback_co_flush; xbdi->xbdi_cont_aux = xbdback_co_cache_flush2; } else { @@ -982,7 +990,10 @@ (void)obj; XENPRINTF(("xbdback_co_cache_flush2 %p %p\n", xbdi, obj)); if (xbdi->xbdi_pendingreqs > 0) { - /* event or iodone will restart processing */ + /* + * There are pending requests. + * Event or iodone() will restart processing + */ xbdi->xbdi_cont = NULL; xbdi_put(xbdi); return NULL; @@ -1002,8 +1013,23 @@ xbd_io->xio_operation = xbdi->xbdi_xen_req.operation; xbd_io->xio_flush_id = xbdi->xbdi_xen_req.id; workqueue_enqueue(xbdback_workqueue, &xbdi->xbdi_io->xio_work, NULL); - /* xbdback_do_io() will advance req pointer and restart processing */ - xbdi->xbdi_cont = xbdback_co_cache_doflush; + /* + * xbdback_do_io() will advance req pointer and restart processing. + * Note that we could probably set xbdi->xbdi_io to NULL and + * let the processing continue, but we really want to wait + * for the flush to complete before doing any more work. 
+ */ + xbdi->xbdi_cont = xbdback_co_cache_doflush_wait; + return NULL; +} + +/* wait for the flush work to complete */ +static void * +xbdback_co_cache_doflush_wait(struct xbdback_instance *xbdi, void *obj) +{ + (void)obj; + /* abort the continuation loop; xbdback_do_io() will restart it */ + xbdi->xbdi_cont = xbdback_co_cache_doflush_wait; return NULL; } @@ -1027,7 +1053,9 @@ goto end; } - if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) { + KASSERT(req->operation == BLKIF_OP_READ || + req->operation == BLKIF_OP_WRITE); + if (req->operation == BLKIF_OP_WRITE) { if (xbdi->xbdi_ro) { error = EROFS; goto end; @@ -1076,6 +1104,8 @@ xrq->rq_ioerrs = 0; xrq->rq_id = xbdi->xbdi_xen_req.id; xrq->rq_operation = xbdi->xbdi_xen_req.operation; + KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ || + xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE); /* * Request-level reasons not to coalesce: different device, @@ -1098,6 +1128,8 @@ xbdi->xbdi_next_sector = xbdi->xbdi_xen_req.sector_number; xbdi->xbdi_cont_aux = xbdi->xbdi_cont; + KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ || + xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE); xbdi->xbdi_cont = xbdback_co_flush; } } else { @@ -1113,6 +1145,8 @@ struct xbdback_io *xio; (void)obj; + KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ || + xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE); if (xbdi->xbdi_segno < xbdi->xbdi_xen_req.nr_segments) { uint8_t this_fs, this_ls, last_fs, last_ls; grant_ref_t thisgrt, lastgrt; @@ -1165,6 +1199,10 @@ xbdi->xbdi_same_page = 1; } else { xbdi->xbdi_cont_aux = xbdback_co_io_loop; + KASSERT(xbdi->xbdi_io->xio_operation == + BLKIF_OP_READ || + xbdi->xbdi_io->xio_operation == + BLKIF_OP_WRITE); xbdi->xbdi_cont = xbdback_co_flush; return xbdi; } @@ -1194,6 +1232,7 @@ vaddr_t start_offset; /* start offset in vm area */ int buf_flags; + KASSERT(curcpu()->ci_ilevel >= IPL_BIO); xbdi_get(xbdi); atomic_inc_uint(&xbdi->xbdi_pendingreqs); @@ -1331,6 +1370,7 @@ xbdback_do_io(struct work 
*wk, void *dummy) { struct xbdback_io *xbd_io = (void *)wk; + int s; KASSERT(&xbd_io->xio_work == wk); if (xbd_io->xio_operation == BLKIF_OP_FLUSH_DISKCACHE) { @@ -1354,9 +1394,11 @@ xbdback_pool_put(&xbdback_io_pool, xbd_io); xbdi_put(xbdi); /* handle next IO */ + s = splbio(); xbdi->xbdi_io = NULL; xbdi->xbdi_cont = xbdback_co_main_incr; xbdback_trampoline(xbdi, xbdi); + splx(s); return; } @@ -1595,19 +1637,17 @@ case 0: xbd_io->xio_mapped = 1; SIMPLEQ_REMOVE_HEAD(&xbdback_shmq, xbdi_on_hold); - splx(s); + (void)splbio(); xbdback_trampoline(xbdi, xbdi); - s = splvm(); break; default: SIMPLEQ_REMOVE_HEAD(&xbdback_shmq, xbdi_on_hold); - splx(s); + (void)splbio(); printf("xbdback_shm_callback: xen_shm error %d\n", error); xbdi->xbdi_cont = xbdi->xbdi_cont_aux; xbdback_io_error(xbd_io, error); xbdback_trampoline(xbdi, xbdi); - s = splvm(); break; } } @@ -1670,8 +1710,9 @@ } else { struct xbdback_instance *xbdi = SIMPLEQ_FIRST(&pp->q); SIMPLEQ_REMOVE_HEAD(&pp->q, xbdi_on_hold); - splx(s); + (void)splbio(); xbdback_trampoline(xbdi, item); + splx(s); } } @@ -1679,6 +1720,7 @@ xbdback_trampoline(struct xbdback_instance *xbdi, void *obj) { xbdback_cont_t cont; + KASSERT(curcpu()->ci_ilevel >= IPL_BIO); while(obj != NULL && xbdi->xbdi_cont != NULL) { cont = xbdi->xbdi_cont;