This patch adds bypass mode support for the coroutine in bdrv_co_aio_rw_vector(), which is in the fast path of many block devices, especially virtio-blk dataplane.
Signed-off-by: Ming Lei <ming....@canonical.com> --- block.c | 129 +++++++++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 105 insertions(+), 24 deletions(-) diff --git a/block.c b/block.c index 8cf519b..d84079a 100644 --- a/block.c +++ b/block.c @@ -55,6 +55,21 @@ struct BdrvDirtyBitmap { QLIST_ENTRY(BdrvDirtyBitmap) list; }; +typedef struct CoroutineIOCompletion { + Coroutine *coroutine; + int ret; + bool bypass; + QEMUIOVector *bounced_iov; +} CoroutineIOCompletion; + +typedef struct BlockDriverAIOCBCoroutine { + BlockDriverAIOCB common; + BlockRequest req; + bool is_write; + bool *done; + QEMUBH *bh; +} BlockDriverAIOCBCoroutine; + #define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */ static void bdrv_dev_change_media_cb(BlockDriverState *bs, bool load); @@ -120,6 +135,21 @@ int is_windows_drive(const char *filename) } #endif +static CoroutineIOCompletion *bdrv_get_co_io_comp(BlockDriverAIOCBCoroutine + *acb) +{ + return (CoroutineIOCompletion *)((void *)acb + + sizeof(BlockDriverAIOCBCoroutine)); +} + +static BlockDriverAIOCBCoroutine *bdrv_get_aio_co(CoroutineIOCompletion *co) +{ + assert(co->bypass); + + return (BlockDriverAIOCBCoroutine *)((void *)co - + sizeof(BlockDriverAIOCBCoroutine)); +} + /* throttling disk I/O limits */ void bdrv_set_io_limits(BlockDriverState *bs, ThrottleConfig *cfg) @@ -3081,7 +3111,16 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, ret = drv->bdrv_co_readv(bs, sector_num, local_sectors, &local_qiov); - qemu_iovec_destroy(&local_qiov); + + if (qemu_coroutine_self_bypassed()) { + CoroutineIOCompletion *pco = bdrv_get_co_io_comp( + (BlockDriverAIOCBCoroutine *) + qemu_coroutine_get_var()); + pco->bounced_iov = g_malloc(sizeof(QEMUIOVector)); + *pco->bounced_iov = local_qiov; + } else { + qemu_iovec_destroy(&local_qiov); + } } else { ret = 0; } @@ -4659,15 +4698,6 @@ static BlockDriverAIOCB *bdrv_aio_writev_em(BlockDriverState *bs, return bdrv_aio_rw_vector(bs, 
sector_num, qiov, nb_sectors, cb, opaque, 1); } - -typedef struct BlockDriverAIOCBCoroutine { - BlockDriverAIOCB common; - BlockRequest req; - bool is_write; - bool *done; - QEMUBH* bh; -} BlockDriverAIOCBCoroutine; - static void bdrv_aio_co_cancel_em(BlockDriverAIOCB *blockacb) { AioContext *aio_context = bdrv_get_aio_context(blockacb->bs); @@ -4686,6 +4716,12 @@ static const AIOCBInfo bdrv_em_co_aiocb_info = { .cancel = bdrv_aio_co_cancel_em, }; +static const AIOCBInfo bdrv_em_co_bypass_aiocb_info = { + .aiocb_size = sizeof(BlockDriverAIOCBCoroutine) + + sizeof(CoroutineIOCompletion), + .cancel = bdrv_aio_co_cancel_em, +}; + static void bdrv_co_em_bh(void *opaque) { BlockDriverAIOCBCoroutine *acb = opaque; @@ -4705,6 +4741,12 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque) { BlockDriverAIOCBCoroutine *acb = opaque; BlockDriverState *bs = acb->common.bs; + bool bypass = qemu_coroutine_self_bypassed(); + + if (bypass) { + qemu_coroutine_set_var(acb); + memset(bdrv_get_co_io_comp(acb), 0, sizeof(CoroutineIOCompletion)); + } if (!acb->is_write) { acb->req.error = bdrv_co_do_readv(bs, acb->req.sector, @@ -4714,8 +4756,10 @@ static void coroutine_fn bdrv_co_do_rw(void *opaque) acb->req.nb_sectors, acb->req.qiov, acb->req.flags); } - acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); - qemu_bh_schedule(acb->bh); + if (!bypass) { + acb->bh = aio_bh_new(bdrv_get_aio_context(bs), bdrv_co_em_bh, acb); + qemu_bh_schedule(acb->bh); + } } static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, @@ -4729,8 +4773,18 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, { Coroutine *co; BlockDriverAIOCBCoroutine *acb; + const AIOCBInfo *aiocb_info; + bool bypass; - acb = qemu_aio_get(&bdrv_em_co_aiocb_info, bs, cb, opaque); + if (qemu_aio_get_bypass_co(bdrv_get_aio_context(bs))) { + aiocb_info = &bdrv_em_co_bypass_aiocb_info; + bypass = true; + } else { + aiocb_info = &bdrv_em_co_aiocb_info; + bypass = false; + } + + acb = 
qemu_aio_get(aiocb_info, bs, cb, opaque); acb->req.sector = sector_num; acb->req.nb_sectors = nb_sectors; acb->req.qiov = qiov; @@ -4738,8 +4792,14 @@ static BlockDriverAIOCB *bdrv_co_aio_rw_vector(BlockDriverState *bs, acb->is_write = is_write; acb->done = NULL; - co = qemu_coroutine_create(bdrv_co_do_rw); - qemu_coroutine_enter(co, acb); + if (!bypass) { + co = qemu_coroutine_create(bdrv_co_do_rw); + qemu_coroutine_enter(co, acb); + } else { + qemu_coroutine_set_bypass(true); + bdrv_co_do_rw(acb); + qemu_coroutine_set_bypass(false); + } return &acb->common; } @@ -4833,17 +4893,28 @@ void qemu_aio_release(void *p) /**************************************************************/ /* Coroutine block device emulation */ -typedef struct CoroutineIOCompletion { - Coroutine *coroutine; - int ret; -} CoroutineIOCompletion; - static void bdrv_co_io_em_complete(void *opaque, int ret) { CoroutineIOCompletion *co = opaque; co->ret = ret; - qemu_coroutine_enter(co->coroutine, NULL); + + if (!co->bypass) { + qemu_coroutine_enter(co->coroutine, NULL); + } else { + BlockDriverAIOCBCoroutine *acb = bdrv_get_aio_co(co); + + acb->common.cb(acb->common.opaque, ret); + if (acb->done) { + *acb->done = true; + } + + if (co->bounced_iov) { + qemu_iovec_destroy(co->bounced_iov); + g_free(co->bounced_iov); + } + /* NOTE(review): release last — 'co' is embedded in this AIOCB + * allocation, so freeing it first made the bounced_iov access + * above a use-after-free. */ + qemu_aio_release(acb); + } } static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, @@ -4853,21 +4924,31 @@ static int coroutine_fn bdrv_co_io_em(BlockDriverState *bs, int64_t sector_num, CoroutineIOCompletion co = { .coroutine = qemu_coroutine_self(), }; + CoroutineIOCompletion *pco = &co; BlockDriverAIOCB *acb; + if (qemu_coroutine_bypassed(co.coroutine)) { + pco = bdrv_get_co_io_comp((BlockDriverAIOCBCoroutine *) + qemu_coroutine_get_var()); + pco->bypass = true; + } + if (is_write) { acb = bs->drv->bdrv_aio_writev(bs, sector_num, iov, nb_sectors, - bdrv_co_io_em_complete, &co); + bdrv_co_io_em_complete, pco); } else { acb = bs->drv->bdrv_aio_readv(bs,
sector_num, iov, nb_sectors, - bdrv_co_io_em_complete, &co); + bdrv_co_io_em_complete, pco); } trace_bdrv_co_io_em(bs, sector_num, nb_sectors, is_write, acb); if (!acb) { return -EIO; } - qemu_coroutine_yield(); + + if (!pco->bypass) { + qemu_coroutine_yield(); + } return co.ret; } -- 1.7.9.5