δΊ 2013/12/7 1:22, Kevin Wolf ει: > bs->buffer_alignment is set by the device emulation and contains the > logical block size of the guest device. This isn't something that the > block layer should know, and even less something to use for determining > the right alignment of buffers to be used for the host. > > The new function bdrv_opt_mem_align() allows for hooks in a BlockDriver > so that it can tell the qemu block layer the optimal alignment to be > used so that no bounce buffer must be used in the driver. > > This patch may change the buffer alignment from 4k to 512 for all > callers that used qemu_blockalign() with the top-level image format > BlockDriverState. The value was never propagated to other levels in the > tree, so in particular raw-posix never required anything else than 512. > > While on disks with 4k sectors direct I/O requires a 4k alignment, > memory may still be okay when aligned to 512 byte boundaries. This is > what must have happened in practice, because otherwise this would > already have failed earlier. Therefore I don't expect regressions even > with this intermediate state. Later, raw-posix can implement the hook > and expose a different memory alignment requirement. > > Signed-off-by: Kevin Wolf <kw...@redhat.com> > --- > block.c | 32 +++++++++++++++++++++++++++++--- > include/block/block.h | 1 + > include/block/block_int.h | 4 ++++ > 3 files changed, 34 insertions(+), 3 deletions(-) > > diff --git a/block.c b/block.c > index 613201b..669793b 100644 > --- a/block.c > +++ b/block.c > @@ -213,6 +213,31 @@ static void bdrv_io_limits_intercept(BlockDriverState > *bs, > qemu_co_queue_next(&bs->throttled_reqs[is_write]); > } > > +size_t bdrv_opt_mem_align(BlockDriverState *bs) > +{ > + size_t alignment; > + > + if (!bs || !bs->drv) { > + /* 4k should be on the safe side */ > + return 4096; > + } > + > + if (bs->drv->bdrv_opt_mem_align) { > + return bs->drv->bdrv_opt_mem_align(bs); > + } > + > + if (bs->file) { > + alignment = bdrv_opt_mem_align(bs->file); > + } else { > + alignment = 512; > + } > + > + if (bs->backing_hd) { > + alignment = MAX(alignment, bdrv_opt_mem_align(bs->backing_hd)); > + }
Maybe I didn't understand the commit message correctly, does this code intend to get MAX alignment value in a chain? For example: base(4096)->mid(512)->top(1024) results: 4096 The condition to traver the backing files seems complex. > + return alignment; > +} > + > /* check if the path starts with "<protocol>:" */ > static int path_has_protocol(const char *path) > { > @@ -4335,7 +4360,7 @@ void bdrv_set_buffer_alignment(BlockDriverState *bs, > int align) > > void *qemu_blockalign(BlockDriverState *bs, size_t size) > { > - return qemu_memalign((bs && bs->buffer_alignment) ? bs->buffer_alignment > : 512, size); > + return qemu_memalign(bdrv_opt_mem_align(bs), size); > } > > /* > @@ -4344,12 +4369,13 @@ void *qemu_blockalign(BlockDriverState *bs, size_t > size) > bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) > { > int i; > + size_t alignment = bdrv_opt_mem_align(bs); > > for (i = 0; i < qiov->niov; i++) { > - if ((uintptr_t) qiov->iov[i].iov_base % bs->buffer_alignment) { > + if ((uintptr_t) qiov->iov[i].iov_base % alignment) { > return false; > } > - if (qiov->iov[i].iov_len % bs->buffer_alignment) { > + if (qiov->iov[i].iov_len % alignment) { > return false; > } > } > diff --git a/include/block/block.h b/include/block/block.h > index 3560deb..d262c0e 100644 > --- a/include/block/block.h > +++ b/include/block/block.h > @@ -383,6 +383,7 @@ void bdrv_img_create(const char *filename, const char > *fmt, > char *options, uint64_t img_size, int flags, > Error **errp, bool quiet); > > +size_t bdrv_opt_mem_align(BlockDriverState *bs); > void bdrv_set_buffer_alignment(BlockDriverState *bs, int align); > void *qemu_blockalign(BlockDriverState *bs, size_t size); > bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); > diff --git a/include/block/block_int.h b/include/block/block_int.h > index 1666066..6a84f83 100644 > --- a/include/block/block_int.h > +++ b/include/block/block_int.h > @@ -119,6 +119,10 @@ struct BlockDriver { > int64_t sector_num, int nb_sectors, > BlockDriverCompletionFunc *cb, void *opaque); > > + /* Returns the alignment in bytes that is required so that no bounce > buffer > + * is required throughout the stack */ > + int (*bdrv_opt_mem_align)(BlockDriverState *bs); > + > int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, > int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); > int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, >