This commit introduces a "punch hole" operation and optimizes the transfer block size for macOS.
This commit introduces two additional members, discard_granularity and opt_io, to the BlockSizes type in include/block/block.h. Also, the members of the type are now optional: set discard_granularity to -1 and the other members to 0 to select the default values. Thanks to Konstantin Nazarov for the detailed analysis of a flaw in an old version of this change: https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667 Signed-off-by: Akihiko Odaki <akihiko.od...@gmail.com> --- block/file-posix.c | 40 ++++++++++++++++++++++++++++++++++++++-- block/nvme.c | 2 ++ block/raw-format.c | 4 +++- hw/block/block.c | 12 ++++++++++-- include/block/block.h | 2 ++ 5 files changed, 55 insertions(+), 5 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index 05079b40cae..21bdaf969c5 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -44,6 +44,7 @@ #if defined(__APPLE__) && (__MACH__) #include <paths.h> #include <sys/param.h> +#include <sys/mount.h> #include <IOKit/IOKitLib.h> #include <IOKit/IOBSD.h> #include <IOKit/storage/IOMediaBSDClient.h> @@ -1292,6 +1293,8 @@ static int hdev_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) if (check_for_dasd(s->fd) < 0) { return -ENOTSUP; } + bsz->opt_io = 0; + bsz->discard_granularity = -1; ret = probe_logical_blocksize(s->fd, &bsz->log); if (ret < 0) { return ret; @@ -1586,6 +1589,7 @@ out: } } +G_GNUC_UNUSED static int translate_err(int err) { if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP || @@ -1795,16 +1799,27 @@ static int handle_aiocb_discard(void *opaque) } } while (errno == EINTR); - ret = -errno; + ret = translate_err(-errno); #endif } else { #ifdef CONFIG_FALLOCATE_PUNCH_HOLE ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, aiocb->aio_offset, aiocb->aio_nbytes); + ret = translate_err(-errno); +#elif defined(__APPLE__) && (__MACH__) + fpunchhole_t fpunchhole; + fpunchhole.fp_flags = 0; + fpunchhole.reserved = 0; + fpunchhole.fp_offset = aiocb->aio_offset; + 
fpunchhole.fp_length = aiocb->aio_nbytes; + if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) { + ret = errno == ENODEV ? -ENOTSUP : -errno; + } else { + ret = 0; + } #endif } - ret = translate_err(ret); if (ret == -ENOTSUP) { s->has_discard = false; } @@ -2113,6 +2128,26 @@ static int raw_co_flush_to_disk(BlockDriverState *bs) return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb); } +static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) +{ +#if defined(__APPLE__) && (__MACH__) + BDRVRawState *s = bs->opaque; + struct statfs buf; + + if (!fstatfs(s->fd, &buf)) { + bsz->phys = 0; + bsz->log = 0; + bsz->opt_io = buf.f_iosize; + bsz->discard_granularity = buf.f_bsize; + return 0; + } + + return -errno; +#else + return -ENOTSUP; +#endif +} + static void raw_aio_attach_aio_context(BlockDriverState *bs, AioContext *new_context) { @@ -3247,6 +3282,7 @@ BlockDriver bdrv_file = { .bdrv_refresh_limits = raw_refresh_limits, .bdrv_io_plug = raw_aio_plug, .bdrv_io_unplug = raw_aio_unplug, + .bdrv_probe_blocksizes = raw_probe_blocksizes, .bdrv_attach_aio_context = raw_aio_attach_aio_context, .bdrv_co_truncate = raw_co_truncate, diff --git a/block/nvme.c b/block/nvme.c index 2b5421e7aa6..1845d07577b 100644 --- a/block/nvme.c +++ b/block/nvme.c @@ -989,6 +989,8 @@ static int nvme_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) uint32_t blocksize = nvme_get_blocksize(bs); bsz->phys = blocksize; bsz->log = blocksize; + bsz->opt_io = 0; + bsz->discard_granularity = -1; return 0; } diff --git a/block/raw-format.c b/block/raw-format.c index 7717578ed6a..847df11f2ae 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -507,6 +507,7 @@ static int raw_probe(const uint8_t *buf, int buf_size, const char *filename) static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) { BDRVRawState *s = bs->opaque; + uint32_t size; int ret; ret = bdrv_probe_blocksizes(bs->file->bs, bsz); @@ -514,7 +515,8 @@ static int 
raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) return ret; } - if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) { + size = MAX(bsz->log, bsz->phys); + if (size && !QEMU_IS_ALIGNED(s->offset, size)) { return -ENOTSUP; } diff --git a/hw/block/block.c b/hw/block/block.c index 1e34573da71..c907e5a7722 100644 --- a/hw/block/block.c +++ b/hw/block/block.c @@ -70,19 +70,27 @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp) backend_ret = blk_probe_blocksizes(blk, &blocksizes); /* fill in detected values if they are not defined via qemu command line */ if (!conf->physical_block_size) { - if (!backend_ret) { + if (!backend_ret && blocksizes.phys) { conf->physical_block_size = blocksizes.phys; } else { conf->physical_block_size = BDRV_SECTOR_SIZE; } } if (!conf->logical_block_size) { - if (!backend_ret) { + if (!backend_ret && blocksizes.log) { conf->logical_block_size = blocksizes.log; } else { conf->logical_block_size = BDRV_SECTOR_SIZE; } } + if (!backend_ret) { + if (!conf->opt_io_size) { + conf->opt_io_size = blocksizes.opt_io; + } + if (conf->discard_granularity == -1) { + conf->discard_granularity = blocksizes.discard_granularity; + } + } if (conf->logical_block_size > conf->physical_block_size) { error_setg(errp, diff --git a/include/block/block.h b/include/block/block.h index b3f6e509d49..d12471a6cc4 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -93,6 +93,8 @@ typedef enum { typedef struct BlockSizes { uint32_t phys; uint32_t log; + uint32_t discard_granularity; + uint32_t opt_io; } BlockSizes; typedef struct HDGeometry { -- 2.24.3 (Apple Git-128)