This commit introduces a "punch hole" operation and optimizes the
transfer block size for macOS.
This commit also adds two members, discard_granularity and opt_io,
to the BlockSizes type in include/block/block.h. In addition, all
members of the type are now optional: set discard_granularity to -1
and any other member to 0 to request the default value for that
member.
Thanks to Konstantin Nazarov for detailed analysis of a flaw in an
old version of this change:
https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667
Signed-off-by: Akihiko Odaki
---
block/file-posix.c| 40 ++--
block/nvme.c | 2 ++
block/raw-format.c| 4 +++-
hw/block/block.c | 12 ++--
include/block/block.h | 2 ++
5 files changed, 55 insertions(+), 5 deletions(-)
diff --git a/block/file-posix.c b/block/file-posix.c
index 05079b40cae..21bdaf969c5 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -44,6 +44,7 @@
#if defined(__APPLE__) && (__MACH__)
#include <paths.h>
#include <sys/param.h>
+#include <sys/mount.h>
#include <IOKit/IOKitLib.h>
#include <IOKit/IOBSD.h>
#include <IOKit/storage/IOMediaBSDClient.h>
@@ -1292,6 +1293,8 @@ static int hdev_probe_blocksizes(BlockDriverState *bs,
BlockSizes *bsz)
if (check_for_dasd(s->fd) < 0) {
return -ENOTSUP;
}
+bsz->opt_io = 0;
+bsz->discard_granularity = -1;
ret = probe_logical_blocksize(s->fd, &bsz->log);
if (ret < 0) {
return ret;
@@ -1586,6 +1589,7 @@ out:
}
}
+G_GNUC_UNUSED
static int translate_err(int err)
{
if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
@@ -1795,16 +1799,27 @@ static int handle_aiocb_discard(void *opaque)
}
} while (errno == EINTR);
-ret = -errno;
+ret = translate_err(-errno);
#endif
} else {
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
aiocb->aio_offset, aiocb->aio_nbytes);
+ret = translate_err(-errno);
+#elif defined(__APPLE__) && (__MACH__)
+fpunchhole_t fpunchhole;
+fpunchhole.fp_flags = 0;
+fpunchhole.reserved = 0;
+fpunchhole.fp_offset = aiocb->aio_offset;
+fpunchhole.fp_length = aiocb->aio_nbytes;
+if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) {
+ret = errno == ENODEV ? -ENOTSUP : -errno;
+} else {
+ret = 0;
+}
#endif
}
-ret = translate_err(ret);
if (ret == -ENOTSUP) {
s->has_discard = false;
}
@@ -2113,6 +2128,26 @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
}
+static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+#if defined(__APPLE__) && (__MACH__)
+BDRVRawState *s = bs->opaque;
+struct statfs buf;
+
+if (!fstatfs(s->fd, &buf)) {
+bsz->phys = 0;
+bsz->log = 0;
+bsz->opt_io = buf.f_iosize;
+bsz->discard_granularity = buf.f_bsize;
+return 0;
+}
+
+return -errno;
+#else
+return -ENOTSUP;
+#endif
+}
+
static void raw_aio_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
{
@@ -3247,6 +3282,7 @@ BlockDriver bdrv_file = {
.bdrv_refresh_limits = raw_refresh_limits,
.bdrv_io_plug = raw_aio_plug,
.bdrv_io_unplug = raw_aio_unplug,
+.bdrv_probe_blocksizes = raw_probe_blocksizes,
.bdrv_attach_aio_context = raw_aio_attach_aio_context,
.bdrv_co_truncate = raw_co_truncate,
diff --git a/block/nvme.c b/block/nvme.c
index 2b5421e7aa6..1845d07577b 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -989,6 +989,8 @@ static int nvme_probe_blocksizes(BlockDriverState *bs,
BlockSizes *bsz)
uint32_t blocksize = nvme_get_blocksize(bs);
bsz->phys = blocksize;
bsz->log = blocksize;
+bsz->opt_io = 0;
+bsz->discard_granularity = -1;
return 0;
}
diff --git a/block/raw-format.c b/block/raw-format.c
index 7717578ed6a..847df11f2ae 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -507,6 +507,7 @@ static int raw_probe(const uint8_t *buf, int buf_size,
const char *filename)
static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{
BDRVRawState *s = bs->opaque;
+uint32_t size;
int ret;
ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
@@ -514,7 +515,8 @@ static int raw_probe_blocksizes(BlockDriverState *bs,
BlockSizes *bsz)
return ret;
}
-if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
+size = MAX(bsz->log, bsz->phys);
+if (size && !QEMU_IS_ALIGNED(s->offset, size)) {
return -ENOTSUP;
}
diff --git a/hw/block/block.c b/hw/block/block.c
index 1e34573da71..c907e5a7722 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -70,19 +70,27 @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp)
backend_ret = blk_probe_blocksizes(blk, &blocksizes);
/* fill in detected values if they are not defined via qemu command