Re: [PATCH v3 1/2] block/file-posix: Optimize for macOS

2021-04-01 Thread Stefan Hajnoczi
On Thu, Mar 11, 2021 at 12:39:15AM +0900, Akihiko Odaki wrote:
> @@ -1586,6 +1589,7 @@ out:
>  }
>  }
>  
> +G_GNUC_UNUSED

Why isn't translate_err() used in the F_PUNCHHOLE case below?

If you really want to avoid using it on macOS, please add a #if with the
necessary conditions here so it's clear when this translate_err() is
needed.

> @@ -514,7 +515,8 @@ static int raw_probe_blocksizes(BlockDriverState *bs, 
> BlockSizes *bsz)
>  return ret;
>  }
>  
> -if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
> +size = MAX(bsz->log, bsz->phys);
> +if (size && !QEMU_IS_ALIGNED(s->offset, size)) {
>  return -ENOTSUP;
>  }
>  

This patch changes the semantics of bdrv_probe_blocksizes(). It used to
return -ENOTSUP when phys/log weren't available. Now it returns 0 and
the fields are 0. Please update the bdrv_probe_blocksizes doc comment in
include/block/block_int.h to mention phys and log, as well as that
fields can be set to 0 (or -1 in the case of discard_granularity).


signature.asc
Description: PGP signature


[PATCH v3 1/2] block/file-posix: Optimize for macOS

2021-03-10 Thread Akihiko Odaki
This commit introduces a "punch hole" operation and optimizes the
transfer block size for macOS.

This commit introduces two additional members,
discard_granularity and opt_io, to the BlockSizes type in
include/block/block.h. Also, the members of the type are now
optional: set discard_granularity to -1 and the other members
to 0 to request the default values.

Thanks to Konstantin Nazarov for detailed analysis of a flaw in an
old version of this change:
https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667

Signed-off-by: Akihiko Odaki 
---
 block/file-posix.c| 40 ++--
 block/nvme.c  |  2 ++
 block/raw-format.c|  4 +++-
 hw/block/block.c  | 12 ++--
 include/block/block.h |  2 ++
 5 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/block/file-posix.c b/block/file-posix.c
index 05079b40cae..21bdaf969c5 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -44,6 +44,7 @@
 #if defined(__APPLE__) && (__MACH__)
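 #include <paths.h>
 #include <sys/param.h>
+#include <sys/mount.h>
 #include <IOKit/IOKitLib.h>
 #include <IOKit/IOBSD.h>
 #include <IOKit/storage/IOMediaBSDClient.h>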
@@ -1292,6 +1293,8 @@ static int hdev_probe_blocksizes(BlockDriverState *bs, 
BlockSizes *bsz)
 if (check_for_dasd(s->fd) < 0) {
 return -ENOTSUP;
 }
+bsz->opt_io = 0;
+bsz->discard_granularity = -1;
 ret = probe_logical_blocksize(s->fd, &bsz->log);
 if (ret < 0) {
 return ret;
@@ -1586,6 +1589,7 @@ out:
 }
 }
 
+G_GNUC_UNUSED
 static int translate_err(int err)
 {
 if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
@@ -1795,16 +1799,27 @@ static int handle_aiocb_discard(void *opaque)
 }
 } while (errno == EINTR);
 
-ret = -errno;
+ret = translate_err(-errno);
 #endif
 } else {
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
 ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
aiocb->aio_offset, aiocb->aio_nbytes);
+ret = translate_err(-errno);
+#elif defined(__APPLE__) && (__MACH__)
+fpunchhole_t fpunchhole;
+fpunchhole.fp_flags = 0;
+fpunchhole.reserved = 0;
+fpunchhole.fp_offset = aiocb->aio_offset;
+fpunchhole.fp_length = aiocb->aio_nbytes;
+if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) {
+ret = errno == ENODEV ? -ENOTSUP : -errno;
+} else {
+ret = 0;
+}
 #endif
 }
 
-ret = translate_err(ret);
 if (ret == -ENOTSUP) {
 s->has_discard = false;
 }
@@ -2113,6 +2128,26 @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
 return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
 }
 
+static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
+{
+#if defined(__APPLE__) && (__MACH__)
+BDRVRawState *s = bs->opaque;
+struct statfs buf;
+
+if (!fstatfs(s->fd, &buf)) {
+bsz->phys = 0;
+bsz->log = 0;
+bsz->opt_io = buf.f_iosize;
+bsz->discard_granularity = buf.f_bsize;
+return 0;
+}
+
+return -errno;
+#else
+return -ENOTSUP;
+#endif
+}
+
 static void raw_aio_attach_aio_context(BlockDriverState *bs,
AioContext *new_context)
 {
@@ -3247,6 +3282,7 @@ BlockDriver bdrv_file = {
 .bdrv_refresh_limits = raw_refresh_limits,
 .bdrv_io_plug = raw_aio_plug,
 .bdrv_io_unplug = raw_aio_unplug,
+.bdrv_probe_blocksizes = raw_probe_blocksizes,
 .bdrv_attach_aio_context = raw_aio_attach_aio_context,
 
 .bdrv_co_truncate = raw_co_truncate,
diff --git a/block/nvme.c b/block/nvme.c
index 2b5421e7aa6..1845d07577b 100644
--- a/block/nvme.c
+++ b/block/nvme.c
@@ -989,6 +989,8 @@ static int nvme_probe_blocksizes(BlockDriverState *bs, 
BlockSizes *bsz)
 uint32_t blocksize = nvme_get_blocksize(bs);
 bsz->phys = blocksize;
 bsz->log = blocksize;
+bsz->opt_io = 0;
+bsz->discard_granularity = -1;
 return 0;
 }
 
diff --git a/block/raw-format.c b/block/raw-format.c
index 7717578ed6a..847df11f2ae 100644
--- a/block/raw-format.c
+++ b/block/raw-format.c
@@ -507,6 +507,7 @@ static int raw_probe(const uint8_t *buf, int buf_size, 
const char *filename)
 static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
 {
 BDRVRawState *s = bs->opaque;
+uint32_t size;
 int ret;
 
 ret = bdrv_probe_blocksizes(bs->file->bs, bsz);
@@ -514,7 +515,8 @@ static int raw_probe_blocksizes(BlockDriverState *bs, 
BlockSizes *bsz)
 return ret;
 }
 
-if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) {
+size = MAX(bsz->log, bsz->phys);
+if (size && !QEMU_IS_ALIGNED(s->offset, size)) {
 return -ENOTSUP;
 }
 
diff --git a/hw/block/block.c b/hw/block/block.c
index 1e34573da71..c907e5a7722 100644
--- a/hw/block/block.c
+++ b/hw/block/block.c
@@ -70,19 +70,27 @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp)
 backend_ret = blk_probe_blocksizes(blk, &blocksizes);
 /* fill in detected values if they are not defined via qemu command