The upstream NBD protocol recently added the ability to efficiently write zeroes without having to send the zeroes over the wire, along with a flag to control whether the client wants a hole.
The generic block code takes care of falling back to the obvious write of lots of zeroes if we return -ENOTSUP because the server does not have WRITE_ZEROES. Ideally, since NBD_CMD_WRITE_ZEROES does not involve any data over the wire, we want to support transactions that are much larger than the normal 32M limit imposed on NBD_CMD_WRITE. But the server may still have a limit smaller than UINT_MAX, so until the experimental NBD protocol additions for advertising various command sizes are finalized (see [1], [2]), we just stick to the same limits as normal writes. [1] https://github.com/yoe/nbd/blob/extension-info/doc/proto.md [2] https://sourceforge.net/p/nbd/mailman/message/35081223/ Signed-off-by: Eric Blake <ebl...@redhat.com> --- v6: rebase v5: enhance commit message v4: rebase to byte-based limits v3: rebase, tell block layer about our support --- block/nbd-client.h | 2 ++ block/nbd-client.c | 35 +++++++++++++++++++++++++++++++++++ block/nbd.c | 4 ++++ 3 files changed, 41 insertions(+) diff --git a/block/nbd-client.h b/block/nbd-client.h index 78e8e57..e51df22 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count); int nbd_client_co_flush(BlockDriverState *bs); int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int count, BdrvRequestFlags flags); int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); diff --git a/block/nbd-client.c b/block/nbd-client.c index 8e89add..31db557 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -275,6 +275,41 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, return -reply.error; } +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int count, BdrvRequestFlags flags) +{ + ssize_t ret; + 
NBDClientSession *client = nbd_get_client_session(bs); + NBDRequest request = { + .type = NBD_CMD_WRITE_ZEROES, + .from = offset, + .len = count, + }; + NBDReply reply; + + if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) { + return -ENOTSUP; + } + + if (flags & BDRV_REQ_FUA) { + assert(client->nbdflags & NBD_FLAG_SEND_FUA); + request.flags |= NBD_CMD_FLAG_FUA; + } + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + request.flags |= NBD_CMD_FLAG_NO_HOLE; + } + + nbd_coroutine_start(client, &request); + ret = nbd_co_send_request(bs, &request, NULL); + if (ret < 0) { + reply.error = -ret; + } else { + nbd_co_receive_reply(client, &request, &reply, NULL); + } + nbd_coroutine_end(client, &request); + return -reply.error; +} + int nbd_client_co_flush(BlockDriverState *bs) { NBDClientSession *client = nbd_get_client_session(bs); diff --git a/block/nbd.c b/block/nbd.c index e227490..6c7bbc8 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -403,6 +403,7 @@ static int nbd_co_flush(BlockDriverState *bs) static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) { bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE; + bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE; bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE; } @@ -491,6 +492,7 @@ static BlockDriver bdrv_nbd = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -509,6 +511,7 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -527,6 +530,7 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, 
.bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, -- 2.7.4