Upstream NBD protocol recently added the ability to efficiently write zeroes without having to send the zeroes over the wire, along with a flag to control whether the client wants a hole.
The generic block code takes care of falling back to the obvious write of lots of zeroes if we return -ENOTSUP because the server does not have WRITE_ZEROES. Ideally, since NBD_CMD_WRITE_ZEROES does not involve any data over the wire, we want to support transactions that are much larger than the normal 32M limit imposed on NBD_CMD_WRITE. But the server may still have a limit smaller than UINT_MAX, so until experimental NBD protocol additions for advertising various command sizes is finalized (see [1], [2]), for now we just stick to the same limits as normal writes. [1] https://github.com/yoe/nbd/blob/extension-info/doc/proto.md [2] https://sourceforge.net/p/nbd/mailman/message/35081223/ Signed-off-by: Eric Blake <ebl...@redhat.com> --- v5: enhance commit message v4: rebase to byte-based limits v3: rebase, tell block layer about our support --- block/nbd-client.h | 2 ++ block/nbd-client.c | 35 +++++++++++++++++++++++++++++++++++ block/nbd.c | 4 ++++ 3 files changed, 41 insertions(+) diff --git a/block/nbd-client.h b/block/nbd-client.h index 044aca4..2cfe377 100644 --- a/block/nbd-client.h +++ b/block/nbd-client.h @@ -48,6 +48,8 @@ int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count); int nbd_client_co_flush(BlockDriverState *bs); int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int count, BdrvRequestFlags flags); int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); diff --git a/block/nbd-client.c b/block/nbd-client.c index 7e9c3ec..104ba2f 100644 --- a/block/nbd-client.c +++ b/block/nbd-client.c @@ -275,6 +275,41 @@ int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset, return -reply.error; } +int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, + int count, BdrvRequestFlags flags) +{ + ssize_t ret; + NbdClientSession *client = nbd_get_client_session(bs); + struct nbd_request request = { + .type = NBD_CMD_WRITE_ZEROES, + .from = offset, + .len = count, + }; + struct nbd_reply reply; + + if (!(client->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES)) { + return -ENOTSUP; + } + + if (flags & BDRV_REQ_FUA) { + assert(client->nbdflags & NBD_FLAG_SEND_FUA); + request.flags |= NBD_CMD_FLAG_FUA; + } + if (!(flags & BDRV_REQ_MAY_UNMAP)) { + request.flags |= NBD_CMD_FLAG_NO_HOLE; + } + + nbd_coroutine_start(client, &request); + ret = nbd_co_send_request(bs, &request, NULL); + if (ret < 0) { + reply.error = -ret; + } else { + nbd_co_receive_reply(client, &request, &reply, NULL); + } + nbd_coroutine_end(client, &request); + return -reply.error; +} + int nbd_client_co_flush(BlockDriverState *bs) { NbdClientSession *client = nbd_get_client_session(bs); diff --git a/block/nbd.c b/block/nbd.c index 8d57220..049d1bd 100644 --- a/block/nbd.c +++ b/block/nbd.c @@ -357,6 +357,7 @@ static int nbd_co_flush(BlockDriverState *bs) static void nbd_refresh_limits(BlockDriverState *bs, Error **errp) { bs->bl.max_pdiscard = NBD_MAX_BUFFER_SIZE; + bs->bl.max_pwrite_zeroes = NBD_MAX_BUFFER_SIZE; bs->bl.max_transfer = NBD_MAX_BUFFER_SIZE; } @@ -440,6 +441,7 @@ static BlockDriver bdrv_nbd = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -458,6 +460,7 @@ static BlockDriver bdrv_nbd_tcp = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, @@ -476,6 +479,7 @@ static BlockDriver bdrv_nbd_unix = { .bdrv_file_open = nbd_open, .bdrv_co_preadv = nbd_client_co_preadv, .bdrv_co_pwritev = nbd_client_co_pwritev, + .bdrv_co_pwrite_zeroes = nbd_client_co_pwrite_zeroes, .bdrv_close = nbd_close, .bdrv_co_flush_to_os = nbd_co_flush, .bdrv_co_pdiscard = nbd_client_co_pdiscard, -- 2.5.5