Re: [Qemu-block] [Qemu-devel] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on server
On Fri, Apr 08, 2016 at 04:05:57PM -0600, Eric Blake wrote: > RFC because there is still discussion on the NBD list about > adding an NBD_OPT_ to let the client suggest server defaults > related to scanning for zeroes during NBD_CMD_WRITE, which may > tweak this patch. > > Upstream NBD protocol recently added the ability to efficiently > write zeroes without having to send the zeroes over the wire, > along with a flag to control whether the client wants a hole. > > Signed-off-by: Eric Blake> --- > include/block/nbd.h | 5 - > nbd/server.c| 63 > ++--- > 2 files changed, 64 insertions(+), 4 deletions(-) > > diff --git a/include/block/nbd.h b/include/block/nbd.h > index 4c57754..a1d955c 100644 > --- a/include/block/nbd.h > +++ b/include/block/nbd.h > @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply; > #define NBD_FLAG_SEND_FUA (1 << 3)/* Send FUA (Force Unit > Access) */ > #define NBD_FLAG_ROTATIONAL (1 << 4)/* Use elevator algorithm - > rotational media */ > #define NBD_FLAG_SEND_TRIM (1 << 5)/* Send TRIM (discard) */ > +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ > #define NBD_FLAG_SEND_CLOSE (1 << 8)/* Send CLOSE */ > > /* New-style handshake (global) flags, sent from server to client, and > @@ -92,7 +93,8 @@ typedef struct nbd_reply nbd_reply; > #define NBD_REP_ERR_UNKNOWN ((UINT32_C(1) << 31) | 6) /* Export unknown > */ > > /* Request flags, sent from client to server during transmission phase */ > -#define NBD_CMD_FLAG_FUA(1 << 0) > +#define NBD_CMD_FLAG_FUA(1 << 0) /* 'force unit access' during write > */ > +#define NBD_CMD_FLAG_NO_HOLE(1 << 1) /* don't punch hole on zero run */ > > /* Supported request types */ > enum { > @@ -101,6 +103,7 @@ enum { > NBD_CMD_DISC = 2, > NBD_CMD_FLUSH = 3, > NBD_CMD_TRIM = 4, > +NBD_CMD_WRITE_ZEROES = 5, > NBD_CMD_CLOSE = 7, > }; > > diff --git a/nbd/server.c b/nbd/server.c > index 2a6eaf2..09af915 100644 > --- a/nbd/server.c > +++ b/nbd/server.c > @@ -625,7 +625,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData > *data) > int rc; > const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | >NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | > - NBD_FLAG_SEND_CLOSE); > + NBD_FLAG_SEND_CLOSE | > + NBD_FLAG_SEND_WRITE_ZEROES); > bool oldStyle; > size_t len; > > @@ -1088,7 +1089,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, > struct nbd_request *reque > goto out; > } > > -if (request->flags & ~NBD_CMD_FLAG_FUA) { > +if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { > LOG("unsupported flags (got 0x%x)", request->flags); > return -EINVAL; > } > @@ -1102,7 +1103,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, > struct nbd_request *reque > TRACE("Decoding type"); > > command = request->type; > -if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { > +if (request->flags & NBD_CMD_FLAG_NO_HOLE && > +!(command == NBD_CMD_WRITE || command == NBD_CMD_WRITE_ZEROES)) { > +LOG("NO_HOLE flag valid only with write operation"); > +return -EINVAL; > +} > +if (command == NBD_CMD_READ || command == NBD_CMD_WRITE || > +command == NBD_CMD_WRITE_ZEROES) { > if (request->len > NBD_MAX_BUFFER_SIZE) { > LOG("len (%" PRIu32" ) is larger than max len (%u)", > request->len, NBD_MAX_BUFFER_SIZE); > @@ -1143,6 +1150,7 @@ static void nbd_trip(void *opaque) > struct nbd_reply reply; > ssize_t ret; > uint32_t command; > +int flags; > > TRACE("Reading request."); > if (client->closing) { > @@ -1221,6 +1229,9 @@ static void nbd_trip(void *opaque) > > TRACE("Writing to device"); > > +/* FIXME: if the client passes NBD_CMD_FLAG_NO_HOLE, can we > + * make that override a server that is set to look for > + * holes? */ > ret = blk_write(exp->blk, > (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE, > req->data, request.len / BDRV_SECTOR_SIZE); > @@ -1243,6 +1254,52 @@ static void nbd_trip(void *opaque) > goto out; > } > break; > +case NBD_CMD_WRITE_ZEROES: > +TRACE("Request type is WRITE_ZEROES"); > + > +if (exp->nbdflags & NBD_FLAG_READ_ONLY) { > +TRACE("Server is read-only, return error"); > +reply.error = EROFS; > +goto error_reply; > +} > + > +TRACE("Writing to device"); > + > +flags = 0; > +if (request.flags & NBD_CMD_FLAG_FUA) { > +flags |= BDRV_REQ_FUA; > +} > +
Re: [Qemu-block] [Qemu-devel] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on server
On 8 Apr 2016, at 23:05, Eric Blakewrote: > RFC because there is still discussion on the NBD list about > adding an NBD_OPT_ to let the client suggest server defaults > related to scanning for zeroes during NBD_CMD_WRITE, which may > tweak this patch. > > Upstream NBD protocol recently added the ability to efficiently > write zeroes without having to send the zeroes over the wire, > along with a flag to control whether the client wants a hole. > > Signed-off-by: Eric Blake > --- > include/block/nbd.h | 5 - > nbd/server.c| 63 ++--- > 2 files changed, 64 insertions(+), 4 deletions(-) > > diff --git a/include/block/nbd.h b/include/block/nbd.h > index 4c57754..a1d955c 100644 > --- a/include/block/nbd.h > +++ b/include/block/nbd.h > @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply; > #define NBD_FLAG_SEND_FUA (1 << 3)/* Send FUA (Force Unit > Access) */ > #define NBD_FLAG_ROTATIONAL (1 << 4)/* Use elevator algorithm - > rotational media */ > #define NBD_FLAG_SEND_TRIM (1 << 5)/* Send TRIM (discard) */ > +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */ > #define NBD_FLAG_SEND_CLOSE (1 << 8)/* Send CLOSE */ > > /* New-style handshake (global) flags, sent from server to client, and > @@ -92,7 +93,8 @@ typedef struct nbd_reply nbd_reply; > #define NBD_REP_ERR_UNKNOWN ((UINT32_C(1) << 31) | 6) /* Export unknown */ > > /* Request flags, sent from client to server during transmission phase */ > -#define NBD_CMD_FLAG_FUA(1 << 0) > +#define NBD_CMD_FLAG_FUA(1 << 0) /* 'force unit access' during write > */ > +#define NBD_CMD_FLAG_NO_HOLE(1 << 1) /* don't punch hole on zero run */ > > /* Supported request types */ > enum { > @@ -101,6 +103,7 @@ enum { > NBD_CMD_DISC = 2, > NBD_CMD_FLUSH = 3, > NBD_CMD_TRIM = 4, > +NBD_CMD_WRITE_ZEROES = 5, > NBD_CMD_CLOSE = 7, > }; > > diff --git a/nbd/server.c b/nbd/server.c > index 2a6eaf2..09af915 100644 > --- a/nbd/server.c > +++ b/nbd/server.c > @@ -625,7 +625,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData > *data) > int rc; > const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM | > NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA | > - NBD_FLAG_SEND_CLOSE); > + NBD_FLAG_SEND_CLOSE | > + NBD_FLAG_SEND_WRITE_ZEROES); > bool oldStyle; > size_t len; > > @@ -1088,7 +1089,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, > struct nbd_request *reque > goto out; > } > > -if (request->flags & ~NBD_CMD_FLAG_FUA) { > +if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) { > LOG("unsupported flags (got 0x%x)", request->flags); > return -EINVAL; > } > @@ -1102,7 +1103,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, > struct nbd_request *reque > TRACE("Decoding type"); > > command = request->type; > -if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) { > +if (request->flags & NBD_CMD_FLAG_NO_HOLE && > +!(command == NBD_CMD_WRITE || command == NBD_CMD_WRITE_ZEROES)) { > +LOG("NO_HOLE flag valid only with write operation"); > +return -EINVAL; > +} > +if (command == NBD_CMD_READ || command == NBD_CMD_WRITE || > +command == NBD_CMD_WRITE_ZEROES) { > if (request->len > NBD_MAX_BUFFER_SIZE) { > LOG("len (%" PRIu32" ) is larger than max len (%u)", > request->len, NBD_MAX_BUFFER_SIZE); > @@ -1143,6 +1150,7 @@ static void nbd_trip(void *opaque) > struct nbd_reply reply; > ssize_t ret; > uint32_t command; > +int flags; > > TRACE("Reading request."); > if (client->closing) { > @@ -1221,6 +1229,9 @@ static void nbd_trip(void *opaque) > > TRACE("Writing to device"); > > +/* FIXME: if the client passes NBD_CMD_FLAG_NO_HOLE, can we > + * make that override a server that is set to look for > + * holes? */ No, and I reckon that's an error in the spec. There is a 'MUST' which should be a 'SHOULD'. Good luck with any FS which supports dedupe for instance! > ret = blk_write(exp->blk, > (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE, > req->data, request.len / BDRV_SECTOR_SIZE); > @@ -1243,6 +1254,52 @@ static void nbd_trip(void *opaque) > goto out; > } > break; > +case NBD_CMD_WRITE_ZEROES: > +TRACE("Request type is WRITE_ZEROES"); > + > +if (exp->nbdflags & NBD_FLAG_READ_ONLY) { > +TRACE("Server is read-only, return error"); > +reply.error = EROFS; > +goto error_reply; > +} > + > +TRACE("Writing to device"); > + > +flags = 0; > +