Re: [Qemu-block] [Qemu-devel] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on server

2016-04-09 Thread Pavel Borzenkov
On Fri, Apr 08, 2016 at 04:05:57PM -0600, Eric Blake wrote:
> RFC because there is still discussion on the NBD list about
> adding an NBD_OPT_ to let the client suggest server defaults
> related to scanning for zeroes during NBD_CMD_WRITE, which may
> tweak this patch.
> 
> Upstream NBD protocol recently added the ability to efficiently
> write zeroes without having to send the zeroes over the wire,
> along with a flag to control whether the client wants a hole.
> 
> Signed-off-by: Eric Blake 
> ---
>  include/block/nbd.h |  5 -
>  nbd/server.c| 63 
> ++---
>  2 files changed, 64 insertions(+), 4 deletions(-)
> 
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index 4c57754..a1d955c 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply;
>  #define NBD_FLAG_SEND_FUA   (1 << 3)/* Send FUA (Force Unit 
> Access) */
>  #define NBD_FLAG_ROTATIONAL (1 << 4)/* Use elevator algorithm - 
> rotational media */
>  #define NBD_FLAG_SEND_TRIM  (1 << 5)/* Send TRIM (discard) */
> +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
>  #define NBD_FLAG_SEND_CLOSE (1 << 8)/* Send CLOSE */
> 
>  /* New-style handshake (global) flags, sent from server to client, and
> @@ -92,7 +93,8 @@ typedef struct nbd_reply nbd_reply;
>  #define NBD_REP_ERR_UNKNOWN ((UINT32_C(1) << 31) | 6) /* Export unknown 
> */
> 
>  /* Request flags, sent from client to server during transmission phase */
> -#define NBD_CMD_FLAG_FUA(1 << 0)
> +#define NBD_CMD_FLAG_FUA(1 << 0) /* 'force unit access' during write 
> */
> +#define NBD_CMD_FLAG_NO_HOLE(1 << 1) /* don't punch hole on zero run */
> 
>  /* Supported request types */
>  enum {
> @@ -101,6 +103,7 @@ enum {
>  NBD_CMD_DISC = 2,
>  NBD_CMD_FLUSH = 3,
>  NBD_CMD_TRIM = 4,
> +NBD_CMD_WRITE_ZEROES = 5,
>  NBD_CMD_CLOSE = 7,
>  };
> 
> diff --git a/nbd/server.c b/nbd/server.c
> index 2a6eaf2..09af915 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -625,7 +625,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
> *data)
>  int rc;
>  const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
>NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
> -  NBD_FLAG_SEND_CLOSE);
> +  NBD_FLAG_SEND_CLOSE |
> +  NBD_FLAG_SEND_WRITE_ZEROES);
>  bool oldStyle;
>  size_t len;
> 
> @@ -1088,7 +1089,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, 
> struct nbd_request *reque
>  goto out;
>  }
> 
> -if (request->flags & ~NBD_CMD_FLAG_FUA) {
> +if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
>  LOG("unsupported flags (got 0x%x)", request->flags);
>  return -EINVAL;
>  }
> @@ -1102,7 +1103,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, 
> struct nbd_request *reque
>  TRACE("Decoding type");
> 
>  command = request->type;
> -if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
> +if (request->flags & NBD_CMD_FLAG_NO_HOLE &&
> +!(command == NBD_CMD_WRITE || command == NBD_CMD_WRITE_ZEROES)) {
> +LOG("NO_HOLE flag valid only with write operation");
> +return -EINVAL;
> +}
> +if (command == NBD_CMD_READ || command == NBD_CMD_WRITE ||
> +command == NBD_CMD_WRITE_ZEROES) {
>  if (request->len > NBD_MAX_BUFFER_SIZE) {
>  LOG("len (%" PRIu32" ) is larger than max len (%u)",
>  request->len, NBD_MAX_BUFFER_SIZE);
> @@ -1143,6 +1150,7 @@ static void nbd_trip(void *opaque)
>  struct nbd_reply reply;
>  ssize_t ret;
>  uint32_t command;
> +int flags;
> 
>  TRACE("Reading request.");
>  if (client->closing) {
> @@ -1221,6 +1229,9 @@ static void nbd_trip(void *opaque)
> 
>  TRACE("Writing to device");
> 
> +/* FIXME: if the client passes NBD_CMD_FLAG_NO_HOLE, can we
> + * make that override a server that is set to look for
> + * holes? */
>  ret = blk_write(exp->blk,
>  (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE,
>  req->data, request.len / BDRV_SECTOR_SIZE);
> @@ -1243,6 +1254,52 @@ static void nbd_trip(void *opaque)
>  goto out;
>  }
>  break;
> +case NBD_CMD_WRITE_ZEROES:
> +TRACE("Request type is WRITE_ZEROES");
> +
> +if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
> +TRACE("Server is read-only, return error");
> +reply.error = EROFS;
> +goto error_reply;
> +}
> +
> +TRACE("Writing to device");
> +
> +flags = 0;
> +if (request.flags & NBD_CMD_FLAG_FUA) {
> +flags |= BDRV_REQ_FUA;
> +}
> +

Re: [Qemu-block] [Qemu-devel] [RFC PATCH 17/18] nbd: Implement NBD_CMD_WRITE_ZEROES on server

2016-04-09 Thread Alex Bligh

On 8 Apr 2016, at 23:05, Eric Blake  wrote:

> RFC because there is still discussion on the NBD list about
> adding an NBD_OPT_ to let the client suggest server defaults
> related to scanning for zeroes during NBD_CMD_WRITE, which may
> tweak this patch.
> 
> Upstream NBD protocol recently added the ability to efficiently
> write zeroes without having to send the zeroes over the wire,
> along with a flag to control whether the client wants a hole.
> 
> Signed-off-by: Eric Blake 
> ---
> include/block/nbd.h |  5 -
> nbd/server.c| 63 ++---
> 2 files changed, 64 insertions(+), 4 deletions(-)
> 
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index 4c57754..a1d955c 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -70,6 +70,7 @@ typedef struct nbd_reply nbd_reply;
> #define NBD_FLAG_SEND_FUA   (1 << 3)/* Send FUA (Force Unit 
> Access) */
> #define NBD_FLAG_ROTATIONAL (1 << 4)/* Use elevator algorithm - 
> rotational media */
> #define NBD_FLAG_SEND_TRIM  (1 << 5)/* Send TRIM (discard) */
> +#define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
> #define NBD_FLAG_SEND_CLOSE (1 << 8)/* Send CLOSE */
> 
> /* New-style handshake (global) flags, sent from server to client, and
> @@ -92,7 +93,8 @@ typedef struct nbd_reply nbd_reply;
> #define NBD_REP_ERR_UNKNOWN ((UINT32_C(1) << 31) | 6) /* Export unknown */
> 
> /* Request flags, sent from client to server during transmission phase */
> -#define NBD_CMD_FLAG_FUA(1 << 0)
> +#define NBD_CMD_FLAG_FUA(1 << 0) /* 'force unit access' during write 
> */
> +#define NBD_CMD_FLAG_NO_HOLE(1 << 1) /* don't punch hole on zero run */
> 
> /* Supported request types */
> enum {
> @@ -101,6 +103,7 @@ enum {
> NBD_CMD_DISC = 2,
> NBD_CMD_FLUSH = 3,
> NBD_CMD_TRIM = 4,
> +NBD_CMD_WRITE_ZEROES = 5,
> NBD_CMD_CLOSE = 7,
> };
> 
> diff --git a/nbd/server.c b/nbd/server.c
> index 2a6eaf2..09af915 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -625,7 +625,8 @@ static coroutine_fn int nbd_negotiate(NBDClientNewData 
> *data)
> int rc;
> const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
>   NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
> -  NBD_FLAG_SEND_CLOSE);
> +  NBD_FLAG_SEND_CLOSE |
> +  NBD_FLAG_SEND_WRITE_ZEROES);
> bool oldStyle;
> size_t len;
> 
> @@ -1088,7 +1089,7 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, 
> struct nbd_request *reque
> goto out;
> }
> 
> -if (request->flags & ~NBD_CMD_FLAG_FUA) {
> +if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
> LOG("unsupported flags (got 0x%x)", request->flags);
> return -EINVAL;
> }
> @@ -1102,7 +1103,13 @@ static ssize_t nbd_co_receive_request(NBDRequest *req, 
> struct nbd_request *reque
> TRACE("Decoding type");
> 
> command = request->type;
> -if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
> +if (request->flags & NBD_CMD_FLAG_NO_HOLE &&
> +!(command == NBD_CMD_WRITE || command == NBD_CMD_WRITE_ZEROES)) {
> +LOG("NO_HOLE flag valid only with write operation");
> +return -EINVAL;
> +}
> +if (command == NBD_CMD_READ || command == NBD_CMD_WRITE ||
> +command == NBD_CMD_WRITE_ZEROES) {
> if (request->len > NBD_MAX_BUFFER_SIZE) {
> LOG("len (%" PRIu32" ) is larger than max len (%u)",
> request->len, NBD_MAX_BUFFER_SIZE);
> @@ -1143,6 +1150,7 @@ static void nbd_trip(void *opaque)
> struct nbd_reply reply;
> ssize_t ret;
> uint32_t command;
> +int flags;
> 
> TRACE("Reading request.");
> if (client->closing) {
> @@ -1221,6 +1229,9 @@ static void nbd_trip(void *opaque)
> 
> TRACE("Writing to device");
> 
> +/* FIXME: if the client passes NBD_CMD_FLAG_NO_HOLE, can we
> + * make that override a server that is set to look for
> + * holes? */

No, and I reckon that's an error in the spec. There is a 'MUST'
which should be a 'SHOULD'. Good luck with any FS which supports
dedupe for instance!

> ret = blk_write(exp->blk,
> (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE,
> req->data, request.len / BDRV_SECTOR_SIZE);
> @@ -1243,6 +1254,52 @@ static void nbd_trip(void *opaque)
> goto out;
> }
> break;
> +case NBD_CMD_WRITE_ZEROES:
> +TRACE("Request type is WRITE_ZEROES");
> +
> +if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
> +TRACE("Server is read-only, return error");
> +reply.error = EROFS;
> +goto error_reply;
> +}
> +
> +TRACE("Writing to device");
> +
> +flags = 0;
> +