Minimal implementation of structured read: one data chunk followed by a
finishing "none" chunk. No segmentation.
Minimal structured error implementation: no text message.
Support the DF flag, but just ignore it, as there is no segmentation
anyway.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 include/block/nbd.h |  31 +++++++++++++
 nbd/nbd-internal.h  |   2 +
 nbd/server.c        | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 154 insertions(+), 4 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 3c65cf8d87..58b864f145 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -70,6 +70,25 @@ struct NBDSimpleReply {
 };
 typedef struct NBDSimpleReply NBDSimpleReply;
 
+typedef struct NBDStructuredReplyChunk {
+    uint32_t magic;
+    uint16_t flags;
+    uint16_t type;
+    uint64_t handle;
+    uint32_t length;
+} QEMU_PACKED NBDStructuredReplyChunk;
+
+typedef struct NBDStructuredRead {
+    NBDStructuredReplyChunk h;
+    uint64_t offset;
+} QEMU_PACKED NBDStructuredRead;
+
+typedef struct NBDStructuredError {
+    NBDStructuredReplyChunk h;
+    uint32_t error;
+    uint16_t message_length;
+} QEMU_PACKED NBDStructuredError;
+
 /* Transmission (export) flags: sent from server to client during handshake,
    but describe what will happen during transmission */
 #define NBD_FLAG_HAS_FLAGS      (1 << 0)        /* Flags are there */
@@ -79,6 +98,7 @@ typedef struct NBDSimpleReply NBDSimpleReply;
 #define NBD_FLAG_ROTATIONAL     (1 << 4)        /* Use elevator algorithm - rotational media */
 #define NBD_FLAG_SEND_TRIM      (1 << 5)        /* Send TRIM (discard) */
 #define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6)     /* Send WRITE_ZEROES */
+#define NBD_FLAG_SEND_DF        (1 << 7)        /* Send DF (Do not Fragment) */
 
 /* New-style handshake (global) flags, sent from server to client, and
    control what will happen during handshake phase. */
@@ -106,6 +126,7 @@ typedef struct NBDSimpleReply NBDSimpleReply;
 /* Request flags, sent from client to server during transmission phase */
 #define NBD_CMD_FLAG_FUA        (1 << 0) /* 'force unit access' during write */
 #define NBD_CMD_FLAG_NO_HOLE    (1 << 1) /* don't punch hole on zero run */
+#define NBD_CMD_FLAG_DF         (1 << 2) /* don't fragment structured read */
 
 /* Supported request types */
 enum {
@@ -130,6 +151,16 @@ enum {
  * aren't overflowing some other buffer. */
 #define NBD_MAX_NAME_SIZE 256
 
+/* Structured reply flags */
+#define NBD_REPLY_FLAG_DONE 1
+
+/* Structured reply types */
+#define NBD_REPLY_TYPE_NONE 0
+#define NBD_REPLY_TYPE_OFFSET_DATA 1
+#define NBD_REPLY_TYPE_OFFSET_HOLE 2
+#define NBD_REPLY_TYPE_ERROR ((1 << 15) + 1)
+#define NBD_REPLY_TYPE_ERROR_OFFSET ((1 << 15) + 2)
+
 ssize_t nbd_wr_syncv(QIOChannel *ioc,
                      struct iovec *iov,
                      size_t niov,
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 49b66b6896..489eeaf887 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -60,6 +60,7 @@
 #define NBD_REPLY_SIZE          (4 + 4 + 8)
 #define NBD_REQUEST_MAGIC       0x25609513
 #define NBD_SIMPLE_REPLY_MAGIC  0x67446698
+#define NBD_STRUCTURED_REPLY_MAGIC 0x668e33ef
 #define NBD_OPTS_MAGIC          0x49484156454F5054LL
 #define NBD_CLIENT_MAGIC        0x0000420281861253LL
 #define NBD_REP_MAGIC           0x0003e889045565a9LL
@@ -81,6 +82,7 @@
 #define NBD_OPT_LIST            (3)
 #define NBD_OPT_PEEK_EXPORT     (4)
 #define NBD_OPT_STARTTLS        (5)
+#define NBD_OPT_STRUCTURED_REPLY (8)
 
 /* NBD errors are based on errno numbers, so there is a 1:1 mapping,
  * but only a limited set of errno values is specified in the protocol.
diff --git a/nbd/server.c b/nbd/server.c
index 4cfc02123b..cb79a93c87 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -100,6 +100,8 @@ struct NBDClient {
     QTAILQ_ENTRY(NBDClient) next;
     int nb_requests;
     bool closing;
+
+    bool structured_reply;
 };
 
 /* That's all folks */
@@ -573,6 +575,16 @@ static int nbd_negotiate_options(NBDClient *client)
                     return ret;
                 }
                 break;
+
+            case NBD_OPT_STRUCTURED_REPLY:
+                client->structured_reply = true;
+                ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
+                                             clientflags);
+                if (ret < 0) {
+                    return ret;
+                }
+                break;
+
             default:
                 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
                     return -EIO;
@@ -1067,6 +1079,86 @@ static ssize_t nbd_co_send_simple_reply(NBDRequestData *req,
     return rc;
 }
 
+static void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
+                         uint16_t type, uint64_t handle, uint32_t length)
+{
+    stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
+    stw_be_p(&chunk->flags, flags);
+    stw_be_p(&chunk->type, type);
+    stq_be_p(&chunk->handle, handle);
+    stl_be_p(&chunk->length, length);
+}
+
+static int nbd_co_send_iov(NBDClient *client, struct iovec *iov, unsigned niov)
+{
+    ssize_t ret;
+    size_t size = iov_size(iov, niov);
+
+    g_assert(qemu_in_coroutine());
+    qemu_co_mutex_lock(&client->send_lock);
+    client->send_coroutine = qemu_coroutine_self();
+    nbd_set_handlers(client);
+
+    ret = nbd_wr_syncv(client->ioc, iov, niov, size, false);
+    if (ret >= 0 && ret != size) {
+        ret = -EIO;
+    }
+
+    client->send_coroutine = NULL;
+    nbd_set_handlers(client);
+    qemu_co_mutex_unlock(&client->send_lock);
+
+    return ret;
+}
+
+static inline int nbd_co_send_buf(NBDClient *client, void *buf, size_t size)
+{
+    struct iovec iov[] = {
+        {.iov_base = buf, .iov_len = size}
+    };
+
+    return nbd_co_send_iov(client, iov, 1);
+}
+
+static int nbd_co_send_structured_read(NBDClient *client, uint64_t handle,
+                                       uint64_t offset, void *data, size_t size)
+{
+    NBDStructuredRead chunk;
+
+    struct iovec iov[] = {
+        {.iov_base = &chunk, .iov_len = sizeof(chunk)},
+        {.iov_base = data, .iov_len = size}
+    };
+
+    set_be_chunk(&chunk.h, 0, NBD_REPLY_TYPE_OFFSET_DATA, handle,
+                 sizeof(chunk) - sizeof(chunk.h) + size);
+    stq_be_p(&chunk.offset, offset);
+
+    return nbd_co_send_iov(client, iov, 2);
+}
+
+static int nbd_co_send_structured_error(NBDClient *client, uint64_t handle,
+                                        uint32_t error)
+{
+    NBDStructuredError chunk;
+
+    set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
+                 sizeof(chunk) - sizeof(chunk.h));
+    stl_be_p(&chunk.error, error);
+    stw_be_p(&chunk.message_length, 0);
+
+    return nbd_co_send_buf(client, &chunk, sizeof(chunk));
+}
+
+static int nbd_co_send_structured_none(NBDClient *client, uint64_t handle)
+{
+    NBDStructuredReplyChunk chunk;
+
+    set_be_chunk(&chunk, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_NONE, handle, 0);
+
+    return nbd_co_send_buf(client, &chunk, sizeof(chunk));
+}
+
 /* Collect a client request.  Return 0 if request looks valid, -EAGAIN
  * to keep trying the collection, -EIO to drop connection right away,
  * and any other negative value to report an error to the client
@@ -1147,7 +1239,8 @@ static ssize_t nbd_co_receive_request(NBDRequestData *req,
         rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
         goto out;
     }
-    if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
+    if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE |
+                           NBD_CMD_FLAG_DF)) {
         LOG("unsupported flags (got 0x%x)", request->flags);
         rc = -EINVAL;
         goto out;
@@ -1226,12 +1319,34 @@ static void nbd_trip(void *opaque)
                         req->data, request.len);
         if (ret < 0) {
             LOG("reading from file failed");
-            reply.error = -ret;
-            goto error_reply;
+            if (client->structured_reply) {
+                ret = nbd_co_send_structured_error(req->client, request.handle,
+                                                   -ret);
+                if (ret < 0) {
+                    goto out;
+                } else {
+                    break;
+                }
+            } else {
+                reply.error = -ret;
+                goto error_reply;
+            }
         }
 
         TRACE("Read %" PRIu32" byte(s)", request.len);
-        if (nbd_co_send_simple_reply(req, &reply, request.len) < 0) {
+        if (client->structured_reply) {
+            ret = nbd_co_send_structured_read(req->client, request.handle,
+                                              request.from, req->data,
+                                              request.len);
+            if (ret < 0) {
+                goto out;
+            }
+
+            ret = nbd_co_send_structured_none(req->client, request.handle);
+        } else {
+            ret = nbd_co_send_simple_reply(req, &reply, request.len);
+        }
+        if (ret < 0) {
             goto out;
         }
         break;
@@ -1444,6 +1559,8 @@ void nbd_client_new(NBDExport *exp,
     client->can_read = true;
     client->close = close_fn;
 
+    client->structured_reply = false;
+
     data->client = client;
     data->co = qemu_coroutine_create(nbd_co_client_start, data);
     qemu_coroutine_enter(data->co);
-- 
2.11.0


Reply via email to