Only one meta context type is defined: qemu-dirty-bitmap:<bitmap-name>.
At most one query is allowed for NBD_OPT_{SET,LIST}_META_CONTEXT;
NBD_REP_ERR_TOO_BIG is returned otherwise.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 include/block/nbd.h |  15 ++
 nbd/nbd-internal.h  |   6 +
 nbd/server.c        | 445 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 466 insertions(+)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index dae2e4bd03..516a24765c 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -94,6 +94,16 @@ typedef struct NBDStructuredError {
     uint16_t message_length;
 } QEMU_PACKED NBDStructuredError;
 
+/* Fixed-size head of a NBD_REPLY_TYPE_BLOCK_STATUS structured reply chunk:
+ * the generic chunk header followed by the meta context id.  The extent
+ * array is sent right after it (see nbd_co_send_extents()). */
+typedef struct NBDStructuredMeta {
+    NBDStructuredReplyChunk h;
+    uint32_t context_id;    /* stored big-endian with stl_be_p() */
+} QEMU_PACKED NBDStructuredMeta;
+
+/* One extent descriptor of a block status reply.  Both fields are filled in
+ * big-endian by add_extents(); bitmap_to_extents() uses flags 1 for ranges
+ * reported by the dirty bitmap iterator and 0 for the gaps between them. */
+typedef struct NBDExtent {
+    uint32_t length;    /* extent length in bytes */
+    uint32_t flags;
+} QEMU_PACKED NBDExtent;
+
 /* Transmission (export) flags: sent from server to client during handshake,
    but describe what will happen during transmission */
 #define NBD_FLAG_HAS_FLAGS      (1 << 0)        /* Flags are there */
@@ -120,6 +130,7 @@ typedef struct NBDStructuredError {
 
 #define NBD_REP_ACK             (1)             /* Data sending finished. */
 #define NBD_REP_SERVER          (2)             /* Export description. */
+#define NBD_REP_META_CONTEXT    (4)
 
 #define NBD_REP_ERR_UNSUP       NBD_REP_ERR(1)  /* Unknown option */
 #define NBD_REP_ERR_POLICY      NBD_REP_ERR(2)  /* Server denied */
@@ -127,6 +138,8 @@ typedef struct NBDStructuredError {
 #define NBD_REP_ERR_PLATFORM    NBD_REP_ERR(4)  /* Not compiled in */
 #define NBD_REP_ERR_TLS_REQD    NBD_REP_ERR(5)  /* TLS required */
 #define NBD_REP_ERR_SHUTDOWN    NBD_REP_ERR(7)  /* Server shutting down */
+#define NBD_REP_ERR_TOO_BIG     NBD_REP_ERR(9)  /* The request or the reply is
+                                                   too large to process */
 
 /* Request flags, sent from client to server during transmission phase */
 #define NBD_CMD_FLAG_FUA        (1 << 0) /* 'force unit access' during write */
@@ -142,6 +155,7 @@ enum {
     NBD_CMD_TRIM = 4,
     /* 5 reserved for failed experiment NBD_CMD_CACHE */
     NBD_CMD_WRITE_ZEROES = 6,
+    NBD_CMD_BLOCK_STATUS = 7
 };
 
 #define NBD_DEFAULT_PORT       10809
@@ -163,6 +177,7 @@ enum {
 #define NBD_REPLY_TYPE_NONE 0
 #define NBD_REPLY_TYPE_OFFSET_DATA 1
 #define NBD_REPLY_TYPE_OFFSET_HOLE 2
+#define NBD_REPLY_TYPE_BLOCK_STATUS 5
 #define NBD_REPLY_TYPE_ERROR ((1 << 15) + 1)
 #define NBD_REPLY_TYPE_ERROR_OFFSET ((1 << 15) + 2)
 
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 3284bfc85a..fbbcf69925 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -83,6 +83,10 @@
 #define NBD_OPT_PEEK_EXPORT     (4)
 #define NBD_OPT_STARTTLS        (5)
 #define NBD_OPT_STRUCTURED_REPLY (8)
+#define NBD_OPT_LIST_META_CONTEXT (9)
+#define NBD_OPT_SET_META_CONTEXT  (10)
+
+#define NBD_META_NS_BITMAPS "qemu-dirty-bitmap"
 
 /* NBD errors are based on errno numbers, so there is a 1:1 mapping,
  * but only a limited set of errno values is specified in the protocol.
@@ -105,6 +109,8 @@ static inline const char *nbd_opt_name(int opt)
     case NBD_OPT_PEEK_EXPORT: return "peek_export";
     case NBD_OPT_STARTTLS: return "tls";
     case NBD_OPT_STRUCTURED_REPLY: return "structured_reply";
+    case NBD_OPT_LIST_META_CONTEXT: return "list_meta_context";
+    case NBD_OPT_SET_META_CONTEXT: return "set_meta_context";
     }
 
     return "<unknown option>";
diff --git a/nbd/server.c b/nbd/server.c
index cb79a93c87..0b7b7230df 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -21,6 +21,8 @@
 #include "qapi/error.h"
 #include "nbd-internal.h"
 
+#define NBD_MAX_BITMAP_EXTENTS (0x100000 / 8) /* 1 mb of extents data */
+
 static int system_errno_to_nbd_errno(int err)
 {
     switch (err) {
@@ -102,6 +104,7 @@ struct NBDClient {
     bool closing;
 
     bool structured_reply;
+    BdrvDirtyBitmap *export_bitmap;
 };
 
 /* That's all folks */
@@ -421,7 +424,304 @@ static QIOChannel 
*nbd_negotiate_handle_starttls(NBDClient *client,
     return QIO_CHANNEL(tioc);
 }
 
+/* Read a length-prefixed string from @ioc: a 32-bit big-endian length
+ * followed by that many bytes of string data.
+ *
+ * @ioc      channel to read from
+ * @str      out parameter; on success it points to a newly allocated,
+ *           NUL-terminated copy of the string which the caller must g_free().
+ *           It is set to NULL on failure.
+ * @max_len  maximum accepted string length, 0 means no explicit limit
+ *
+ * Returns:
+ *   number of bytes consumed from @ioc (length field plus string) on success
+ *   -EIO if reading fails
+ *   -EINVAL if the announced length is not acceptable
+ */
+static int nbd_negotiate_read_size_string(QIOChannel *ioc, char **str,
+                                          uint32_t max_len)
+{
+    uint32_t len;
+    char *buf;
+
+    *str = NULL;
+
+    if (nbd_negotiate_read(ioc, &len, sizeof(len)) != sizeof(len)) {
+        LOG("read failed");
+        return -EIO;
+    }
+
+    /* The length arrives in network byte order. */
+    be32_to_cpus(&len);
+
+    if (max_len > 0 && len > max_len) {
+        LOG("Bad length received");
+        return -EINVAL;
+    }
+
+    /* The byte count is returned as an int, so the length field plus the
+     * string must fit into one.  This also keeps len + 1 below from
+     * wrapping around to 0. */
+    if (len > INT32_MAX - sizeof(len)) {
+        LOG("Bad length received");
+        return -EINVAL;
+    }
+
+    buf = g_malloc((size_t)len + 1);
+
+    if (nbd_negotiate_read(ioc, buf, len) != len) {
+        LOG("read failed");
+        /* Free the buffer itself, not the caller's pointer variable. */
+        g_free(buf);
+        return -EIO;
+    }
+    buf[len] = '\0';
+
+    *str = buf;
+    return sizeof(len) + len;
+}
+
+/* Send a single NBD_REP_META_CONTEXT reply for option @opt: a 32-bit
+ * big-endian context id followed by the context name (no terminating NUL).
+ *
+ * NOTE(review): the id announced here is the constant 100, whereas the
+ * NBD_CMD_BLOCK_STATUS path passes 0 to nbd_co_send_bitmap() -- confirm which
+ * value is intended.
+ *
+ * Returns 0 on success, the negative result of nbd_negotiate_send_rep_len()
+ * if sending the reply header fails, or -EIO if writing the payload fails.
+ */
+static int nbd_negotiate_send_meta_context(QIOChannel *ioc,
+                                           const char *context,
+                                           uint32_t opt)
+{
+    uint32_t id_be = cpu_to_be32(100);
+    size_t name_len = strlen(context);
+    int rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_META_CONTEXT, opt,
+                                        name_len + sizeof(id_be));
+
+    if (rc < 0) {
+        return rc;
+    }
+
+    if (nbd_negotiate_write(ioc, &id_be, sizeof(id_be)) != sizeof(id_be)) {
+        LOG("write failed");
+        return -EIO;
+    }
+    if (nbd_negotiate_write(ioc, context, name_len) != name_len) {
+        LOG("write failed");
+        return -EIO;
+    }
+
+    return 0;
+}
+
+/* Announce the dirty bitmap @bitmap_name as a meta context for option @opt.
+ * The context name sent is "<NBD_META_NS_BITMAPS>:<bitmap_name>".
+ *
+ * Returns whatever nbd_negotiate_send_meta_context() returns.
+ */
+static int nbd_negotiate_send_bitmap(QIOChannel *ioc, const char *bitmap_name,
+                                     uint32_t opt)
+{
+    int rc;
+    char *full_name;
+
+    full_name = g_strdup_printf("%s:%s", NBD_META_NS_BITMAPS, bitmap_name);
+    rc = nbd_negotiate_send_meta_context(ioc, full_name, opt);
+    g_free(full_name);
+
+    return rc;
+}
+
+/* Answer one query inside the dirty bitmap namespace.
+ *
+ * If @bs has a dirty bitmap named @query, store it in *@bitmap (when @bitmap
+ * is not NULL) and send the matching meta context reply for @opt.  A query
+ * that matches nothing sends nothing and is not an error.
+ *
+ * Returns 0 if nothing matched, otherwise the result of
+ * nbd_negotiate_send_bitmap().
+ */
+static int nbd_negotiate_one_bitmap_query(QIOChannel *ioc,
+                                          BlockDriverState *bs,
+                                          uint32_t opt, const char *query,
+                                          BdrvDirtyBitmap **bitmap)
+{
+    BdrvDirtyBitmap *found = bdrv_find_dirty_bitmap(bs, query);
+
+    if (found == NULL) {
+        return 0;
+    }
+
+    if (bitmap != NULL) {
+        *bitmap = found;
+    }
+
+    return nbd_negotiate_send_bitmap(ioc, query, opt);
+}
+
+/* Read one meta context query ("<namespace>:<subquery>") from @ioc and answer
+ * it.  Only the NBD_META_NS_BITMAPS namespace is handled; queries for any
+ * other namespace are consumed and produce no reply.
+ *
+ * @ioc     channel to read the query from and to send the reply to
+ * @bs      block driver state whose dirty bitmaps are searched
+ * @opt     option being processed, echoed in the reply
+ * @bitmap  optional out parameter (may be NULL): reset to NULL first, then set
+ *          to the matched bitmap if there is one
+ *
+ * Returns:
+ *   number of bytes read from @ioc on success
+ *   -EINVAL if the query contains no ':' separator
+ *   other negative value if reading the query or sending the reply fails
+ */
+static int nbd_negotiate_one_meta_query(QIOChannel *ioc, BlockDriverState *bs,
+                                        uint32_t opt, BdrvDirtyBitmap **bitmap)
+{
+    int ret = 0, nb_read;
+    char *query, *colon, *namespace, *subquery;
+
+    /* @bitmap is optional; callers that only list contexts may pass NULL. */
+    if (bitmap != NULL) {
+        *bitmap = NULL;
+    }

+    /* NOTE(review): a max_len of 0 leaves the query length unbounded. */
+    nb_read = nbd_negotiate_read_size_string(ioc, &query, 0);
+    if (nb_read < 0) {
+        return nb_read;
+    }
+
+    colon = strchr(query, ':');
+    if (colon == NULL) {
+        ret = -EINVAL;
+        goto out;
+    }
+    /* Split the query in place into namespace and subquery. */
+    *colon = '\0';
+    namespace = query;
+    subquery = colon + 1;
+
+    if (strcmp(namespace, NBD_META_NS_BITMAPS) == 0) {
+        ret = nbd_negotiate_one_bitmap_query(ioc, bs, opt, subquery, bitmap);
+    }
+
+out:
+    g_free(query);
+    return ret < 0 ? ret : nb_read;
+}
+
+/* Start handling a LIST_META_CONTEXT or SET_META_CONTEXT request: read the
+ * export name and the number of queries that follow it.
+ *
+ * @client       client that sent the option
+ * @opt          should be NBD_OPT_LIST_META_CONTEXT or
+ *               NBD_OPT_SET_META_CONTEXT
+ * @length       length of the option data to read
+ * @nb_queries   out parameter, number of queries specified by the client
+ * @bs           out parameter, bs of the export selected by the client;
+ *               set to NULL if a non-critical error occurred and an error
+ *               reply was sent
+ *
+ * Returns:
+ *   Error code < 0 on critical error
+ *   Number of bytes read otherwise (equal to @length on non-critical error
+ *     or if there are no queries in the request)
+ */
+static int nbd_negotiate_opt_meta_context_start(NBDClient *client,
+                                                uint32_t opt,
+                                                uint32_t length,
+                                                uint32_t *nb_queries,
+                                                BlockDriverState **bs)
+{
+    int ret;
+    NBDExport *exp;
+    char *export_name = NULL;
+    int nb_read;
+
+    if (!client->structured_reply) {
+        LOG("Structured reply is not negotiated");
+
+        /* Nothing has been read yet, so skip the whole option payload.
+         * There is no export name to free on this path. */
+        if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
+            return -EIO;
+        }
+        ret = nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID, opt,
+                                         "Structured reply is not negotiated");
+        if (ret < 0) {
+            return ret;
+        }
+
+        *bs = NULL;
+        *nb_queries = 0;
+        return length;
+    }
+
+    nb_read = nbd_negotiate_read_size_string(client->ioc, &export_name,
+                                             NBD_MAX_NAME_SIZE);
+    if (nb_read < 0) {
+        return nb_read;
+    }
+    if ((uint32_t)nb_read > length) {
+        /* Otherwise length - nb_read below would wrap around. */
+        LOG("export name exceeds option length");
+        g_free(export_name);
+        return -EINVAL;
+    }
+
+    exp = nbd_export_find(export_name);
+    if (exp == NULL) {
+        uint32_t tail = length - nb_read;
+        LOG("export '%s' is not found", export_name);
+
+        /* Skip the rest of the option so the stream stays in sync. */
+        if (nbd_negotiate_drop_sync(client->ioc, tail) != tail) {
+            g_free(export_name);
+            return -EIO;
+        }
+        ret = nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID, opt,
+                                         "export '%s' is not found",
+                                         export_name);
+        g_free(export_name);
+        if (ret < 0) {
+            return ret;
+        }
+
+        *bs = NULL;
+        *nb_queries = 0;
+        return length;
+    }
+    g_free(export_name);
+
+    *bs = blk_bs(exp->blk);
+    if (*bs == NULL) {
+        LOG("export without bs");
+        return -EINVAL;
+    }
+
+    if (length - nb_read < sizeof(*nb_queries)) {
+        LOG("option too short for the number of queries");
+        return -EINVAL;
+    }
+    if (nbd_negotiate_read(client->ioc, nb_queries,
+                           sizeof(*nb_queries)) != sizeof(*nb_queries))
+    {
+        LOG("read failed");
+        return -EIO;
+    }
+    /* The query count arrives in network byte order. */
+    be32_to_cpus(nb_queries);
+
+    nb_read += sizeof(*nb_queries);
+
+    return nb_read;
+}
+
+/* Handle NBD_OPT_LIST_META_CONTEXT.
+ *
+ * With no queries, every dirty bitmap of the export's bs is announced.
+ * Otherwise each query is read and answered in turn.  The list is finished
+ * with NBD_REP_ACK.
+ *
+ * @client  client that sent the option
+ * @length  length of the option data
+ *
+ * Returns:
+ *   0 if an error reply was already sent while parsing the option header
+ *   -EINVAL if the data consumed does not match @length
+ *   other negative value on read/write failure
+ *   result of sending NBD_REP_ACK otherwise
+ */
+static int nbd_negotiate_list_meta_context(NBDClient *client, uint32_t length)
+{
+    int ret;
+    BlockDriverState *bs;
+    BdrvDirtyBitmap *unused_bitmap;
+    uint32_t nb_queries;
+    uint32_t i;
+    int nb_read;
+
+    nb_read = nbd_negotiate_opt_meta_context_start(client,
+                                                   NBD_OPT_LIST_META_CONTEXT,
+                                                   length, &nb_queries, &bs);
+    if (nb_read < 0) {
+        return nb_read;
+    }
+    if (bs == NULL) {
+        /* error reply was already sent by
+         * nbd_negotiate_opt_meta_context_start */
+        return 0;
+    }
+
+    if (nb_queries == 0) {
+        BdrvDirtyBitmap *bm = NULL;
+
+        if (nb_read != length) {
+            return -EINVAL;
+        }
+
+        while ((bm = bdrv_dirty_bitmap_next(bs, bm)) != NULL) {
+            ret = nbd_negotiate_send_bitmap(client->ioc,
+                                            bdrv_dirty_bitmap_name(bm),
+                                            NBD_OPT_LIST_META_CONTEXT);
+            if (ret < 0) {
+                return ret;
+            }
+        }
+    }
+
+    for (i = 0; i < nb_queries; ++i) {
+        /* nbd_negotiate_one_meta_query() stores the matched bitmap through
+         * its last argument, so give it somewhere to write even though
+         * listing does not need the result. */
+        ret = nbd_negotiate_one_meta_query(client->ioc, bs,
+                                           NBD_OPT_LIST_META_CONTEXT,
+                                           &unused_bitmap);
+        if (ret < 0) {
+            return ret;
+        }
+
+        nb_read += ret;
+    }
+
+    if (nb_read != length) {
+        return -EINVAL;
+    }
+
+    return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
+                                  NBD_OPT_LIST_META_CONTEXT);
+}
+
+/* Handle NBD_OPT_SET_META_CONTEXT.
+ *
+ * At most one query is supported.  If it matches a dirty bitmap, that bitmap
+ * is remembered in client->export_bitmap.  A request with more than one query
+ * is answered with NBD_REP_ERR_TOO_BIG.
+ *
+ * @client  client that sent the option
+ * @length  length of the option data
+ *
+ * Returns:
+ *   0 if an error reply was already sent while parsing the option header
+ *   -EINVAL if the data consumed does not match @length
+ *   other negative value on read/write failure
+ *   result of sending the final reply otherwise
+ */
+static int nbd_negotiate_set_meta_context(NBDClient *client, uint32_t length)
+{
+    int ret;
+    BlockDriverState *bs;
+    uint32_t nb_queries;
+    int nb_read;
+
+    nb_read = nbd_negotiate_opt_meta_context_start(client,
+                                                   NBD_OPT_SET_META_CONTEXT,
+                                                   length, &nb_queries, &bs);
+    if (nb_read < 0) {
+        return nb_read;
+    }
+    if (bs == NULL) {
+        /* error reply was already sent by
+         * nbd_negotiate_opt_meta_context_start */
+        return 0;
+    }
+
+    if (nb_queries == 0) {
+        if (nb_read != length) {
+            return -EINVAL;
+        }
+        return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
+                                      NBD_OPT_SET_META_CONTEXT);
+    }
+
+    if (nb_queries > 1) {
+        uint32_t tail;
+
+        if ((uint32_t)nb_read > length) {
+            return -EINVAL;
+        }
+        /* The queries are not parsed; skip them so that the next option is
+         * read from the right position in the stream. */
+        tail = length - nb_read;
+        if (nbd_negotiate_drop_sync(client->ioc, tail) != tail) {
+            return -EIO;
+        }
+        return nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_TOO_BIG,
+                                          NBD_OPT_SET_META_CONTEXT,
+                                          "Only one exporting context is "
+                                          "supported");
+    }
+
+    ret = nbd_negotiate_one_meta_query(client->ioc, bs,
+                                       NBD_OPT_SET_META_CONTEXT,
+                                       &client->export_bitmap);
+    if (ret < 0) {
+        return ret;
+    }
+
+    nb_read += ret;
+    if (nb_read != length) {
+        return -EINVAL;
+    }
+
+    return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
+                                  NBD_OPT_SET_META_CONTEXT);
+}
 /* Process all NBD_OPT_* client option commands.
  * Return -errno on error, 0 on success. */
 static int nbd_negotiate_options(NBDClient *client)
@@ -585,6 +885,20 @@ static int nbd_negotiate_options(NBDClient *client)
                 }
                 break;
 
+            case NBD_OPT_LIST_META_CONTEXT:
+                ret = nbd_negotiate_list_meta_context(client, length);
+                if (ret < 0) {
+                    return ret;
+                }
+                break;
+
+            case NBD_OPT_SET_META_CONTEXT:
+                ret = nbd_negotiate_set_meta_context(client, length);
+                if (ret < 0) {
+                    return ret;
+                }
+                break;
+
             default:
                 if (nbd_negotiate_drop_sync(client->ioc, length) != length) {
                     return -EIO;
@@ -1159,6 +1473,124 @@ static int nbd_co_send_structured_none(NBDClient 
*client, uint64_t handle)
     return nbd_co_send_buf(client, &chunk, sizeof(chunk));
 }
 
+#define MAX_EXTENT_LENGTH UINT32_MAX
+
+/* Describe a region of @length bytes with the given @flags using at most
+ * @nb_extents entries of @extents.
+ *
+ * An extent length is a 32-bit field, so a region longer than
+ * MAX_EXTENT_LENGTH is split into pieces of the largest multiple of 512 that
+ * fits, plus one final piece for the remainder.  Fields are stored
+ * big-endian.  If @nb_extents runs out, the rest of the region is left
+ * undescribed.
+ *
+ * Returns the number of entries written.
+ */
+static unsigned add_extents(NBDExtent *extents, unsigned nb_extents,
+                            uint64_t length, uint32_t flags)
+{
+    const uint32_t chunk = (MAX_EXTENT_LENGTH >> 9) << 9;
+    const uint32_t chunk_be = cpu_to_be32(chunk);
+    const uint32_t flags_be = cpu_to_be32(flags);
+    unsigned count = 0;
+
+    while (count < nb_extents && length > MAX_EXTENT_LENGTH) {
+        extents[count].length = chunk_be;
+        extents[count].flags = flags_be;
+        count++;
+        length -= chunk;
+    }
+
+    if (count < nb_extents && length > 0) {
+        extents[count].length = cpu_to_be32(length);
+        extents[count].flags = flags_be;
+        count++;
+    }
+
+    return count;
+}
+
+/* Convert the state of @bitmap for the byte range [@offset, @offset + @length)
+ * into an array of extents.
+ *
+ * Ranges reported by the dirty iterator get flags 1, the gaps between them
+ * get flags 0.  At most @nb_extents entries of @extents are filled; the
+ * description simply stops when the array is full.
+ *
+ * NOTE(review): this assumes the dirty iterator and
+ * bdrv_dirty_bitmap_next_zero() work in sector units -- confirm.
+ * NOTE(review): the last extent is not clamped to the end of the requested
+ * range, and a "not found" result of bdrv_dirty_bitmap_next_zero() is not
+ * handled specially -- confirm both are intended.
+ *
+ * Returns the number of entries written.
+ */
+static unsigned bitmap_to_extents(BdrvDirtyBitmap *bitmap, uint64_t offset,
+                                  uint64_t length, NBDExtent *extents,
+                                  unsigned nb_extents)
+{
+    uint64_t begin, end; /* dirty region */
+    uint64_t start_sector = offset >> BDRV_SECTOR_BITS;
+    uint64_t last_sector = (offset + length - 1) >> BDRV_SECTOR_BITS;
+    unsigned i = 0;
+    uint64_t len;
+    BdrvDirtyBitmapIter *it = bdrv_dirty_iter_new(bitmap, start_sector);
+
+    assert(nb_extents > 0);
+
+    begin = bdrv_dirty_iter_next(it);
+    if (begin == -1) {
+        /* nothing dirty from here on: treat the whole range as clean */
+        begin = last_sector + 1;
+    }
+    if (begin > start_sector) {
+        /* leading clean region */
+        len = (begin - start_sector) << BDRV_SECTOR_BITS;
+        i += add_extents(extents + i, nb_extents - i, len, 0);
+    }
+
+    while (begin != -1 && begin <= last_sector && i < nb_extents) {
+        end = bdrv_dirty_bitmap_next_zero(bitmap, begin + 1);
+
+        i += add_extents(extents + i, nb_extents - i,
+                         (end - begin) << BDRV_SECTOR_BITS, 1);
+
+        if (end > last_sector || i >= nb_extents) {
+            break;
+        }
+
+        bdrv_set_dirty_iter(it, end);
+        begin = bdrv_dirty_iter_next(it);
+        if (begin == -1) {
+            begin = last_sector + 1;
+        }
+        if (begin > end) {
+            /* clean gap between two dirty regions */
+            i += add_extents(extents + i, nb_extents - i,
+                             (begin - end) << BDRV_SECTOR_BITS, 0);
+        }
+    }
+
+    bdrv_dirty_iter_free(it);
+
+    /* The first extent was computed from the start of the sector containing
+     * @offset; trim it so that it starts at @offset.  Skip this when no
+     * extent was produced, as extents[0] is uninitialized then. */
+    if (i > 0) {
+        extents[0].length =
+            cpu_to_be32(be32_to_cpu(extents[0].length) -
+                        (offset - (start_sector << BDRV_SECTOR_BITS)));
+    }
+
+    return i;
+}
+
+/* Send @nb_extents extents as one NBD_REPLY_TYPE_BLOCK_STATUS chunk flagged
+ * NBD_REPLY_FLAG_DONE: the chunk header, @context_id (big-endian), then the
+ * extent array exactly as given.
+ *
+ * Returns the result of nbd_co_send_iov().
+ */
+static int nbd_co_send_extents(NBDClient *client, uint64_t handle,
+                               NBDExtent *extents, unsigned nb_extents,
+                               uint32_t context_id)
+{
+    NBDStructuredMeta meta;
+    size_t extents_size = nb_extents * sizeof(extents[0]);
+    struct iovec iov[] = {
+        {.iov_base = &meta, .iov_len = sizeof(meta)},
+        {.iov_base = extents, .iov_len = extents_size}
+    };
+
+    /* The chunk length covers everything after the generic header. */
+    set_be_chunk(&meta.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_BLOCK_STATUS,
+                 handle, sizeof(meta) - sizeof(meta.h) + extents_size);
+    stl_be_p(&meta.context_id, context_id);
+
+    return nbd_co_send_iov(client, iov, 2);
+}
+
+/* Reply to a block status request: turn the state of @bitmap for the range
+ * [@offset, @offset + @length) into at most NBD_MAX_BITMAP_EXTENTS extents
+ * and send them to @client under @context_id.
+ *
+ * Returns the result of nbd_co_send_extents().
+ */
+static int nbd_co_send_bitmap(NBDClient *client, uint64_t handle,
+                              BdrvDirtyBitmap *bitmap, uint64_t offset,
+                              uint64_t length, uint32_t context_id)
+{
+    NBDExtent *extents = g_new(NBDExtent, NBD_MAX_BITMAP_EXTENTS);
+    unsigned count = bitmap_to_extents(bitmap, offset, length, extents,
+                                       NBD_MAX_BITMAP_EXTENTS);
+    int rc = nbd_co_send_extents(client, handle, extents, count, context_id);
+
+    g_free(extents);
+
+    return rc;
+}
+
 /* Collect a client request.  Return 0 if request looks valid, -EAGAIN
  * to keep trying the collection, -EIO to drop connection right away,
  * and any other negative value to report an error to the client
@@ -1437,6 +1869,19 @@ static void nbd_trip(void *opaque)
             goto out;
         }
         break;
+    case NBD_CMD_BLOCK_STATUS:
+        TRACE("Request type is BLOCK_STATUS");
+        if (client->export_bitmap == NULL) {
+            reply.error = EINVAL;
+            goto error_reply;
+        }
+        ret = nbd_co_send_bitmap(req->client, request.handle,
+                                 client->export_bitmap, request.from,
+                                 request.len, 0);
+        if (ret < 0) {
+            goto out;
+        }
+        break;
     default:
         LOG("invalid request type (%" PRIu32 ") received", request.type);
         reply.error = EINVAL;
-- 
2.11.0


Reply via email to