date:20171030

[Qemu-block] USB Device is not stable, Cannot use it with long time

2017-10-30 Thread Lying

At first, my QEMU guest runs normally,
and I can use the keyboard and mouse in my VM,
but after about one day they stop working,
and I cannot find any useful log messages on my system.

The following is the state of my VM (obtained with qemu-monitor-command):
  Device 0.1, Port 1, Speed 1.5 Mb/s, Product G100s Gaming Keyboard, ID: 
hostdev0
  Device 0.2, Port 2, Speed 1.5 Mb/s, Product USB Optical Mouse, ID: hostdev1
It shows that QEMU has added the devices normally, but I cannot use them.

[Qemu-block] [PULL 11/12] nbd: Move nbd_read() to common header

2017-10-30 Thread Eric Blake

An upcoming change to block/nbd-client.c will want to read the
tail of a structured reply chunk directly from the wire.  Move
this function to make it easier.

Based on a patch from Vladimir Sementsov-Ogievskiy.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-12-ebl...@redhat.com>
---
 include/block/nbd.h | 10 ++
 nbd/nbd-internal.h  |  9 -
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 2ee1578420..da6e305dd5 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -264,6 +264,16 @@ void nbd_client_put(NBDClient *client);
 void nbd_server_start(SocketAddress *addr, const char *tls_creds,
   Error **errp);

+
+/* nbd_read
+ * Reads @size bytes from @ioc. Returns 0 on success.
+ */
+static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
+   Error **errp)
+{
+return qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
+}
+
 static inline bool nbd_reply_is_simple(NBDReply *reply)
 {
 return reply->magic == NBD_SIMPLE_REPLY_MAGIC;
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 4f24d6e57d..b64eb1cc9b 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -82,15 +82,6 @@ static inline int nbd_read_eof(QIOChannel *ioc, void 
*buffer, size_t size,
 return ret;
 }

-/* nbd_read
- * Reads @size bytes from @ioc. Returns 0 on success.
- */
-static inline int nbd_read(QIOChannel *ioc, void *buffer, size_t size,
-   Error **errp)
-{
-return qio_channel_read_all(ioc, buffer, size, errp) < 0 ? -EIO : 0;
-}
-
 /* nbd_write
  * Writes @size bytes to @ioc. Returns 0 on success.
  */
-- 
2.13.6

[Qemu-block] [PULL 09/12] nbd/client: refactor nbd_receive_starttls

2017-10-30 Thread Eric Blake

From: Vladimir Sementsov-Ogievskiy 

Split out nbd_request_simple_option to be reused for structured reply
option.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-10-ebl...@redhat.com>
---
 nbd/client.c | 70 ++--
 nbd/trace-events |  4 +---
 2 files changed, 49 insertions(+), 25 deletions(-)

diff --git a/nbd/client.c b/nbd/client.c
index 50f36b511e..9acf745b79 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -508,35 +508,61 @@ static int nbd_receive_query_exports(QIOChannel *ioc,
 }
 }

+/* nbd_request_simple_option: Send an option request, and parse the reply
+ * return 1 for successful negotiation,
+ *0 if operation is unsupported,
+ *-1 with errp set for any other error
+ */
+static int nbd_request_simple_option(QIOChannel *ioc, int opt, Error **errp)
+{
+nbd_opt_reply reply;
+int error;
+
+if (nbd_send_option_request(ioc, opt, 0, NULL, errp) < 0) {
+return -1;
+}
+
+if (nbd_receive_option_reply(ioc, opt, &reply, errp) < 0) {
+return -1;
+}
+error = nbd_handle_reply_err(ioc, &reply, errp);
+if (error <= 0) {
+return error;
+}
+
+if (reply.type != NBD_REP_ACK) {
+error_setg(errp, "Server answered option %d (%s) with unexpected "
+   "reply %" PRIx32 " (%s)", opt, nbd_opt_lookup(opt),
+   reply.type, nbd_rep_lookup(reply.type));
+nbd_send_opt_abort(ioc);
+return -1;
+}
+
+if (reply.length != 0) {
+error_setg(errp, "Option %d ('%s') response length is %" PRIu32
+   " (it should be zero)", opt, nbd_opt_lookup(opt),
+   reply.length);
+nbd_send_opt_abort(ioc);
+return -1;
+}
+
+return 1;
+}
+
 static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
 QCryptoTLSCreds *tlscreds,
 const char *hostname, Error **errp)
 {
-nbd_opt_reply reply;
+int ret;
 QIOChannelTLS *tioc;
 struct NBDTLSHandshakeData data = { 0 };

-trace_nbd_receive_starttls_request();
-if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) {
-return NULL;
-}
-
-trace_nbd_receive_starttls_reply();
-if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) {
-return NULL;
-}
-
-if (reply.type != NBD_REP_ACK) {
-error_setg(errp, "Server rejected request to start TLS %" PRIx32,
-   reply.type);
-nbd_send_opt_abort(ioc);
-return NULL;
-}
-
-if (reply.length != 0) {
-error_setg(errp, "Start TLS response was not zero %" PRIu32,
-   reply.length);
-nbd_send_opt_abort(ioc);
+ret = nbd_request_simple_option(ioc, NBD_OPT_STARTTLS, errp);
+if (ret <= 0) {
+if (ret == 0) {
+error_setg(errp, "Server don't support STARTTLS option");
+nbd_send_opt_abort(ioc);
+}
 return NULL;
 }

diff --git a/nbd/trace-events b/nbd/trace-events
index 52150bd738..596df96575 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -8,9 +8,7 @@ nbd_opt_go_info_unknown(int info, const char *name) "Ignoring 
unknown info %d (%
 nbd_opt_go_info_block_size(uint32_t minimum, uint32_t preferred, uint32_t 
maximum) "Block sizes are 0x%" PRIx32 ", 0x%" PRIx32 ", 0x%" PRIx32
 nbd_receive_query_exports_start(const char *wantname) "Querying export list 
for '%s'"
 nbd_receive_query_exports_success(const char *wantname) "Found desired export 
name '%s'"
-nbd_receive_starttls_request(void) "Requesting TLS from server"
-nbd_receive_starttls_reply(void) "Getting TLS reply from server"
-nbd_receive_starttls_new_client(void) "TLS request approved, setting up TLS"
+nbd_receive_starttls_new_client(void) "Setting up TLS"
 nbd_receive_starttls_tls_handshake(void) "Starting TLS handshake"
 nbd_receive_negotiate(void *tlscreds, const char *hostname) "Receiving 
negotiation tlscreds=%p hostname=%s"
 nbd_receive_negotiate_magic(uint64_t magic) "Magic is 0x%" PRIx64
-- 
2.13.6

[Qemu-block] [PULL 08/12] nbd/server: Include human-readable message in structured errors

2017-10-30 Thread Eric Blake

The NBD spec permits including a human-readable error string if
structured replies are in force, so we might as well send the
client the message that we logged on any error.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-9-ebl...@redhat.com>
---
 nbd/server.c | 20 +---
 nbd/trace-events |  2 +-
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 3261fd1bd7..70b40ed27e 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1296,24 +1296,25 @@ static int coroutine_fn 
nbd_co_send_structured_read(NBDClient *client,
 static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
  uint64_t handle,
  uint32_t error,
+ const char *msg,
  Error **errp)
 {
 NBDStructuredError chunk;
 int nbd_err = system_errno_to_nbd_errno(error);
 struct iovec iov[] = {
 {.iov_base = &chunk, .iov_len = sizeof(chunk)},
-/* FIXME: Support human-readable error message */
+{.iov_base = (char *)msg, .iov_len = msg ? strlen(msg) : 0},
 };

 assert(nbd_err);
 trace_nbd_co_send_structured_error(handle, nbd_err,
-   nbd_err_lookup(nbd_err));
+   nbd_err_lookup(nbd_err), msg ? msg : 
"");
 set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
- sizeof(chunk) - sizeof(chunk.h));
+ sizeof(chunk) - sizeof(chunk.h) + iov[1].iov_len);
 stl_be_p(&chunk.error, nbd_err);
-stw_be_p(&chunk.message_length, 0);
+stw_be_p(&chunk.message_length, iov[1].iov_len);

-return nbd_co_send_iov(client, iov, 1, errp);
+return nbd_co_send_iov(client, iov, 1 + !!iov[1].iov_len, errp);
 }

 /* nbd_co_receive_request
@@ -1414,6 +1415,7 @@ static coroutine_fn void nbd_trip(void *opaque)
 int flags;
 int reply_data_len = 0;
 Error *local_err = NULL;
+char *msg = NULL;

 trace_nbd_trip();
 if (client->closing) {
@@ -1530,14 +1532,17 @@ reply:
 if (local_err) {
 /* If we get here, local_err was not a fatal error, and should be sent
  * to the client. */
+assert(ret < 0);
+msg = g_strdup(error_get_pretty(local_err));
 error_report_err(local_err);
 local_err = NULL;
 }

-if (client->structured_reply && request.type == NBD_CMD_READ) {
+if (client->structured_reply &&
+(ret < 0 || request.type == NBD_CMD_READ)) {
 if (ret < 0) {
 ret = nbd_co_send_structured_error(req->client, request.handle,
-   -ret, &local_err);
+   -ret, msg, &local_err);
 } else {
 ret = nbd_co_send_structured_read(req->client, request.handle,
   request.from, req->data,
@@ -1548,6 +1553,7 @@ reply:
ret < 0 ? -ret : 0,
req->data, reply_data_len, &local_err);
 }
+g_free(msg);
 if (ret < 0) {
 error_prepend(&local_err, "Failed to send reply: ");
 goto disconnect;
diff --git a/nbd/trace-events b/nbd/trace-events
index 6894f8bbb4..52150bd738 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -57,7 +57,7 @@ nbd_blk_aio_attached(const char *name, void *ctx) "Export %s: 
Attaching clients
 nbd_blk_aio_detach(const char *name, void *ctx) "Export %s: Detaching clients 
from AIO context %p\n"
 nbd_co_send_simple_reply(uint64_t handle, uint32_t error, const char *errname, 
int len) "Send simple reply: handle = %" PRIu64 ", error = %" PRIu32 " (%s), 
len = %d"
 nbd_co_send_structured_read(uint64_t handle, uint64_t offset, void *data, 
size_t size) "Send structured read data reply: handle = %" PRIu64 ", offset = 
%" PRIu64 ", data = %p, len = %zu"
-nbd_co_send_structured_error(uint64_t handle, int err, const char *errname) 
"Send structured error reply: handle = %" PRIu64 ", error = %d (%s)"
+nbd_co_send_structured_error(uint64_t handle, int err, const char *errname, 
const char *msg) "Send structured error reply: handle = %" PRIu64 ", error = %d 
(%s), msg = '%s'"
 nbd_co_receive_request_decode_type(uint64_t handle, uint16_t type, const char 
*name) "Decoding type: handle = %" PRIu64 ", type = %" PRIu16 " (%s)"
 nbd_co_receive_request_payload_received(uint64_t handle, uint32_t len) 
"Payload received: handle = %" PRIu64 ", len = %" PRIu32
 nbd_co_receive_request_cmd_write(uint32_t len) "Reading %" PRIu32 " byte(s)"
-- 
2.13.6

[Qemu-block] [PULL 10/12] nbd/client: prepare nbd_receive_reply for structured reply

2017-10-30 Thread Eric Blake

From: Vladimir Sementsov-Ogievskiy 

In the following patch, nbd_receive_reply will be used to receive both
simple and structured reply headers.
NBDReply is changed into a union of the simple reply header and the
structured reply chunk header, and simple error translation is moved to
block/nbd-client to be consistent with the upcoming structured reply
error translation.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-11-ebl...@redhat.com>
---
 include/block/nbd.h |  30 ---
 block/nbd-client.c  |   8 ++--
 nbd/client.c| 104 +---
 nbd/trace-events|   3 +-
 4 files changed, 113 insertions(+), 32 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index 225e9575e4..2ee1578420 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -57,12 +57,6 @@ struct NBDRequest {
 };
 typedef struct NBDRequest NBDRequest;

-struct NBDReply {
-uint64_t handle;
-uint32_t error;
-};
-typedef struct NBDReply NBDReply;
-
 typedef struct NBDSimpleReply {
 uint32_t magic;  /* NBD_SIMPLE_REPLY_MAGIC */
 uint32_t error;
@@ -78,6 +72,20 @@ typedef struct NBDStructuredReplyChunk {
 uint32_t length; /* length of payload */
 } QEMU_PACKED NBDStructuredReplyChunk;

+typedef union NBDReply {
+NBDSimpleReply simple;
+NBDStructuredReplyChunk structured;
+struct {
+/* @magic and @handle fields have the same offset and size both in
+ * simple reply and structured reply chunk, so let them be accessible
+ * without ".simple." or ".structured." specification
+ */
+uint32_t magic;
+uint32_t _skip;
+uint64_t handle;
+} QEMU_PACKED;
+} NBDReply;
+
 /* Header of NBD_REPLY_TYPE_OFFSET_DATA, complete NBD_REPLY_TYPE_OFFSET_HOLE */
 typedef struct NBDStructuredRead {
 NBDStructuredReplyChunk h;
@@ -256,4 +264,14 @@ void nbd_client_put(NBDClient *client);
 void nbd_server_start(SocketAddress *addr, const char *tls_creds,
   Error **errp);

+static inline bool nbd_reply_is_simple(NBDReply *reply)
+{
+return reply->magic == NBD_SIMPLE_REPLY_MAGIC;
+}
+
+static inline bool nbd_reply_is_structured(NBDReply *reply)
+{
+return reply->magic == NBD_STRUCTURED_REPLY_MAGIC;
+}
+
 #endif
diff --git a/block/nbd-client.c b/block/nbd-client.c
index c0683c3c83..58493b7ac4 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -92,7 +92,9 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 i = HANDLE_TO_INDEX(s, s->reply.handle);
 if (i >= MAX_NBD_REQUESTS ||
 !s->requests[i].coroutine ||
-!s->requests[i].receiving) {
+!s->requests[i].receiving ||
+nbd_reply_is_structured(&s->reply))
+{
 break;
 }

@@ -194,8 +196,8 @@ static int nbd_co_receive_reply(NBDClientSession *s,
 ret = -EIO;
 } else {
 assert(s->reply.handle == handle);
-ret = -s->reply.error;
-if (qiov && s->reply.error == 0) {
+ret = -nbd_errno_to_system_errno(s->reply.simple.error);
+if (qiov && ret == 0) {
 if (qio_channel_readv_all(s->ioc, qiov->iov, qiov->niov,
   NULL) < 0) {
 ret = -EIO;
diff --git a/nbd/client.c b/nbd/client.c
index 9acf745b79..4f0745f601 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -908,6 +908,57 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request)
 return nbd_write(ioc, buf, sizeof(buf), NULL);
 }

+/* nbd_receive_simple_reply
+ * Read simple reply except magic field (which should be already read).
+ * Payload is not read (payload is possible for CMD_READ, but here we even
+ * don't know whether it take place or not).
+ */
+static int nbd_receive_simple_reply(QIOChannel *ioc, NBDSimpleReply *reply,
+Error **errp)
+{
+int ret;
+
+assert(reply->magic == NBD_SIMPLE_REPLY_MAGIC);
+
+ret = nbd_read(ioc, (uint8_t *)reply + sizeof(reply->magic),
+   sizeof(*reply) - sizeof(reply->magic), errp);
+if (ret < 0) {
+return ret;
+}
+
+be32_to_cpus(&reply->error);
+be64_to_cpus(&reply->handle);
+
+return 0;
+}
+
+/* nbd_receive_structured_reply_chunk
+ * Read structured reply chunk except magic field (which should be already
+ * read).
+ * Payload is not read.
+ */
+static int nbd_receive_structured_reply_chunk(QIOChannel *ioc,
+  NBDStructuredReplyChunk *chunk,
+  Error **errp)
+{
+int ret;
+
+assert(chunk->magic == NBD_STRUCTURED_REPLY_MAGIC);
+
+ret = nbd_read(ioc, (uint8_t *)chunk + sizeof(chunk->magic),
+   sizeof(*chunk) - sizeof(chunk->magic), errp);
+if (ret < 0) {
+return ret;
+}
+
+be16_to_cpus(&chunk->flags);
+

[Qemu-block] [PULL 07/12] nbd: Minimal structured read for server

2017-10-30 Thread Eric Blake

From: Vladimir Sementsov-Ogievskiy 

Minimal implementation of structured read: one structured reply chunk,
no segmentation.
Minimal structured error implementation: no text message.
Support the DF flag, but just ignore it, as there is no segmentation
anyway.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-8-ebl...@redhat.com>
---
 nbd/server.c | 107 +--
 nbd/trace-events |   2 ++
 2 files changed, 99 insertions(+), 10 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index cf815603a6..3261fd1bd7 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -100,6 +100,8 @@ struct NBDClient {
 QTAILQ_ENTRY(NBDClient) next;
 int nb_requests;
 bool closing;
+
+bool structured_reply;
 };

 /* That's all folks */
@@ -769,6 +771,23 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
  "TLS not configured");
 }
 break;
+
+case NBD_OPT_STRUCTURED_REPLY:
+if (length) {
+ret = nbd_reject_length(client, length, option, false,
+errp);
+} else if (client->structured_reply) {
+ret = nbd_negotiate_send_rep_err(
+client->ioc, NBD_REP_ERR_INVALID, option, errp,
+"structured reply already negotiated");
+} else {
+ret = nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
+ option, errp);
+client->structured_reply = true;
+myflags |= NBD_FLAG_SEND_DF;
+}
+break;
+
 default:
 if (nbd_drop(client->ioc, length, errp) < 0) {
 return -EIO;
@@ -1243,6 +1262,60 @@ static int nbd_co_send_simple_reply(NBDClient *client,
 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
 }

+static inline void set_be_chunk(NBDStructuredReplyChunk *chunk, uint16_t flags,
+uint16_t type, uint64_t handle, uint32_t 
length)
+{
+stl_be_p(&chunk->magic, NBD_STRUCTURED_REPLY_MAGIC);
+stw_be_p(&chunk->flags, flags);
+stw_be_p(&chunk->type, type);
+stq_be_p(&chunk->handle, handle);
+stl_be_p(&chunk->length, length);
+}
+
+static int coroutine_fn nbd_co_send_structured_read(NBDClient *client,
+uint64_t handle,
+uint64_t offset,
+void *data,
+size_t size,
+Error **errp)
+{
+NBDStructuredRead chunk;
+struct iovec iov[] = {
+{.iov_base = &chunk, .iov_len = sizeof(chunk)},
+{.iov_base = data, .iov_len = size}
+};
+
+trace_nbd_co_send_structured_read(handle, offset, data, size);
+set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_OFFSET_DATA,
+ handle, sizeof(chunk) - sizeof(chunk.h) + size);
+stq_be_p(&chunk.offset, offset);
+
+return nbd_co_send_iov(client, iov, 2, errp);
+}
+
+static int coroutine_fn nbd_co_send_structured_error(NBDClient *client,
+ uint64_t handle,
+ uint32_t error,
+ Error **errp)
+{
+NBDStructuredError chunk;
+int nbd_err = system_errno_to_nbd_errno(error);
+struct iovec iov[] = {
+{.iov_base = &chunk, .iov_len = sizeof(chunk)},
+/* FIXME: Support human-readable error message */
+};
+
+assert(nbd_err);
+trace_nbd_co_send_structured_error(handle, nbd_err,
+   nbd_err_lookup(nbd_err));
+set_be_chunk(&chunk.h, NBD_REPLY_FLAG_DONE, NBD_REPLY_TYPE_ERROR, handle,
+ sizeof(chunk) - sizeof(chunk.h));
+stl_be_p(&chunk.error, nbd_err);
+stw_be_p(&chunk.message_length, 0);
+
+return nbd_co_send_iov(client, iov, 1, errp);
+}
+
 /* nbd_co_receive_request
  * Collect a client request. Return 0 if request looks valid, -EIO to drop
  * connection right away, and any other negative value to report an error to
@@ -1253,6 +1326,7 @@ static int nbd_co_receive_request(NBDRequestData *req, 
NBDRequest *request,
   Error **errp)
 {
 NBDClient *client = req->client;
+int valid_flags;

 g_assert(qemu_in_coroutine());
 assert(client->recv_coroutine == qemu_coroutine_self());
@@ -1314,13 +1388,15 @@ static int nbd_co_receive_request(NBDRequestData *req, 
NBDRequest *request,
(uint64_t)client->exp->size);
 return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
 }
-if (request->flags

[Qemu-block] [PULL 05/12] nbd/server: Simplify nbd_negotiate_options loop

2017-10-30 Thread Eric Blake

Instead of making each caller check whether a transmission error
occurred, we can sink a common error check to the end of the loop.

Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-6-ebl...@redhat.com>
[eblake: squash in compiler warning fix]
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 nbd/server.c | 19 ---
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 05ff7470d5..2f03059b4c 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -678,6 +678,7 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
 if (!tioc) {
 return -EIO;
 }
+ret = 0;
 object_unref(OBJECT(client->ioc));
 client->ioc = QIO_CHANNEL(tioc);
 break;
@@ -698,9 +699,6 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
  "Option 0x%" PRIx32
  "not permitted before TLS",
  option);
-if (ret < 0) {
-return ret;
-}
 /* Let the client keep trying, unless they asked to
  * quit. In this mode, we've already sent an error, so
  * we can't ack the abort.  */
@@ -713,9 +711,6 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
 switch (option) {
 case NBD_OPT_LIST:
 ret = nbd_negotiate_handle_list(client, length, errp);
-if (ret < 0) {
-return ret;
-}
 break;

 case NBD_OPT_ABORT:
@@ -738,9 +733,6 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
 assert(option == NBD_OPT_GO);
 return 0;
 }
-if (ret) {
-return ret;
-}
 break;

 case NBD_OPT_STARTTLS:
@@ -758,9 +750,6 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
  option, errp,
  "TLS not configured");
 }
-if (ret < 0) {
-return ret;
-}
 break;
 default:
 if (nbd_drop(client->ioc, length, errp) < 0) {
@@ -772,9 +761,6 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
  "Unsupported option 0x%"
  PRIx32 " (%s)", option,
  nbd_opt_lookup(option));
-if (ret < 0) {
-return ret;
-}
 break;
 }
 } else {
@@ -794,6 +780,9 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
 return -EINVAL;
 }
 }
+if (ret < 0) {
+return ret;
+}
 }
 }

-- 
2.13.6

[Qemu-block] [PULL 04/12] nbd/server: Report error for write to read-only export

2017-10-30 Thread Eric Blake

When the server is read-only, we were already reporting an error
message for NBD_CMD_WRITE_ZEROES, but failed to set errp for a
similar NBD_CMD_WRITE.  This will matter more once structured
replies allow the server to propagate the errp information back
to the client.  While at it, use an error message that makes a
bit more sense if viewed on the client side.

Note that when using qemu-io to test qemu-nbd behavior, it is
rather difficult to convince qemu-io to send protocol violations
(such as a read beyond bounds), because we have a lot of active
checking on the client side that a qemu-io request makes sense
before it ever goes over the wire to the server.  The case of a
client attempting a write when the server is started as
'qemu-nbd -r' is one of the few places where we can easily test
error path handling, without having to resort to hacking in known
temporary bugs to either the server or client.  [Maybe we want a
future patch to the client to do up-front checking on writes to a
read-only export, the way it does up-front bounds checking; but I
don't see anything in the NBD spec that points to a protocol
violation in our current behavior.]

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-5-ebl...@redhat.com>
---
 nbd/server.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/nbd/server.c b/nbd/server.c
index efb6003364..05ff7470d5 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1381,6 +1381,7 @@ static coroutine_fn void nbd_trip(void *opaque)
 break;
 case NBD_CMD_WRITE:
 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
+error_setg(&local_err, "Export is read-only");
 ret = -EROFS;
 break;
 }
@@ -1398,7 +1399,7 @@ static coroutine_fn void nbd_trip(void *opaque)
 break;
 case NBD_CMD_WRITE_ZEROES:
 if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
-error_setg(&local_err, "Server is read-only, return error");
+error_setg(&local_err, "Export is read-only");
 ret = -EROFS;
 break;
 }
-- 
2.13.6

[Qemu-block] [PULL 06/12] nbd/server: Refactor zero-length option check

2017-10-30 Thread Eric Blake

Consolidate the response for a non-zero-length option payload
into a new function, nbd_reject_length().  This check will
also be used when introducing support for structured replies.

Note that STARTTLS response differs based on time: if the connection
is still unencrypted, we set fatal to true (a client that can't
request TLS correctly may still think that we are ready to start
the TLS handshake, so we must disconnect); while if the connection
is already encrypted, the client is sending a bogus request but
is no longer at risk of being confused by continuing the connection.

Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-7-ebl...@redhat.com>
[eblake: correct return value on STARTTLS]
Reviewed-by: Vladimir Sementsov-Ogievskiy 
---
 nbd/server.c | 74 +---
 1 file changed, 46 insertions(+), 28 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 2f03059b4c..cf815603a6 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -253,21 +253,10 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, 
NBDExport *exp,

 /* Process the NBD_OPT_LIST command, with a potential series of replies.
  * Return -errno on error, 0 on success. */
-static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length,
- Error **errp)
+static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
 {
 NBDExport *exp;

-if (length) {
-if (nbd_drop(client->ioc, length, errp) < 0) {
-return -EIO;
-}
-return nbd_negotiate_send_rep_err(client->ioc,
-  NBD_REP_ERR_INVALID, NBD_OPT_LIST,
-  errp,
-  "OPT_LIST should not have length");
-}
-
 /* For each export, send a NBD_REP_SERVER reply. */
 QTAILQ_FOREACH(exp, &exports, next) {
 if (nbd_negotiate_send_rep_list(client->ioc, exp, errp)) {
@@ -531,7 +520,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, 
uint32_t length,
 /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
  * new channel for all further (now-encrypted) communication. */
 static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
- uint32_t length,
  Error **errp)
 {
 QIOChannel *ioc;
@@ -540,15 +528,6 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient 
*client,

 trace_nbd_negotiate_handle_starttls();
 ioc = client->ioc;
-if (length) {
-if (nbd_drop(ioc, length, errp) < 0) {
-return NULL;
-}
-nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
-   errp,
-   "OPT_STARTTLS should not have length");
-return NULL;
-}

 if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
NBD_OPT_STARTTLS, errp) < 0) {
@@ -584,6 +563,34 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient 
*client,
 return QIO_CHANNEL(tioc);
 }

+/* nbd_reject_length: Handle any unexpected payload.
+ * @fatal requests that we quit talking to the client, even if we are able
+ * to successfully send an error to the guest.
+ * Return:
+ * -errno  transmission error occurred or @fatal was requested, errp is set
+ * 0   error message successfully sent to client, errp is not set
+ */
+static int nbd_reject_length(NBDClient *client, uint32_t length,
+ uint32_t option, bool fatal, Error **errp)
+{
+int ret;
+
+assert(length);
+if (nbd_drop(client->ioc, length, errp) < 0) {
+return -EIO;
+}
+ret = nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID,
+ option, errp,
+ "option '%s' should have zero length",
+ nbd_opt_lookup(option));
+if (fatal && !ret) {
+error_setg(errp, "option '%s' should have zero length",
+   nbd_opt_lookup(option));
+return -EINVAL;
+}
+return ret;
+}
+
 /* nbd_negotiate_options
  * Process all NBD_OPT_* client option commands, during fixed newstyle
  * negotiation.
@@ -674,7 +681,13 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
 }
 switch (option) {
 case NBD_OPT_STARTTLS:
-tioc = nbd_negotiate_handle_starttls(client, length, errp);
+if (length) {
+/* Unconditionally drop the connection if the client
+ * can't start a TLS negotiation correctly */
+return nbd_reject_length(client, length, option, true,
+ errp);
+}
+tioc =

[Qemu-block] [PULL 03/12] nbd: Expose constants and structs for structured read

2017-10-30 Thread Eric Blake

Upcoming patches will implement the NBD structured reply
extension [1] for both client and server roles.  Declare the
constants, structs, and lookup routines that will be valuable
whether the server or client code is backported in isolation.

This includes moving one constant from an internal header to
the public header, as part of the structured read processing
will be done in block/nbd-client.c rather than nbd/client.c.

[1]https://github.com/NetworkBlockDevice/nbd/blob/extension-structured-reply/doc/proto.md

Based on patches from Vladimir Sementsov-Ogievskiy.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-4-ebl...@redhat.com>
---
 include/block/nbd.h | 41 +
 nbd/nbd-internal.h  |  2 +-
 nbd/common.c| 27 +++
 nbd/server.c|  2 ++
 4 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index dc62b5cd19..225e9575e4 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -69,6 +69,28 @@ typedef struct NBDSimpleReply {
 uint64_t handle;
 } QEMU_PACKED NBDSimpleReply;

+/* Header of all structured replies */
+typedef struct NBDStructuredReplyChunk {
+uint32_t magic;  /* NBD_STRUCTURED_REPLY_MAGIC */
+uint16_t flags;  /* combination of NBD_REPLY_FLAG_* */
+uint16_t type;   /* NBD_REPLY_TYPE_* */
+uint64_t handle; /* request handle */
+uint32_t length; /* length of payload */
+} QEMU_PACKED NBDStructuredReplyChunk;
+
+/* Header of NBD_REPLY_TYPE_OFFSET_DATA, complete NBD_REPLY_TYPE_OFFSET_HOLE */
+typedef struct NBDStructuredRead {
+NBDStructuredReplyChunk h;
+uint64_t offset;
+} QEMU_PACKED NBDStructuredRead;
+
+/* Header of all NBD_REPLY_TYPE_ERROR* errors */
+typedef struct NBDStructuredError {
+NBDStructuredReplyChunk h;
+uint32_t error;
+uint16_t message_length;
+} QEMU_PACKED NBDStructuredError;
+
 /* Transmission (export) flags: sent from server to client during handshake,
but describe what will happen during transmission */
 #define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */
@@ -79,6 +101,7 @@ typedef struct NBDSimpleReply {
rotational media */
 #define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */
 #define NBD_FLAG_SEND_WRITE_ZEROES (1 << 6) /* Send WRITE_ZEROES */
+#define NBD_FLAG_SEND_DF   (1 << 7) /* Send DF (Do not Fragment) */

 /* New-style handshake (global) flags, sent from server to client, and
control what will happen during handshake phase. */
@@ -125,6 +148,7 @@ typedef struct NBDSimpleReply {
 /* Request flags, sent from client to server during transmission phase */
 #define NBD_CMD_FLAG_FUA(1 << 0) /* 'force unit access' during write */
 #define NBD_CMD_FLAG_NO_HOLE(1 << 1) /* don't punch hole on zero run */
+#define NBD_CMD_FLAG_DF (1 << 2) /* don't fragment structured read */

 /* Supported request types */
 enum {
@@ -149,6 +173,22 @@ enum {
  * aren't overflowing some other buffer. */
 #define NBD_MAX_NAME_SIZE 256

+/* Two types of reply structures */
+#define NBD_SIMPLE_REPLY_MAGIC  0x67446698
+#define NBD_STRUCTURED_REPLY_MAGIC  0x668e33ef
+
+/* Structured reply flags */
+#define NBD_REPLY_FLAG_DONE  (1 << 0) /* This reply-chunk is last */
+
+/* Structured reply types */
+#define NBD_REPLY_ERR(value) ((1 << 15) | (value))
+
+#define NBD_REPLY_TYPE_NONE  0
+#define NBD_REPLY_TYPE_OFFSET_DATA   1
+#define NBD_REPLY_TYPE_OFFSET_HOLE   2
+#define NBD_REPLY_TYPE_ERROR NBD_REPLY_ERR(1)
+#define NBD_REPLY_TYPE_ERROR_OFFSET  NBD_REPLY_ERR(2)
+
 /* NBD errors are based on errno numbers, so there is a 1:1 mapping,
  * but only a limited set of errno values is specified in the protocol.
  * Everything else is squashed to EINVAL.
@@ -159,6 +199,7 @@ enum {
 #define NBD_ENOMEM 12
 #define NBD_EINVAL 22
 #define NBD_ENOSPC 28
+#define NBD_EOVERFLOW  75
 #define NBD_ESHUTDOWN  108

 /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index df6c8b2f24..4f24d6e57d 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -47,7 +47,6 @@
 #define NBD_OLDSTYLE_NEGOTIATE_SIZE (8 + 8 + 8 + 4 + 124)

 #define NBD_REQUEST_MAGIC   0x25609513
-#define NBD_SIMPLE_REPLY_MAGIC  0x67446698
 #define NBD_OPTS_MAGIC  0x49484156454F5054LL
 #define NBD_CLIENT_MAGIC0x420281861253LL
 #define NBD_REP_MAGIC   0x0003e889045565a9LL
@@ -114,6 +113,7 @@ const char *nbd_opt_lookup(uint32_t opt);
 const char *nbd_rep_lookup(uint32_t rep);
 const char *nbd_info_lookup(uint16_t info);
 const char *nbd_cmd_lookup(uint16_t info);
+const char *nbd_reply_type_lookup(uint16_t type);
 const char *nbd_err_lookup(int err);

 int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);

[Qemu-block] [PULL 02/12] nbd: Move nbd_errno_to_system_errno() to public header

2017-10-30 Thread Eric Blake

This is needed in preparation for structured reply handling,
as we will be performing the translation from NBD error to
system errno value higher in the stack at block/nbd-client.c.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-3-ebl...@redhat.com>
---
 include/block/nbd.h | 13 +
 nbd/nbd-internal.h  | 12 
 nbd/client.c| 32 
 nbd/common.c| 34 ++
 nbd/trace-events|  4 +++-
 5 files changed, 50 insertions(+), 45 deletions(-)

diff --git a/include/block/nbd.h b/include/block/nbd.h
index a6df5ce8b5..dc62b5cd19 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -149,6 +149,18 @@ enum {
  * aren't overflowing some other buffer. */
 #define NBD_MAX_NAME_SIZE 256

+/* NBD errors are based on errno numbers, so there is a 1:1 mapping,
+ * but only a limited set of errno values is specified in the protocol.
+ * Everything else is squashed to EINVAL.
+ */
+#define NBD_SUCCESS 0
+#define NBD_EPERM  1
+#define NBD_EIO    5
+#define NBD_ENOMEM 12
+#define NBD_EINVAL 22
+#define NBD_ENOSPC 28
+#define NBD_ESHUTDOWN  108
+
 /* Details collected by NBD_OPT_EXPORT_NAME and NBD_OPT_GO */
 struct NBDExportInfo {
 /* Set by client before nbd_receive_negotiate() */
@@ -172,6 +184,7 @@ int nbd_send_request(QIOChannel *ioc, NBDRequest *request);
 int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp);
 int nbd_client(int fd);
 int nbd_disconnect(int fd);
+int nbd_errno_to_system_errno(int err);

 typedef struct NBDExport NBDExport;
 typedef struct NBDClient NBDClient;
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 4bfe5be884..df6c8b2f24 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -64,18 +64,6 @@
 #define NBD_SET_TIMEOUT _IO(0xab, 9)
 #define NBD_SET_FLAGS   _IO(0xab, 10)

-/* NBD errors are based on errno numbers, so there is a 1:1 mapping,
- * but only a limited set of errno values is specified in the protocol.
- * Everything else is squashed to EINVAL.
- */
-#define NBD_SUCCESS 0
-#define NBD_EPERM  1
-#define NBD_EIO    5
-#define NBD_ENOMEM 12
-#define NBD_EINVAL 22
-#define NBD_ENOSPC 28
-#define NBD_ESHUTDOWN  108
-
 /* nbd_read_eof
  * Tries to read @size bytes from @ioc.
  * Returns 1 on success
diff --git a/nbd/client.c b/nbd/client.c
index 59d7c9d49f..50f36b511e 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -22,38 +22,6 @@
 #include "trace.h"
 #include "nbd-internal.h"

-static int nbd_errno_to_system_errno(int err)
-{
-int ret;
-switch (err) {
-case NBD_SUCCESS:
-ret = 0;
-break;
-case NBD_EPERM:
-ret = EPERM;
-break;
-case NBD_EIO:
-ret = EIO;
-break;
-case NBD_ENOMEM:
-ret = ENOMEM;
-break;
-case NBD_ENOSPC:
-ret = ENOSPC;
-break;
-case NBD_ESHUTDOWN:
-ret = ESHUTDOWN;
-break;
-default:
-trace_nbd_unknown_error(err);
-/* fallthrough */
-case NBD_EINVAL:
-ret = EINVAL;
-break;
-}
-return ret;
-}
-
 /* Definitions for opaque data types */

 static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
diff --git a/nbd/common.c b/nbd/common.c
index 7456021f7e..593904f148 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -18,6 +18,7 @@

 #include "qemu/osdep.h"
 #include "qapi/error.h"
+#include "trace.h"
 #include "nbd-internal.h"

 /* Discard length bytes from channel.  Return -errno on failure and 0 on
@@ -171,3 +172,36 @@ const char *nbd_err_lookup(int err)
 return "";
 }
 }
+
+
+int nbd_errno_to_system_errno(int err)
+{
+int ret;
+switch (err) {
+case NBD_SUCCESS:
+ret = 0;
+break;
+case NBD_EPERM:
+ret = EPERM;
+break;
+case NBD_EIO:
+ret = EIO;
+break;
+case NBD_ENOMEM:
+ret = ENOMEM;
+break;
+case NBD_ENOSPC:
+ret = ENOSPC;
+break;
+case NBD_ESHUTDOWN:
+ret = ESHUTDOWN;
+break;
+default:
+trace_nbd_unknown_error(err);
+/* fallthrough */
+case NBD_EINVAL:
+ret = EINVAL;
+break;
+}
+return ret;
+}
diff --git a/nbd/trace-events b/nbd/trace-events
index 920c8a0e5e..ab3d7dad4f 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -1,5 +1,4 @@
 # nbd/client.c
-nbd_unknown_error(int err) "Squashing unexpected error %d to EINVAL"
 nbd_send_option_request(uint32_t opt, const char *name, uint32_t len) "Sending 
option request %" PRIu32" (%s), len %" PRIu32
 nbd_receive_option_reply(uint32_t option, const char *optname, uint32_t type, 
const char *typename, uint32_t length) "Received option reply 0x%" PRIx32" 
(%s), type 0x%" PRIx32" (%s), len %" PRIu32
 nbd_reply_err_unsup(uint32_t option, const char *name) "server

[Qemu-block] [PULL 12/12] nbd: Minimal structured read for client

2017-10-30 Thread Eric Blake

From: Vladimir Sementsov-Ogievskiy 

Minimal implementation: for a structured error, only report the error
message via error_report().

Note that test 83 is now more verbose, because the implementation
prints more warnings about unexpected communication errors; perhaps
future patches should tone things down by using trace messages
instead of error reports, but the common case of successful communication
is no noisier than before.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 
Message-Id: <20171027104037.8319-13-ebl...@redhat.com>
---
 block/nbd-client.h |   1 +
 include/block/nbd.h|  12 ++
 nbd/nbd-internal.h |   1 -
 block/nbd-client.c | 490 ++---
 nbd/client.c   |  12 ++
 tests/qemu-iotests/083.out |  15 ++
 6 files changed, 498 insertions(+), 33 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index b435754b82..612c4c21a0 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -19,6 +19,7 @@

 typedef struct {
 Coroutine *coroutine;
+uint64_t offset;/* original offset of the request */
 bool receiving; /* waiting for read_reply_co? */
 } NBDClientRequest;

diff --git a/include/block/nbd.h b/include/block/nbd.h
index da6e305dd5..92d1723d7c 100644
--- a/include/block/nbd.h
+++ b/include/block/nbd.h
@@ -197,6 +197,11 @@ enum {
 #define NBD_REPLY_TYPE_ERROR NBD_REPLY_ERR(1)
 #define NBD_REPLY_TYPE_ERROR_OFFSET  NBD_REPLY_ERR(2)

+static inline bool nbd_reply_type_is_error(int type)
+{
+return type & (1 << 15);
+}
+
 /* NBD errors are based on errno numbers, so there is a 1:1 mapping,
  * but only a limited set of errno values is specified in the protocol.
  * Everything else is squashed to EINVAL.
@@ -214,6 +219,11 @@ enum {
 struct NBDExportInfo {
 /* Set by client before nbd_receive_negotiate() */
 bool request_sizes;
+
+/* In-out fields, set by client before nbd_receive_negotiate() and
+ * updated by server results during nbd_receive_negotiate() */
+bool structured_reply;
+
 /* Set by server results during nbd_receive_negotiate() */
 uint64_t size;
 uint16_t flags;
@@ -284,4 +294,6 @@ static inline bool nbd_reply_is_structured(NBDReply *reply)
 return reply->magic == NBD_STRUCTURED_REPLY_MAGIC;
 }

+const char *nbd_reply_type_lookup(uint16_t type);
+
 #endif
diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index b64eb1cc9b..eeff78d3c9 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -104,7 +104,6 @@ const char *nbd_opt_lookup(uint32_t opt);
 const char *nbd_rep_lookup(uint32_t rep);
 const char *nbd_info_lookup(uint16_t info);
 const char *nbd_cmd_lookup(uint16_t info);
-const char *nbd_reply_type_lookup(uint16_t type);
 const char *nbd_err_lookup(int err);

 int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);
diff --git a/block/nbd-client.c b/block/nbd-client.c
index 58493b7ac4..b44d4d4a01 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -93,7 +93,7 @@ static coroutine_fn void nbd_read_reply_entry(void *opaque)
 if (i >= MAX_NBD_REQUESTS ||
 !s->requests[i].coroutine ||
 !s->requests[i].receiving ||
-nbd_reply_is_structured(&s->reply))
+(nbd_reply_is_structured(&s->reply) && !s->info.structured_reply))
 {
 break;
 }
@@ -141,6 +141,7 @@ static int nbd_co_send_request(BlockDriverState *bs,
 assert(i < MAX_NBD_REQUESTS);

 s->requests[i].coroutine = qemu_coroutine_self();
+s->requests[i].offset = request->from;
 s->requests[i].receiving = false;

 request->handle = INDEX_TO_HANDLE(s, i);
@@ -181,75 +182,489 @@ err:
 return rc;
 }

-static int nbd_co_receive_reply(NBDClientSession *s,
-uint64_t handle,
-QEMUIOVector *qiov)
+static inline uint16_t payload_advance16(uint8_t **payload)
+{
+*payload += 2;
+return lduw_be_p(*payload - 2);
+}
+
+static inline uint32_t payload_advance32(uint8_t **payload)
+{
+*payload += 4;
+return ldl_be_p(*payload - 4);
+}
+
+static inline uint64_t payload_advance64(uint8_t **payload)
+{
+*payload += 8;
+return ldq_be_p(*payload - 8);
+}
+
+static int nbd_parse_offset_hole_payload(NBDStructuredReplyChunk *chunk,
+ uint8_t *payload, uint64_t 
orig_offset,
+ QEMUIOVector *qiov, Error **errp)
+{
+uint64_t offset;
+uint32_t hole_size;
+
+if (chunk->length != sizeof(offset) + sizeof(hole_size)) {
+error_setg(errp, "Protocol error: invalid payload for "
+ "NBD_REPLY_TYPE_OFFSET_HOLE");
+return -EINVAL;
+}
+
+offset = payload_advance64(&payload);
+hole_size = payload_advance32(&payload);
+
+if (offset < orig_offset || hole_size > qiov->size ||
+offset > orig_offset + qiov->size -

[Qemu-block] [PULL 01/12] nbd: Include error names in trace messages

2017-10-30 Thread Eric Blake

NBD errors were originally sent over the wire based on Linux errno
values; but not all the world is Linux, and not all platforms share
the same values.  Since a number isn't very easy to decipher on all
platforms, update the trace messages to include the name of NBD
errors being sent/received over the wire.  Tweak the trace messages
to be at the point where we are using the NBD error, not the
translation to the host errno values.

Signed-off-by: Eric Blake 
Reviewed-by: Vladimir Sementsov-Ogievskiy 
Message-Id: <20171027104037.8319-2-ebl...@redhat.com>
---
 nbd/nbd-internal.h |  1 +
 nbd/client.c   |  3 ++-
 nbd/common.c   | 23 +++
 nbd/server.c   |  3 ++-
 nbd/trace-events   |  4 ++--
 5 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/nbd/nbd-internal.h b/nbd/nbd-internal.h
index 11a130d050..4bfe5be884 100644
--- a/nbd/nbd-internal.h
+++ b/nbd/nbd-internal.h
@@ -126,6 +126,7 @@ const char *nbd_opt_lookup(uint32_t opt);
 const char *nbd_rep_lookup(uint32_t rep);
 const char *nbd_info_lookup(uint16_t info);
 const char *nbd_cmd_lookup(uint16_t info);
+const char *nbd_err_lookup(int err);

 int nbd_drop(QIOChannel *ioc, size_t size, Error **errp);

diff --git a/nbd/client.c b/nbd/client.c
index cd5a2c80ac..59d7c9d49f 100644
--- a/nbd/client.c
+++ b/nbd/client.c
@@ -940,6 +940,8 @@ int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, 
Error **errp)
 reply->error  = ldl_be_p(buf + 4);
 reply->handle = ldq_be_p(buf + 8);

+trace_nbd_receive_reply(magic, reply->error, nbd_err_lookup(reply->error),
+reply->handle);
 reply->error = nbd_errno_to_system_errno(reply->error);

 if (reply->error == ESHUTDOWN) {
@@ -947,7 +949,6 @@ int nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, 
Error **errp)
 error_setg(errp, "server shutting down");
 return -EINVAL;
 }
-trace_nbd_receive_reply(magic, reply->error, reply->handle);

 if (magic != NBD_SIMPLE_REPLY_MAGIC) {
 error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
diff --git a/nbd/common.c b/nbd/common.c
index 59a5316be9..7456021f7e 100644
--- a/nbd/common.c
+++ b/nbd/common.c
@@ -148,3 +148,26 @@ const char *nbd_cmd_lookup(uint16_t cmd)
 return "";
 }
 }
+
+
+const char *nbd_err_lookup(int err)
+{
+switch (err) {
+case NBD_SUCCESS:
+return "success";
+case NBD_EPERM:
+return "EPERM";
+case NBD_EIO:
+return "EIO";
+case NBD_ENOMEM:
+return "ENOMEM";
+case NBD_EINVAL:
+return "EINVAL";
+case NBD_ENOSPC:
+return "ENOSPC";
+case NBD_ESHUTDOWN:
+return "ESHUTDOWN";
+default:
+return "";
+}
+}
diff --git a/nbd/server.c b/nbd/server.c
index 3df3548d6d..459e00c553 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1227,7 +1227,8 @@ static int nbd_co_send_simple_reply(NBDClient *client,
 {.iov_base = data, .iov_len = len}
 };

-trace_nbd_co_send_simple_reply(handle, nbd_err, len);
+trace_nbd_co_send_simple_reply(handle, nbd_err, nbd_err_lookup(nbd_err),
+   len);
 set_be_simple_reply(&reply, nbd_err, handle);

 return nbd_co_send_iov(client, iov, len ? 2 : 1, errp);
diff --git a/nbd/trace-events b/nbd/trace-events
index e27614f050..920c8a0e5e 100644
--- a/nbd/trace-events
+++ b/nbd/trace-events
@@ -29,7 +29,7 @@ nbd_client_loop_ret(int ret, const char *error) "NBD loop 
returned %d: %s"
 nbd_client_clear_queue(void) "Clearing NBD queue"
 nbd_client_clear_socket(void) "Clearing NBD socket"
 nbd_send_request(uint64_t from, uint32_t len, uint64_t handle, uint16_t flags, 
uint16_t type, const char *name) "Sending request to server: { .from = %" 
PRIu64", .len = %" PRIu32 ", .handle = %" PRIu64 ", .flags = 0x%" PRIx16 ", 
.type = %" PRIu16 " (%s) }"
-nbd_receive_reply(uint32_t magic, int32_t error, uint64_t handle) "Got reply: 
{ magic = 0x%" PRIx32 ", .error = % " PRId32 ", handle = %" PRIu64" }"
+nbd_receive_reply(uint32_t magic, int32_t error, const char *errname, uint64_t 
handle) "Got reply: { magic = 0x%" PRIx32 ", .error = %" PRId32 " (%s), handle 
= %" PRIu64" }"

 # nbd/server.c
 nbd_negotiate_send_rep_len(uint32_t opt, const char *optname, uint32_t type, 
const char *typename, uint32_t len) "Reply opt=0x%" PRIx32 " (%s), type=0x%" 
PRIx32 " (%s), len=%" PRIu32
@@ -53,7 +53,7 @@ nbd_negotiate_success(void) "Negotiation succeeded"
 nbd_receive_request(uint32_t magic, uint16_t flags, uint16_t type, uint64_t 
from, uint32_t len) "Got request: { magic = 0x%" PRIx32 ", .flags = 0x%" PRIx16 
", .type = 0x%" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }"
 nbd_blk_aio_attached(const char *name, void *ctx) "Export %s: Attaching 
clients to AIO context %p\n"
 nbd_blk_aio_detach(const char *name, void *ctx) "Export %s: Detaching clients 
from AIO context %p\n"
-nbd_co_send_simple_reply(uint64_t handle, uint32_t error, int len)

Re: [Qemu-block] [PATCH v6 06/12] nbd/server: Refactor zero-length option check

2017-10-30 Thread Eric Blake

On 10/30/2017 09:11 PM, Eric Blake wrote:
> On 10/30/2017 06:22 PM, Vladimir Sementsov-Ogievskiy wrote:
>> 27.10.2017 13:40, Eric Blake wrote:
>>> Consolidate the response for a non-zero-length option payload
>>> into a new function, nbd_reject_length().  This check will
>>> also be used when introducing support for structured replies.
>>>

>>> +    if (length) {
>>> +    /* Unconditionally drop the connection if the client
>>> + * can't start a TLS negotiation correctly */
>>> +    nbd_reject_length(client, length, option, true,
>>> errp);
>>> +    return -EINVAL;
>>
>> why not to return nbd_reject_length's result? this EINVAL may not
>> correspond to errp (about EIO for example)..
> 
> Somewhat true, if nbd_reject_length() fails. But nbd_reject_length() may
> also return 0 without setting errp, in which case, maybe this code
> should set errp rather than just blindly returning -EINVAL.
> 
>>
>> with or without this fixed:
>>
>> Reviewed-by: Vladimir Sementsov-Ogievskiy 
>>
> 
> Okay, I'll squash this in, and include it in my pull request to be sent
> shortly.

D'oh. Long week for me. The whole reason I added a 'bool fatal'
parameter was so that I don't have to worry about nbd_reject_length()
returning 0.  So it is instead better to just do:

> --- i/nbd/server.c
> +++ w/nbd/server.c
> @@ -684,8 +684,13 @@ static int nbd_negotiate_options(NBDClient *client,
> uint16_t myflags,
>  if (length) {
>  /* Unconditionally drop the connection if the client
>   * can't start a TLS negotiation correctly */
> -nbd_reject_length(client, length, option, true, errp);
> -return -EINVAL;
> +return nbd_reject_length(client, length, option, true,
> + errp);

rather than repeating an error message.

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [PATCH v6 06/12] nbd/server: Refactor zero-length option check

2017-10-30 Thread Eric Blake

On 10/30/2017 06:22 PM, Vladimir Sementsov-Ogievskiy wrote:
> 27.10.2017 13:40, Eric Blake wrote:
>> Consolidate the response for a non-zero-length option payload
>> into a new function, nbd_reject_length().  This check will
>> also be used when introducing support for structured replies.
>>
>> Note that STARTTLS response differs based on time: if the connection
>> is still unencrypted, we set fatal to true (a client that can't
>> request TLS correctly may still think that we are ready to start
>> the TLS handshake, so we must disconnect); while if the connection
>> is already encrypted, the client is sending a bogus request but
>> is no longer at risk of being confused by continuing the connection.
>>

>>   switch (option) {
>>   case NBD_OPT_STARTTLS:
>> -    tioc = nbd_negotiate_handle_starttls(client, length,
>> errp);
>> +    if (length) {
>> +    /* Unconditionally drop the connection if the client
>> + * can't start a TLS negotiation correctly */
>> +    nbd_reject_length(client, length, option, true,
>> errp);
>> +    return -EINVAL;
> 
> why not to return nbd_reject_length's result? this EINVAL may not
> correspond to errp (about EIO for example)..

Somewhat true, if nbd_reject_length() fails. But nbd_reject_length() may
also return 0 without setting errp, in which case, maybe this code
should set errp rather than just blindly returning -EINVAL.

> 
> with or without this fixed:
> 
> Reviewed-by: Vladimir Sementsov-Ogievskiy 
> 

Okay, I'll squash this in, and include it in my pull request to be sent
shortly.

diff --git i/nbd/server.c w/nbd/server.c
index a9480e42cd..91f81a0f19 100644
--- i/nbd/server.c
+++ w/nbd/server.c
@@ -684,8 +684,13 @@ static int nbd_negotiate_options(NBDClient *client,
uint16_t myflags,
 if (length) {
 /* Unconditionally drop the connection if the client
  * can't start a TLS negotiation correctly */
-nbd_reject_length(client, length, option, true, errp);
-return -EINVAL;
+ret = nbd_reject_length(client, length, option,
true, errp);
+if (!ret) {
+error_setg(errp, "option '%s' should have zero
length",
+   nbd_opt_lookup(option));
+ret = -EINVAL;
+}
+return ret;
 }
 tioc = nbd_negotiate_handle_starttls(client, errp);
 if (!tioc) {


-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org



signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [PATCH v8 00/14] Dirty bitmaps postcopy migration

2017-10-30 Thread Vladimir Sementsov-Ogievskiy


30.10.2017 19:32, Vladimir Sementsov-Ogievskiy wrote:

Hi all!

There is a new version of dirty bitmap postcopy migration series.

v8

clone: tag postcopy-v8 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v8


compilation is broken for s390, so updated version is:

clone: tag postcopy-v8.1 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: 
https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v8.1



--
Best regards,
Vladimir

[Qemu-block] [PATCH v8.1 05/14] migration: introduce postcopy-only pending

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

There would be savevm states (dirty-bitmap) which can migrate only in
postcopy stage. The corresponding pending is introduced here.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
---

8.1: add new version for cmma_save_pending too.


 include/migration/register.h | 17 +++--
 migration/savevm.h   |  5 +++--
 hw/s390x/s390-stattrib.c |  7 ---
 migration/block.c|  7 ---
 migration/migration.c| 15 ---
 migration/ram.c  |  9 +
 migration/savevm.c   | 13 -
 migration/trace-events   |  2 +-
 8 files changed, 48 insertions(+), 27 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index f4f7bdc177..9436a87678 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -37,8 +37,21 @@ typedef struct SaveVMHandlers {
 int (*save_setup)(QEMUFile *f, void *opaque);
 void (*save_live_pending)(QEMUFile *f, void *opaque,
   uint64_t threshold_size,
-  uint64_t *non_postcopiable_pending,
-  uint64_t *postcopiable_pending);
+  uint64_t *res_precopy_only,
+  uint64_t *res_compatible,
+  uint64_t *res_postcopy_only);
+/* Note for save_live_pending:
+ * - res_precopy_only is for data which must be migrated in precopy phase
+ * or in stopped state, in other words - before target vm start
+ * - res_compatible is for data which may be migrated in any phase
+ * - res_postcopy_only is for data which must be migrated in postcopy phase
+ * or in stopped state, in other words - after source vm stop
+ *
+ * Sum of res_precopy_only, res_compatible and res_postcopy_only is the
+ * whole amount of pending data.
+ */
+
+
 LoadStateHandler *load_state;
 int (*load_setup)(QEMUFile *f, void *opaque);
 int (*load_cleanup)(void *opaque);
diff --git a/migration/savevm.h b/migration/savevm.h
index 295c4a1f2c..cf4f0d37ca 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -38,8 +38,9 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f);
 int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
bool inactivate_disks);
 void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
-   uint64_t *res_non_postcopiable,
-   uint64_t *res_postcopiable);
+   uint64_t *res_precopy_only,
+   uint64_t *res_compatible,
+   uint64_t *res_postcopy_only);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
 int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
diff --git a/hw/s390x/s390-stattrib.c b/hw/s390x/s390-stattrib.c
index 2902f54f11..dd3fbfd1eb 100644
--- a/hw/s390x/s390-stattrib.c
+++ b/hw/s390x/s390-stattrib.c
@@ -183,15 +183,16 @@ static int cmma_save_setup(QEMUFile *f, void *opaque)
 }
 
 static void cmma_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
- uint64_t *non_postcopiable_pending,
- uint64_t *postcopiable_pending)
+  uint64_t *res_precopy_only,
+  uint64_t *res_compatible,
+  uint64_t *res_postcopy_only)
 {
 S390StAttribState *sas = S390_STATTRIB(opaque);
 S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas);
 long long res = sac->get_dirtycount(sas);
 
 if (res >= 0) {
-*non_postcopiable_pending += res;
+*res_precopy_only += res;
 }
 }
 
diff --git a/migration/block.c b/migration/block.c
index 3282809583..39dfa567e8 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -865,8 +865,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
 }
 
 static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
-   uint64_t *non_postcopiable_pending,
-   uint64_t *postcopiable_pending)
+   uint64_t *res_precopy_only,
+   uint64_t *res_compatible,
+   uint64_t *res_postcopy_only)
 {
 /* Estimate pending number of bytes to send */
 uint64_t pending;
@@ -887,7 +888,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, 
uint64_t max_size,
 
 DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
 /* We don't do postcopy */
-*non_postcopiable_pending += pending;
+*res_precopy_only += pending;
 }
 
 static int block_load(QEMUFile *f, void *opaque, int version_id)
diff --git a/migration/migration.c

Re: [Qemu-block] [PATCH v8 05/14] migration: introduce postcopy-only pending

2017-10-30 Thread Vladimir Sementsov-Ogievskiy


30.10.2017 20:31, Dr. David Alan Gilbert wrote:

* Vladimir Sementsov-Ogievskiy (vsement...@virtuozzo.com) wrote:

There would be savevm states (dirty-bitmap) which can migrate only in
postcopy stage. The corresponding pending is introduced here.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 

Note the error patchew picked up on this for s390 which has
cmma_save_pending.

Dave


Thanks for pointing this out, I'll update the patch.



--
Best regards,
Vladimir

Re: [Qemu-block] [RFC PATCH v6 13/12] tweak test 83 verbosity

2017-10-30 Thread Vladimir Sementsov-Ogievskiy


27.10.2017 13:45, Eric Blake wrote:

Commenting these two lines is enough to avoid the change to 083.out
in 12/12.  That is evidence that we may want these two lines to be
trace points rather than error messages; or maybe we really do like
the extra verbosity in the case of an unexpected communication break.

This patch does not meet coding guidelines, and I'm not proud enough
of it to give S-o-b, but I'm posting it for conversation.


I think more verbosity on failure is not bad; it's a rare case. In the previous
patch the corresponding change looks big, but in a real case it would be just
one more line in the log.

However, if you are unsure about it, people who want more verbosity can always
enable this particular trace, so tracing is OK too, and more flexible.



---
  block/nbd-client.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index b44d4d4a01..e063b3fbc0 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -654,7 +654,8 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest 
*request,

  ret = nbd_co_receive_return_code(client, request->handle, &local_err);
  if (local_err) {
-error_report_err(local_err);
+assert(ret < 0);
+//error_report_err(local_err);
  }
  return ret;
  }
@@ -682,7 +683,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t 
offset,
  ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
 &local_err);
  if (ret < 0) {
-error_report_err(local_err);
+//error_report_err(local_err);
  }
  return ret;
  }



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v6 12/12] nbd: Minimal structured read for client

2017-10-30 Thread Vladimir Sementsov-Ogievskiy


27.10.2017 13:40, Eric Blake wrote:

From: Vladimir Sementsov-Ogievskiy 

Minimal implementation: for a structured error, only report the error
message via error_report().

Note that test 83 is now more verbose, because the implementation
prints more warnings about unexpected communication errors; perhaps
future patches should tone things down by using trace messages
instead of error reports, but the common case of successful communication
is no noisier than before.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 


ok for me, thank you!



---
v6: tweak overflow check [Vladimir], fix reads to use absolute offset
from server by tracking original offset, fix talking to old-style server,
tweak iotest 83 output to account for new verbosity
v5: fix payload_advance[32,64], return correct negative error on
structured error, rearrange size checks to not be vulnerable to
overflow, simplify payload to use g_new instead of qemu_memalign,
don't set errp when returning 0, validate that error message
length is sane



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v6 09/12] nbd/client: refactor nbd_receive_starttls

2017-10-30 Thread Vladimir Sementsov-Ogievskiy


27.10.2017 13:40, Eric Blake wrote:

From: Vladimir Sementsov-Ogievskiy 

Split out nbd_request_simple_option to be reused for structured reply
option.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 



ok for me.



---
v6: error message tweak [Vladimir]
v5: only check length for ACK responses
v4: reduce redundant traces, typo fix in commit message
---



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v8 05/14] migration: introduce postcopy-only pending

2017-10-30 Thread Dr. David Alan Gilbert

* Vladimir Sementsov-Ogievskiy (vsement...@virtuozzo.com) wrote:
> There would be savevm states (dirty-bitmap) which can migrate only in
> postcopy stage. The corresponding pending is introduced here.
> 
> Signed-off-by: Vladimir Sementsov-Ogievskiy 
> Reviewed-by: Juan Quintela 

Note the error patchew picked up on this for s390 which has
cmma_save_pending.

Dave

> ---
>  include/migration/register.h | 17 +++--
>  migration/savevm.h   |  5 +++--
>  migration/block.c|  7 ---
>  migration/migration.c| 15 ---
>  migration/ram.c  |  9 +
>  migration/savevm.c   | 13 -
>  migration/trace-events   |  2 +-
>  7 files changed, 44 insertions(+), 24 deletions(-)
> 
> diff --git a/include/migration/register.h b/include/migration/register.h
> index f4f7bdc177..9436a87678 100644
> --- a/include/migration/register.h
> +++ b/include/migration/register.h
> @@ -37,8 +37,21 @@ typedef struct SaveVMHandlers {
>  int (*save_setup)(QEMUFile *f, void *opaque);
>  void (*save_live_pending)(QEMUFile *f, void *opaque,
>uint64_t threshold_size,
> -  uint64_t *non_postcopiable_pending,
> -  uint64_t *postcopiable_pending);
> +  uint64_t *res_precopy_only,
> +  uint64_t *res_compatible,
> +  uint64_t *res_postcopy_only);
> +/* Note for save_live_pending:
> + * - res_precopy_only is for data which must be migrated in precopy phase
> + * or in stopped state, in other words - before target vm start
> + * - res_compatible is for data which may be migrated in any phase
> + * - res_postcopy_only is for data which must be migrated in postcopy 
> phase
> + * or in stopped state, in other words - after source vm stop
> + *
> + * Sum of res_precopy_only, res_compatible and res_postcopy_only is the
> + * whole amount of pending data.
> + */
> +
> +
>  LoadStateHandler *load_state;
>  int (*load_setup)(QEMUFile *f, void *opaque);
>  int (*load_cleanup)(void *opaque);
> diff --git a/migration/savevm.h b/migration/savevm.h
> index 295c4a1f2c..cf4f0d37ca 100644
> --- a/migration/savevm.h
> +++ b/migration/savevm.h
> @@ -38,8 +38,9 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f);
>  int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
> bool inactivate_disks);
>  void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
> -   uint64_t *res_non_postcopiable,
> -   uint64_t *res_postcopiable);
> +   uint64_t *res_precopy_only,
> +   uint64_t *res_compatible,
> +   uint64_t *res_postcopy_only);
>  void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
>  void qemu_savevm_send_open_return_path(QEMUFile *f);
>  int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
> diff --git a/migration/block.c b/migration/block.c
> index 3282809583..39dfa567e8 100644
> --- a/migration/block.c
> +++ b/migration/block.c
> @@ -865,8 +865,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
>  }
>  
>  static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
> -   uint64_t *non_postcopiable_pending,
> -   uint64_t *postcopiable_pending)
> +   uint64_t *res_precopy_only,
> +   uint64_t *res_compatible,
> +   uint64_t *res_postcopy_only)
>  {
>  /* Estimate pending number of bytes to send */
>  uint64_t pending;
> @@ -887,7 +888,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, 
> uint64_t max_size,
>  
>  DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
>  /* We don't do postcopy */
> -*non_postcopiable_pending += pending;
> +*res_precopy_only += pending;
>  }
>  
>  static int block_load(QEMUFile *f, void *opaque, int version_id)
> diff --git a/migration/migration.c b/migration/migration.c
> index 4de3b551fe..e6c9be3cca 100644
> --- a/migration/migration.c
> +++ b/migration/migration.c
> @@ -2216,20 +2216,21 @@ static void *migration_thread(void *opaque)
>  uint64_t pending_size;
>  
>  if (!qemu_file_rate_limit(s->to_dst_file)) {
> -uint64_t pend_post, pend_nonpost;
> +uint64_t pend_pre, pend_compat, pend_post;
>  
> -qemu_savevm_state_pending(s->to_dst_file, threshold_size,
> -  &pend_nonpost, &pend_post);
> -pending_size = pend_nonpost + pend_post;
> +qemu_savevm_state_pending(s->to_dst_file, threshold_size, 
> &pend_pre,
> +

Re: [Qemu-block] [PATCH v6 07/12] nbd: Minimal structured read for server

2017-10-30 Thread Vladimir Sementsov-Ogievskiy


27.10.2017 13:40, Eric Blake wrote:

From: Vladimir Sementsov-Ogievskiy 

Minimal implementation of structured read: one structured reply chunk,
no segmentation.
Minimal structured error implementation: no text message.
Support DF flag, but just ignore it, as there is no segmentation any
way.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Eric Blake 


ok for me.



--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v6 06/12] nbd/server: Refactor zero-length option check

2017-10-30 Thread Vladimir Sementsov-Ogievskiy


27.10.2017 13:40, Eric Blake wrote:

Consolidate the response for a non-zero-length option payload
into a new function, nbd_reject_length().  This check will
also be used when introducing support for structured replies.

Note that STARTTLS response differs based on time: if the connection
is still unencrypted, we set fatal to true (a client that can't
request TLS correctly may still think that we are ready to start
the TLS handshake, so we must disconnect); while if the connection
is already encrypted, the client is sending a bogus request but
is no longer at risk of being confused by continuing the connection.

Signed-off-by: Eric Blake 

---
v6: split, rework logic to avoid subtle regression on starttls [Vladimir]
v5: new patch
---
  nbd/server.c | 74 +---
  1 file changed, 46 insertions(+), 28 deletions(-)

diff --git a/nbd/server.c b/nbd/server.c
index 6af708662d..a98f5622c9 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -253,21 +253,10 @@ static int nbd_negotiate_send_rep_list(QIOChannel *ioc, 
NBDExport *exp,

  /* Process the NBD_OPT_LIST command, with a potential series of replies.
   * Return -errno on error, 0 on success. */
-static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length,
- Error **errp)
+static int nbd_negotiate_handle_list(NBDClient *client, Error **errp)
  {
  NBDExport *exp;

-if (length) {
-if (nbd_drop(client->ioc, length, errp) < 0) {
-return -EIO;
-}
-return nbd_negotiate_send_rep_err(client->ioc,
-  NBD_REP_ERR_INVALID, NBD_OPT_LIST,
-  errp,
-  "OPT_LIST should not have length");
-}
-
  /* For each export, send a NBD_REP_SERVER reply. */
  QTAILQ_FOREACH(exp, , next) {
  if (nbd_negotiate_send_rep_list(client->ioc, exp, errp)) {
@@ -531,7 +520,6 @@ static int nbd_negotiate_handle_info(NBDClient *client, 
uint32_t length,
  /* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
   * new channel for all further (now-encrypted) communication. */
  static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
- uint32_t length,
   Error **errp)
  {
  QIOChannel *ioc;
@@ -540,15 +528,6 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient 
*client,

  trace_nbd_negotiate_handle_starttls();
  ioc = client->ioc;
-if (length) {
-if (nbd_drop(ioc, length, errp) < 0) {
-return NULL;
-}
-nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
-   errp,
-   "OPT_STARTTLS should not have length");
-return NULL;
-}

  if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
 NBD_OPT_STARTTLS, errp) < 0) {
@@ -584,6 +563,34 @@ static QIOChannel *nbd_negotiate_handle_starttls(NBDClient 
*client,
  return QIO_CHANNEL(tioc);
  }

+/* nbd_reject_length: Handle any unexpected payload.
+ * @fatal requests that we quit talking to the client, even if we are able
+ * to successfully send an error to the guest.
+ * Return:
+ * -errno  transmission error occurred or @fatal was requested, errp is set
+ * 0   error message successfully sent to client, errp is not set
+ */
+static int nbd_reject_length(NBDClient *client, uint32_t length,
+ uint32_t option, bool fatal, Error **errp)
+{
+int ret;
+
+assert(length);
+if (nbd_drop(client->ioc, length, errp) < 0) {
+return -EIO;
+}
+ret = nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID,
+ option, errp,
+ "option '%s' should have zero length",
+ nbd_opt_lookup(option));
+if (fatal && !ret) {
+error_setg(errp, "option '%s' should have zero length",
+   nbd_opt_lookup(option));
+return -EINVAL;
+}
+return ret;
+}
+
  /* nbd_negotiate_options
   * Process all NBD_OPT_* client option commands, during fixed newstyle
   * negotiation.
@@ -674,7 +681,13 @@ static int nbd_negotiate_options(NBDClient *client, 
uint16_t myflags,
  }
  switch (option) {
  case NBD_OPT_STARTTLS:
-tioc = nbd_negotiate_handle_starttls(client, length, errp);
+if (length) {
+/* Unconditionally drop the connection if the client
+ * can't start a TLS negotiation correctly */
+nbd_reject_length(client, length, option, true, errp);
+return -EINVAL;


why not to return nbd_reject_length's result? this EINVAL may not

Re: [Qemu-block] [Qemu-devel] [PATCH v4] nvme: Add tracing

2017-10-30 Thread Philippe Mathieu-Daudé

Cc'ing Stefan and Trivial

On 10/30/2017 01:07 PM, Doug Gale wrote:
> From 0e27b5dca8f4f32a1b194e1b3544be77dd4f45d9 Mon Sep 17 00:00:00 2001
> From: Doug Gale 
> Date: Mon, 30 Oct 2017 09:28:43 -0400
> Subject: [PATCH] nvme: Add tracing
> 

^ to remove from commit description, maybe maintainer taking this can
cleanup.
> Add trace output for commands, errors, and undefined behavior.
> Add guest error log output for undefined behavior.
> Report invalid undefined accesses to MMIO.
> Annotate unlikely error checks with unlikely.
> 
> Signed-off-by: Doug Gale 

Reviewed-by: Philippe Mathieu-Daudé 

> ---
>  hw/block/nvme.c   | 349 
> ++
>  hw/block/trace-events |  93 ++
>  2 files changed, 390 insertions(+), 52 deletions(-)

[Qemu-block] [PATCH v8 10/14] migration: add postcopy migration of dirty bitmaps

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

Postcopy migration of dirty bitmaps. Only named dirty bitmaps,
associated with root nodes and non-root named nodes are migrated.

If the destination QEMU already contains a dirty bitmap with the same name
as a migrated bitmap (for the same node), the migration proceeds only if their
granularities match; otherwise an error is generated.

If the destination QEMU does not contain such a bitmap, it will be created.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/migration/misc.h   |   3 +
 migration/migration.h  |   3 +
 migration/block-dirty-bitmap.c | 734 +
 migration/migration.c  |   3 +
 migration/savevm.c |   2 +
 vl.c   |   1 +
 migration/Makefile.objs|   1 +
 migration/trace-events |  14 +
 8 files changed, 761 insertions(+)
 create mode 100644 migration/block-dirty-bitmap.c

diff --git a/include/migration/misc.h b/include/migration/misc.h
index c079b7771b..9cc539e232 100644
--- a/include/migration/misc.h
+++ b/include/migration/misc.h
@@ -55,4 +55,7 @@ bool migration_has_failed(MigrationState *);
 bool migration_in_postcopy_after_devices(MigrationState *);
 void migration_global_dump(Monitor *mon);
 
+/* migration/block-dirty-bitmap.c */
+void dirty_bitmap_mig_init(void);
+
 #endif
diff --git a/migration/migration.h b/migration/migration.h
index 50d1f01346..4e3ad04664 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -211,4 +211,7 @@ void migrate_send_rp_pong(MigrationIncomingState *mis,
 void migrate_send_rp_req_pages(MigrationIncomingState *mis, const char* rbname,
   ram_addr_t start, size_t len);
 
+void dirty_bitmap_mig_before_vm_start(void);
+void init_dirty_bitmap_incoming_migration(void);
+
 #endif
diff --git a/migration/block-dirty-bitmap.c b/migration/block-dirty-bitmap.c
new file mode 100644
index 00..53cb20045d
--- /dev/null
+++ b/migration/block-dirty-bitmap.c
@@ -0,0 +1,734 @@
+/*
+ * Block dirty bitmap postcopy migration
+ *
+ * Copyright IBM, Corp. 2009
+ * Copyright (c) 2016-2017 Parallels International GmbH
+ *
+ * Authors:
+ *  Liran Schour   
+ *  Vladimir Sementsov-Ogievskiy 
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ * This file is derived from migration/block.c, so its author and IBM 
copyright
+ * are here, although content is quite different.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ *
+ ****
+ *
+ * Here postcopy migration of dirty bitmaps is realized. Only named dirty
+ * bitmaps, associated with root nodes and non-root named nodes are migrated.
+ *
+ * If destination qemu is already containing a dirty bitmap with the same name
+ * as a migrated bitmap (for the same node), then, if their granularities are
+ * the same the migration will be done, otherwise the error will be generated.
+ *
+ * If destination qemu doesn't contain such bitmap it will be created.
+ *
+ * format of migration:
+ *
+ * # Header (shared for different chunk types)
+ * 1, 2 or 4 bytes: flags (see qemu_{get,put}_flags)
+ * [ 1 byte: node name size ] \  flags & DEVICE_NAME
+ * [ n bytes: node name ] /
+ * [ 1 byte: bitmap name size ] \  flags & BITMAP_NAME
+ * [ n bytes: bitmap name ] /
+ *
+ * # Start of bitmap migration (flags & START)
+ * header
+ * be64: granularity
+ * 1 byte: bitmap flags (corresponds to BdrvDirtyBitmap)
+ *   bit 0-  bitmap is enabled
+ *   bit 1-  bitmap is persistent
+ *   bit 2-  bitmap is autoloading
+ *   bits 3-7 - reserved, must be zero
+ *
+ * # Complete of bitmap migration (flags & COMPLETE)
+ * header
+ *
+ * # Data chunk of bitmap migration
+ * header
+ * be64: start sector
+ * be32: number of sectors
+ * [ be64: buffer size  ] \ ! (flags & ZEROES)
+ * [ n bytes: buffer] /
+ *
+ * The last chunk in stream should contain flags & EOS. The chunk may skip
+ * device and/or bitmap names, assuming them to be the same with the previous
+ * chunk.
+ */
+
+#include "qemu/osdep.h"
+#include "block/block.h"
+#include "block/block_int.h"
+#include "sysemu/block-backend.h"
+#include "qemu/main-loop.h"
+#include "qemu/error-report.h"
+#include "migration/misc.h"
+#include "migration/migration.h"
+#include "migration/qemu-file.h"
+#include "migration/vmstate.h"
+#include "migration/register.h"
+#include "qemu/hbitmap.h"
+#include "sysemu/sysemu.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "trace.h"
+
+#define CHUNK_SIZE (1 << 10)
+
+/* Flags occupy one, two or four bytes (Big Endian). The size is determined as
+ * follows:
+ * in first (most significant) byte bit 8 is clear  -->  one byte
+ * in first byte bit 8 is set-->  two or four bytes, depending on second
+ *

[Qemu-block] [PATCH v8 02/14] block/dirty-bitmap: add locked version of bdrv_release_dirty_bitmap

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

It is needed to realize bdrv_dirty_bitmap_release_successor in
the following patch.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/dirty-bitmap.c | 25 -
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 81adbeb6d4..981f99d362 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -326,13 +326,13 @@ static bool bdrv_dirty_bitmap_has_name(BdrvDirtyBitmap 
*bitmap)
 return !!bdrv_dirty_bitmap_name(bitmap);
 }
 
-/* Called with BQL taken.  */
-static void bdrv_do_release_matching_dirty_bitmap(
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+static void bdrv_do_release_matching_dirty_bitmap_locked(
 BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
 bool (*cond)(BdrvDirtyBitmap *bitmap))
 {
 BdrvDirtyBitmap *bm, *next;
-bdrv_dirty_bitmaps_lock(bs);
+
 QLIST_FOREACH_SAFE(bm, &bs->dirty_bitmaps, list, next) {
 if ((!bitmap || bm == bitmap) && (!cond || cond(bm))) {
 assert(!bm->active_iterators);
@@ -344,18 +344,33 @@ static void bdrv_do_release_matching_dirty_bitmap(
 g_free(bm);
 
 if (bitmap) {
-goto out;
+return;
 }
 }
 }
+
 if (bitmap) {
 abort();
 }
+}
 
-out:
+/* Called with BQL taken.  */
+static void bdrv_do_release_matching_dirty_bitmap(
+BlockDriverState *bs, BdrvDirtyBitmap *bitmap,
+bool (*cond)(BdrvDirtyBitmap *bitmap))
+{
+bdrv_dirty_bitmaps_lock(bs);
+bdrv_do_release_matching_dirty_bitmap_locked(bs, bitmap, cond);
 bdrv_dirty_bitmaps_unlock(bs);
 }
 
+/* Called within bdrv_dirty_bitmap_lock..unlock */
+static void bdrv_release_dirty_bitmap_locked(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap)
+{
+bdrv_do_release_matching_dirty_bitmap_locked(bs, bitmap, NULL);
+}
+
 /* Called with BQL taken.  */
 void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap)
 {
-- 
2.11.1

[Qemu-block] [PATCH v8 03/14] block/dirty-bitmap: add bdrv_dirty_bitmap_release_successor

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

To just release successor and unfreeze bitmap without any additional
work.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Signed-off-by: Denis V. Lunev 
---
 include/block/dirty-bitmap.h |  2 ++
 block/dirty-bitmap.c | 14 ++
 2 files changed, 16 insertions(+)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 93d4336505..a9e2a92e4f 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -20,6 +20,8 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState 
*bs,
 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
Error **errp);
+void bdrv_dirty_bitmap_release_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *bitmap);
 void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap);
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
 const char *name);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 981f99d362..7578863aa1 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -431,6 +431,20 @@ void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap)
 bitmap->disabled = false;
 }
 
+/* Called with BQL taken. */
+void bdrv_dirty_bitmap_release_successor(BlockDriverState *bs,
+ BdrvDirtyBitmap *parent)
+{
+qemu_mutex_lock(parent->mutex);
+
+if (parent->successor) {
+bdrv_release_dirty_bitmap_locked(bs, parent->successor);
+parent->successor = NULL;
+}
+
+qemu_mutex_unlock(parent->mutex);
+}
+
 BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs)
 {
 BdrvDirtyBitmap *bm;
-- 
2.11.1

[Qemu-block] [PATCH v8 05/14] migration: introduce postcopy-only pending

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

There would be savevm states (dirty-bitmap) which can migrate only in
postcopy stage. The corresponding pending is introduced here.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
---
 include/migration/register.h | 17 +++--
 migration/savevm.h   |  5 +++--
 migration/block.c|  7 ---
 migration/migration.c| 15 ---
 migration/ram.c  |  9 +
 migration/savevm.c   | 13 -
 migration/trace-events   |  2 +-
 7 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/include/migration/register.h b/include/migration/register.h
index f4f7bdc177..9436a87678 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -37,8 +37,21 @@ typedef struct SaveVMHandlers {
 int (*save_setup)(QEMUFile *f, void *opaque);
 void (*save_live_pending)(QEMUFile *f, void *opaque,
   uint64_t threshold_size,
-  uint64_t *non_postcopiable_pending,
-  uint64_t *postcopiable_pending);
+  uint64_t *res_precopy_only,
+  uint64_t *res_compatible,
+  uint64_t *res_postcopy_only);
+/* Note for save_live_pending:
+ * - res_precopy_only is for data which must be migrated in precopy phase
+ * or in stopped state, in other words - before target vm start
+ * - res_compatible is for data which may be migrated in any phase
+ * - res_postcopy_only is for data which must be migrated in postcopy phase
+ * or in stopped state, in other words - after source vm stop
+ *
+ * Sum of res_precopy_only, res_compatible and res_postcopy_only is the
+ * whole amount of pending data.
+ */
+
+
 LoadStateHandler *load_state;
 int (*load_setup)(QEMUFile *f, void *opaque);
 int (*load_cleanup)(void *opaque);
diff --git a/migration/savevm.h b/migration/savevm.h
index 295c4a1f2c..cf4f0d37ca 100644
--- a/migration/savevm.h
+++ b/migration/savevm.h
@@ -38,8 +38,9 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f);
 int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
bool inactivate_disks);
 void qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size,
-   uint64_t *res_non_postcopiable,
-   uint64_t *res_postcopiable);
+   uint64_t *res_precopy_only,
+   uint64_t *res_compatible,
+   uint64_t *res_postcopy_only);
 void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
 void qemu_savevm_send_open_return_path(QEMUFile *f);
 int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);
diff --git a/migration/block.c b/migration/block.c
index 3282809583..39dfa567e8 100644
--- a/migration/block.c
+++ b/migration/block.c
@@ -865,8 +865,9 @@ static int block_save_complete(QEMUFile *f, void *opaque)
 }
 
 static void block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size,
-   uint64_t *non_postcopiable_pending,
-   uint64_t *postcopiable_pending)
+   uint64_t *res_precopy_only,
+   uint64_t *res_compatible,
+   uint64_t *res_postcopy_only)
 {
 /* Estimate pending number of bytes to send */
 uint64_t pending;
@@ -887,7 +888,7 @@ static void block_save_pending(QEMUFile *f, void *opaque, 
uint64_t max_size,
 
 DPRINTF("Enter save live pending  %" PRIu64 "\n", pending);
 /* We don't do postcopy */
-*non_postcopiable_pending += pending;
+*res_precopy_only += pending;
 }
 
 static int block_load(QEMUFile *f, void *opaque, int version_id)
diff --git a/migration/migration.c b/migration/migration.c
index 4de3b551fe..e6c9be3cca 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2216,20 +2216,21 @@ static void *migration_thread(void *opaque)
 uint64_t pending_size;
 
 if (!qemu_file_rate_limit(s->to_dst_file)) {
-uint64_t pend_post, pend_nonpost;
+uint64_t pend_pre, pend_compat, pend_post;
 
-qemu_savevm_state_pending(s->to_dst_file, threshold_size,
-  &pend_nonpost, &pend_post);
-pending_size = pend_nonpost + pend_post;
+qemu_savevm_state_pending(s->to_dst_file, threshold_size, &pend_pre,
+  &pend_compat, &pend_post);
+pending_size = pend_pre + pend_compat + pend_post;
 trace_migrate_pending(pending_size, threshold_size,
-  pend_post, pend_nonpost);
+  pend_pre, pend_compat, pend_post);
 if (pending_size && pending_size >= threshold_size)

[Qemu-block] [PATCH v8 00/14] Dirty bitmaps postcopy migration

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

Hi all!

There is a new version of dirty bitmap postcopy migration series.

v8

clone: tag postcopy-v8 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v8

- rebased on master
- patches 01-03 from v7 are already merged to master
- patch order is changed to make it possible to merge the block/dirty-bitmap
  patches separately if needed
01: new patch
03: fixed to use _locked version of bdrv_release_dirty_bitmap
06: qapi-schema.json -> qapi/migration.json
2.9 -> 2.11
10: protocol changed a bit:
  instead of 1 byte "bitmap enabled flag" this byte becomes just "flags"
  and have "enabled", "persistent" and "autoloading" flags inside.
  also, make all migrated bitmaps to be not persistent (to prevent their
  storing on source vm)
14: new patch


patches status:
01-04 - are only about block/dirty-bitmap and have no r-b. Fam, John, Paolo
(about the bitmap lock), please take a look. These patches are OK to be merged
separately (but before 05-14).
The other patches are about migration:
05-09 has Juan's r-b (and some of them has John's and Eric's r-bs)
10 - the main patch (dirty bitmaps migration), has no r-b.
11 - preparation for tests, not related to migration directly, has Max's r-b, 
ok to be merged
separately (but before 12-14)
12-14 - tests, 12 and 13 have Max's r-b, 14 is new


v7

clone: tag postcopy-v7 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v7

- rebased on dirty-bitmap byte-based interfaces
(based on git://repo.or.cz/qemu/ericb.git branch nbd-byte-dirty-v4)
- migration of persistent bitmaps should fail for shared storage migration for 
now,
  as persistent dirty bitmaps are stored/load on inactivate/invalidate-cache.
  also, even for non-shared storage migration there would be useless saving of 
dirty
  bitmaps on source. This all will be optimized later.

01: stuff from include/migration/vmstate.h moved to 
include/migration/register.h (rebase)
03: some structural changes due to rebase - drop r-b
04: stuff from include/migration/vmstate.h moved to 
include/migration/register.h (rebase)
stuff from include/sysemu/sysemu.h moved to migration/savevm.h (rebase)
05: fix patch header: block -> block/dirty-bitmap
add locking, drop r-b
06: stuff from include/migration/migration.h moved to migration/migration.h 
(rebase)
07: add locking, drop r-b
09: stuff from include/migration/qemu-file.h moved to migration/qemu-file.h 
(rebase)
10: stuff from include/migration/vmstate.h moved to 
include/migration/register.h (rebase)
11: new patch
12: a lot of changes/fixes (mostly by Fam's comments) + rebase
header-definition movement
remove include 
add some includes
fix/refactor bitmap flags send
byte-based interface for dirty bitmaps (rebase)
froze bitmaps on source
init_dirty_bitmap_migration can return error, if some of bitmaps are already
  frozen
bdrv_ref drives with bitmaps
fprintf -> error_report
check version_id in _load function

v6:

clone: tag postcopy-v6 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v6

rebase on master.

03 - tiny contextual change

12 - little change, but it should be reviewed. Call of 
init_dirty_bitmap_incoming_migration()
(which only initialize mutex) moved from start of 
process_incoming_migration_co (it was
immediately after "mis = migration_incoming_state_new(f)") to 
migration_incoming_get_current()
to stay with initialization code.
I remove r-b's, but hope that this will not be a problem. The only change 
in this patch - is moved
call of init_dirty_bitmap_incoming_migration.
I do so because of recent

commit b4b076daf324894dd288cbdb67ff1e3c7434df7b
Author: Juan Quintela 
Date:   Mon Jan 23 22:32:06 2017 +0100

migration: create Migration Incoming State at init time

15 - add Max's r-b

v5:

clone: tag postcopy-v5 from https://src.openvz.org/scm/~vsementsov/qemu.git
online: https://src.openvz.org/users/vsementsov/repos/qemu/browse?at=postcopy-v5

- move 'signed-off' over 'reviewed-by' in patches.

03,04 - add comments. Hope they will be ok for you, so add Juan's r-b.
If not ok - let me know and I'll resend.

06,08,12 - add Max's r-b
07,09,10,11,12 - add Juan's r-b

14 - used last version of this patch from qcow2-bitmap series with 
 Max's r-b. It has contextual changes due to different base.

15 - fix 041 iotest, add default node-name only if path is specified and
 node-name is not specified
16 - handle whitespaces
   s/"exec: cat " + fifo/"exec: cat '" + fifo + "'"/
 fix indentation
 add Max's r-b
17 - fix typos, wrong size in comment, s/md5/sha256/
 add Max's r-b

v4:

clone: tag postcopy-v4 from https://src.openvz.org/scm/~vsementsov/qemu.git
online:

[Qemu-block] [PATCH v8 13/14] iotests: add dirty bitmap postcopy test

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

Test
- start two vms (vm_a, vm_b)

- in a
- do writes from set A
- do writes from set B
- fix bitmap sha256
- clear bitmap
- do writes from set A
- start migration
- then, in b
- wait vm start (postcopy should start)
- do writes from set B
- check bitmap sha256

The test should verify postcopy migration and then merging with delta
(changes in target, during postcopy process).

Reduce the supported cache modes to only 'none', because with caching enabled
the time from source.STOP to target.RESUME is unpredictable and we can fail
with a timeout while waiting for target.RESUME.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/169| 74 +--
 tests/qemu-iotests/169.out|  4 +--
 tests/qemu-iotests/iotests.py |  7 +++-
 3 files changed, 72 insertions(+), 13 deletions(-)

diff --git a/tests/qemu-iotests/169 b/tests/qemu-iotests/169
index 7630ecbe51..4ecef2f23f 100755
--- a/tests/qemu-iotests/169
+++ b/tests/qemu-iotests/169
@@ -29,8 +29,14 @@ fifo = os.path.join(iotests.test_dir, 'mig_fifo')
 
 class TestDirtyBitmapMigration(iotests.QMPTestCase):
 
-def setUp(self):
-size = 0x4 # 1G
+def tearDown(self):
+self.vm_a.shutdown()
+self.vm_b.shutdown()
+os.remove(disk_a)
+os.remove(disk_b)
+os.remove(fifo)
+
+def init(self, size):
 os.mkfifo(fifo)
 qemu_img('create', '-f', iotests.imgfmt, disk_a, str(size))
 qemu_img('create', '-f', iotests.imgfmt, disk_b, str(size))
@@ -40,14 +46,8 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
 self.vm_a.launch()
 self.vm_b.launch()
 
-def tearDown(self):
-self.vm_a.shutdown()
-self.vm_b.shutdown()
-os.remove(disk_a)
-os.remove(disk_b)
-os.remove(fifo)
-
 def test_migration(self):
+self.init(0x4) # 1G
 granularity = 512
 regions = [
 { 'start': 0,   'count': 0x10 },
@@ -81,6 +81,60 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
node='drive0', name='bitmap')
 self.assert_qmp(result, 'return/sha256', sha256);
 
+def test_postcopy(self):
+self.init(0x40) # 256G
+write_size = 0x4000
+granularity = 512
+chunk = 4096
+
+result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
+   name='bitmap', granularity=granularity)
+self.assert_qmp(result, 'return', {});
+
+s = 0
+while s < write_size:
+self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+s += 0x1
+s = 0x8000
+while s < write_size:
+self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+s += 0x1
+
+result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256',
+   node='drive0', name='bitmap')
+sha256 = result['return']['sha256']
+
+result = self.vm_a.qmp('block-dirty-bitmap-clear', node='drive0',
+   name='bitmap')
+self.assert_qmp(result, 'return', {});
+s = 0
+while s < write_size:
+self.vm_a.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+s += 0x1
+
+result = self.vm_a.qmp('migrate-set-capabilities',
+   capabilities=[{'capability': 'dirty-bitmaps',
+  'state': True}])
+self.assert_qmp(result, 'return', {})
+
+result = self.vm_a.qmp('migrate', uri='exec:cat>' + fifo)
+self.assertNotEqual(self.vm_a.event_wait("STOP"), None)
+self.assertNotEqual(self.vm_b.event_wait("RESUME"), None)
+
+s = 0x8000
+while s < write_size:
+self.vm_b.hmp_qemu_io('drive0', 'write %d %d' % (s, chunk))
+s += 0x1
+
+result = self.vm_b.qmp('query-block');
+while len(result['return'][0]['dirty-bitmaps']) > 1:
+time.sleep(2)
+result = self.vm_b.qmp('query-block');
+
+result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256',
+   node='drive0', name='bitmap')
+
+self.assert_qmp(result, 'return/sha256', sha256);
 
 if __name__ == '__main__':
-iotests.main()
+iotests.main(supported_fmts=['qcow2'], supported_cache_modes=['none'])
diff --git a/tests/qemu-iotests/169.out b/tests/qemu-iotests/169.out
index ae1213e6f8..fbc63e62f8 100644
--- a/tests/qemu-iotests/169.out
+++ b/tests/qemu-iotests/169.out
@@ -1,5 +1,5 @@
-.
+..
 --
-Ran 1 tests
+Ran 2 tests
 
 OK
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 95454c1893..88f73d6441 100644
--- a/tests/qemu-iotests/iotests.py
+++

Re: [Qemu-block] [PATCH v6 05/12] nbd/server: Simplify nbd_negotiate_options loop

2017-10-30 Thread Vladimir Sementsov-Ogievskiy


27.10.2017 13:40, Eric Blake wrote:

Instead of making each caller check whether a transmission error
occurred, we can sink a common error check to the end of the loop.

Signed-off-by: Eric Blake 


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir

[Qemu-block] [PATCH v8 09/14] migration: add is_active_iterate handler

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

Only-postcopy savevm states (dirty-bitmap) don't need live iteration, so
to disable them and stop transporting empty sections there is a new
savevm handler.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
---
 include/migration/register.h | 1 +
 migration/savevm.c   | 5 +
 2 files changed, 6 insertions(+)

diff --git a/include/migration/register.h b/include/migration/register.h
index 9436a87678..cafbeb64b5 100644
--- a/include/migration/register.h
+++ b/include/migration/register.h
@@ -25,6 +25,7 @@ typedef struct SaveVMHandlers {
 /* This runs both outside and inside the iothread lock.  */
 bool (*is_active)(void *opaque);
 bool (*has_postcopy)(void *opaque);
+bool (*is_active_iterate)(void *opaque);
 
 /* This runs outside the iothread lock in the migration case, and
  * within the lock in the savevm case.  The callback had better only
diff --git a/migration/savevm.c b/migration/savevm.c
index f6b62cb202..9bbfb3fa1b 100644
--- a/migration/savevm.c
+++ b/migration/savevm.c
@@ -1026,6 +1026,11 @@ int qemu_savevm_state_iterate(QEMUFile *f, bool postcopy)
 continue;
 }
 }
+if (se->ops && se->ops->is_active_iterate) {
+if (!se->ops->is_active_iterate(se->opaque)) {
+continue;
+}
+}
 /*
  * In the postcopy phase, any device that doesn't know how to
  * do postcopy should have saved it's state in the _complete
-- 
2.11.1

[Qemu-block] [PATCH v8 12/14] iotests: add dirty bitmap migration test

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

The test starts two VMs (vm_a, vm_b), creates a dirty bitmap in
the first one, does several writes to the corresponding device and
then migrates vm_a to vm_b with dirty bitmaps.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/169 | 86 ++
 tests/qemu-iotests/169.out |  5 +++
 tests/qemu-iotests/group   |  1 +
 3 files changed, 92 insertions(+)
 create mode 100755 tests/qemu-iotests/169
 create mode 100644 tests/qemu-iotests/169.out

diff --git a/tests/qemu-iotests/169 b/tests/qemu-iotests/169
new file mode 100755
index 00..7630ecbe51
--- /dev/null
+++ b/tests/qemu-iotests/169
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+#
+# Tests for dirty bitmaps migration.
+#
+# Copyright (C) Vladimir Sementsov-Ogievskiy 2015-2016
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import os
+import iotests
+import time
+from iotests import qemu_img
+
+disk_a = os.path.join(iotests.test_dir, 'disk_a')
+disk_b = os.path.join(iotests.test_dir, 'disk_b')
+fifo = os.path.join(iotests.test_dir, 'mig_fifo')
+
+class TestDirtyBitmapMigration(iotests.QMPTestCase):
+
+def setUp(self):
+size = 0x4 # 1G
+os.mkfifo(fifo)
+qemu_img('create', '-f', iotests.imgfmt, disk_a, str(size))
+qemu_img('create', '-f', iotests.imgfmt, disk_b, str(size))
+self.vm_a = iotests.VM(path_suffix='a').add_drive(disk_a)
+self.vm_b = iotests.VM(path_suffix='b').add_drive(disk_b)
+self.vm_b.add_incoming("exec: cat '" + fifo + "'")
+self.vm_a.launch()
+self.vm_b.launch()
+
+def tearDown(self):
+self.vm_a.shutdown()
+self.vm_b.shutdown()
+os.remove(disk_a)
+os.remove(disk_b)
+os.remove(fifo)
+
+def test_migration(self):
+granularity = 512
+regions = [
+{ 'start': 0,   'count': 0x10 },
+{ 'start': 0x1, 'count': 0x20  },
+{ 'start': 0x39990, 'count': 0x10  }
+]
+
+result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
+   name='bitmap', granularity=granularity)
+self.assert_qmp(result, 'return', {});
+
+for r in regions:
+self.vm_a.hmp_qemu_io('drive0',
+  'write %d %d' % (r['start'], r['count']))
+
+result = self.vm_a.qmp('x-debug-block-dirty-bitmap-sha256',
+   node='drive0', name='bitmap')
+sha256 = result['return']['sha256']
+
+result = self.vm_a.qmp('migrate-set-capabilities',
+   capabilities=[{'capability': 'dirty-bitmaps',
+  'state': True}])
+self.assert_qmp(result, 'return', {})
+
+result = self.vm_a.qmp('migrate', uri='exec:cat>' + fifo)
+self.assertNotEqual(self.vm_a.event_wait("STOP"), None)
+self.assertNotEqual(self.vm_b.event_wait("RESUME"), None)
+time.sleep(2)
+
+result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256',
+   node='drive0', name='bitmap')
+self.assert_qmp(result, 'return/sha256', sha256);
+
+
+if __name__ == '__main__':
+iotests.main()
diff --git a/tests/qemu-iotests/169.out b/tests/qemu-iotests/169.out
new file mode 100644
index 00..ae1213e6f8
--- /dev/null
+++ b/tests/qemu-iotests/169.out
@@ -0,0 +1,5 @@
+.
+--
+Ran 1 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index 24e5ad1b79..96e173abb3 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -169,6 +169,7 @@
 162 auto quick
 163 rw auto quick
 165 rw auto quick
+169 rw auto quick
 170 rw auto quick
 171 rw auto quick
 172 auto
-- 
2.11.1

[Qemu-block] [PATCH v8 07/14] migration: include migrate_dirty_bitmaps in migrate_postcopy

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

Enable postcopy if dirty bitmap migration is enabled.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Juan Quintela 
---
 migration/migration.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/migration.c b/migration/migration.c
index 1526cd4bff..e973837bfd 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1487,7 +1487,7 @@ bool migrate_postcopy_ram(void)
 
 bool migrate_postcopy(void)
 {
-return migrate_postcopy_ram();
+return migrate_postcopy_ram() || migrate_dirty_bitmaps();
 }
 
 bool migrate_auto_converge(void)
-- 
2.11.1

[Qemu-block] [PATCH v8 06/14] qapi: add dirty-bitmaps migration capability

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: John Snow 
Reviewed-by: Eric Blake 
Reviewed-by: Juan Quintela 
---
 qapi/migration.json   | 6 +-
 migration/migration.h | 1 +
 migration/migration.c | 9 +
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/qapi/migration.json b/qapi/migration.json
index bbc4671ded..ee0ad0b3ad 100644
--- a/qapi/migration.json
+++ b/qapi/migration.json
@@ -352,12 +352,16 @@
 #
 # @x-multifd: Use more than one fd for migration (since 2.11)
 #
+# @dirty-bitmaps: If enabled, QEMU will migrate named dirty bitmaps.
+# (since 2.11)
+#
 # Since: 1.2
 ##
 { 'enum': 'MigrationCapability',
   'data': ['xbzrle', 'rdma-pin-all', 'auto-converge', 'zero-blocks',
'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
-   'block', 'return-path', 'pause-before-switchover', 'x-multifd' ] }
+   'block', 'return-path', 'pause-before-switchover', 'x-multifd',
+   'dirty-bitmaps' ] }
 
 ##
 # @MigrationCapabilityStatus:
diff --git a/migration/migration.h b/migration/migration.h
index 663415fe48..50d1f01346 100644
--- a/migration/migration.h
+++ b/migration/migration.h
@@ -181,6 +181,7 @@ bool migrate_postcopy(void);
 bool migrate_release_ram(void);
 bool migrate_postcopy_ram(void);
 bool migrate_zero_blocks(void);
+bool migrate_dirty_bitmaps(void);
 
 bool migrate_auto_converge(void);
 bool migrate_use_multifd(void);
diff --git a/migration/migration.c b/migration/migration.c
index e6c9be3cca..1526cd4bff 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1544,6 +1544,15 @@ int migrate_decompress_threads(void)
 return s->parameters.decompress_threads;
 }
 
+bool migrate_dirty_bitmaps(void)
+{
+MigrationState *s;
+
+s = migrate_get_current();
+
+return s->enabled_capabilities[MIGRATION_CAPABILITY_DIRTY_BITMAPS];
+}
+
 bool migrate_use_events(void)
 {
 MigrationState *s;
-- 
2.11.1

[Qemu-block] [PATCH v8 14/14] iotests: add persistent bitmap migration test

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

Test that a persistent bitmap migrates and that its persistence property
migrates too.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 tests/qemu-iotests/169 | 21 ++---
 tests/qemu-iotests/169.out |  4 ++--
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/tests/qemu-iotests/169 b/tests/qemu-iotests/169
index 4ecef2f23f..b0270f40f7 100755
--- a/tests/qemu-iotests/169
+++ b/tests/qemu-iotests/169
@@ -46,7 +46,7 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
 self.vm_a.launch()
 self.vm_b.launch()
 
-def test_migration(self):
+def do_test_migration(self, persistent=False):
 self.init(0x4) # 1G
 granularity = 512
 regions = [
@@ -55,8 +55,13 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
 { 'start': 0x39990, 'count': 0x10  }
 ]
 
-result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
-   name='bitmap', granularity=granularity)
+if persistent:
+result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
+   name='bitmap', granularity=granularity,
+   persistent=True, autoload=True)
+else:
+result = self.vm_a.qmp('block-dirty-bitmap-add', node='drive0',
+   name='bitmap', granularity=granularity)
 self.assert_qmp(result, 'return', {});
 
 for r in regions:
@@ -77,10 +82,20 @@ class TestDirtyBitmapMigration(iotests.QMPTestCase):
 self.assertNotEqual(self.vm_b.event_wait("RESUME"), None)
 time.sleep(2)
 
+if persistent:
+self.vm_b.shutdown()
+self.vm_b.launch()
+
 result = self.vm_b.qmp('x-debug-block-dirty-bitmap-sha256',
node='drive0', name='bitmap')
 self.assert_qmp(result, 'return/sha256', sha256);
 
+def test_migration(self):
+self.do_test_migration()
+
+def test_migration_persistent(self):
+self.do_test_migration(persistent=True)
+
 def test_postcopy(self):
 self.init(0x40) # 256G
 write_size = 0x4000
diff --git a/tests/qemu-iotests/169.out b/tests/qemu-iotests/169.out
index fbc63e62f8..8d7e996700 100644
--- a/tests/qemu-iotests/169.out
+++ b/tests/qemu-iotests/169.out
@@ -1,5 +1,5 @@
-..
+...
 --
-Ran 2 tests
+Ran 3 tests
 
 OK
-- 
2.11.1

[Qemu-block] [PATCH v8 01/14] block/dirty-bitmap: add bdrv_dirty_bitmap_enable_successor()

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

Enabling bitmap successor is necessary to enable successors of bitmaps
being migrated before target vm start.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/dirty-bitmap.h | 1 +
 block/dirty-bitmap.c | 8 
 2 files changed, 9 insertions(+)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index 3579a7597c..93d4336505 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -20,6 +20,7 @@ BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState 
*bs,
 BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs,
BdrvDirtyBitmap *bitmap,
Error **errp);
+void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap);
 BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs,
 const char *name);
 void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index bd04e991b1..81adbeb6d4 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -237,6 +237,14 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState 
*bs,
 return 0;
 }
 
+/* Called with BQL taken. */
+void bdrv_dirty_bitmap_enable_successor(BdrvDirtyBitmap *bitmap)
+{
+qemu_mutex_lock(bitmap->mutex);
+bdrv_enable_dirty_bitmap(bitmap->successor);
+qemu_mutex_unlock(bitmap->mutex);
+}
+
 /**
  * For a bitmap with a successor, yield our name to the successor,
  * delete the old bitmap, and return a handle to the new bitmap.
-- 
2.11.1

[Qemu-block] [PATCH v8 04/14] block/dirty-bitmap: add bdrv_dirty_bitmap_set_frozen

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

Make it possible to set bitmap 'frozen' without a successor.
This is needed to protect the bitmap during outgoing bitmap postcopy
migration.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 include/block/dirty-bitmap.h |  1 +
 block/dirty-bitmap.c | 22 --
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/block/dirty-bitmap.h b/include/block/dirty-bitmap.h
index a9e2a92e4f..ae6d697850 100644
--- a/include/block/dirty-bitmap.h
+++ b/include/block/dirty-bitmap.h
@@ -39,6 +39,7 @@ uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState 
*bs);
 uint32_t bdrv_dirty_bitmap_granularity(const BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap);
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap);
+void bdrv_dirty_bitmap_set_frozen(BdrvDirtyBitmap *bitmap, bool frozen);
 const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap *bitmap);
 int64_t bdrv_dirty_bitmap_size(const BdrvDirtyBitmap *bitmap);
 DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap);
diff --git a/block/dirty-bitmap.c b/block/dirty-bitmap.c
index 7578863aa1..67fc6bd6e0 100644
--- a/block/dirty-bitmap.c
+++ b/block/dirty-bitmap.c
@@ -40,6 +40,8 @@ struct BdrvDirtyBitmap {
 QemuMutex *mutex;
 HBitmap *bitmap;/* Dirty bitmap implementation */
 HBitmap *meta;  /* Meta dirty bitmap */
+bool frozen;/* Bitmap is frozen, it can't be modified
+   through QMP */
 BdrvDirtyBitmap *successor; /* Anonymous child; implies frozen status */
 char *name; /* Optional non-empty unique ID */
 int64_t size;   /* Size of the bitmap, in bytes */
@@ -183,13 +185,22 @@ const char *bdrv_dirty_bitmap_name(const BdrvDirtyBitmap 
*bitmap)
 /* Called with BQL taken.  */
 bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap)
 {
-return bitmap->successor;
+return bitmap->frozen;
+}
+
+/* Called with BQL taken.  */
+void bdrv_dirty_bitmap_set_frozen(BdrvDirtyBitmap *bitmap, bool frozen)
+{
+qemu_mutex_lock(bitmap->mutex);
+assert(bitmap->successor == NULL);
+bitmap->frozen = frozen;
+qemu_mutex_unlock(bitmap->mutex);
 }
 
 /* Called with BQL taken.  */
 bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap)
 {
-return !(bitmap->disabled || bitmap->successor);
+return !(bitmap->disabled || (bitmap->successor != NULL));
 }
 
 /* Called with BQL taken.  */
@@ -234,6 +245,7 @@ int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs,
 
 /* Install the successor and freeze the parent */
 bitmap->successor = child;
+bitmap->frozen = true;
 return 0;
 }
 
@@ -266,6 +278,8 @@ BdrvDirtyBitmap 
*bdrv_dirty_bitmap_abdicate(BlockDriverState *bs,
 name = bitmap->name;
 bitmap->name = NULL;
 successor->name = name;
+assert(bitmap->frozen);
+bitmap->frozen = false;
 bitmap->successor = NULL;
 successor->persistent = bitmap->persistent;
 bitmap->persistent = false;
@@ -298,6 +312,8 @@ BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState 
*bs,
 return NULL;
 }
 bdrv_release_dirty_bitmap(bs, successor);
+assert(parent->frozen);
+parent->frozen = false;
 parent->successor = NULL;
 
 return parent;
@@ -439,6 +455,8 @@ void bdrv_dirty_bitmap_release_successor(BlockDriverState 
*bs,
 
 if (parent->successor) {
 bdrv_release_dirty_bitmap_locked(bs, parent->successor);
+assert(parent->frozen);
+parent->frozen = false;
 parent->successor = NULL;
 }
 
-- 
2.11.1

[Qemu-block] [PATCH v8 08/14] migration/qemu-file: add qemu_put_counted_string()

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

Add a function that is the opposite of qemu_get_counted_string.
qemu_put_counted_string puts the one-byte length of the string (the string
should not be longer than 255 characters), and then it puts the string
itself, without the terminating zero byte.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: John Snow 
Reviewed-by: Juan Quintela 
---
 migration/qemu-file.h |  2 ++
 migration/qemu-file.c | 13 +
 2 files changed, 15 insertions(+)

diff --git a/migration/qemu-file.h b/migration/qemu-file.h
index aae4e5ed36..f4f356ab12 100644
--- a/migration/qemu-file.h
+++ b/migration/qemu-file.h
@@ -174,4 +174,6 @@ size_t ram_control_save_page(QEMUFile *f, ram_addr_t 
block_offset,
  ram_addr_t offset, size_t size,
  uint64_t *bytes_sent);
 
+void qemu_put_counted_string(QEMUFile *f, const char *name);
+
 #endif
diff --git a/migration/qemu-file.c b/migration/qemu-file.c
index 2ab2bf362d..e85f501f86 100644
--- a/migration/qemu-file.c
+++ b/migration/qemu-file.c
@@ -734,6 +734,19 @@ size_t qemu_get_counted_string(QEMUFile *f, char buf[256])
 }
 
 /*
+ * Put a string with one preceding byte containing its length. The length of
+ * the string should be less than 256.
+ */
+void qemu_put_counted_string(QEMUFile *f, const char *str)
+{
+size_t len = strlen(str);
+
+assert(len < 256);
+qemu_put_byte(f, len);
+qemu_put_buffer(f, (const uint8_t *)str, len);
+}
+
+/*
  * Set the blocking state of the QEMUFile.
  * Note: On some transports the OS only keeps a single blocking state for
  *   both directions, and thus changing the blocking on the main
-- 
2.11.1

[Qemu-block] [PATCH v8 11/14] iotests: add default node-name

2017-10-30 Thread Vladimir Sementsov-Ogievskiy

When testing migration, the node-names auto-generated by qemu differ between
the source and destination qemu, and migration fails. After this patch, the
node-names auto-generated by iotests will be the same.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
Reviewed-by: Max Reitz 
---
 tests/qemu-iotests/iotests.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index 6f057904a9..95454c1893 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -216,6 +216,8 @@ class VM(qtest.QEMUQtestMachine):
 options.append('file=%s' % path)
 options.append('format=%s' % format)
 options.append('cache=%s' % cachemode)
+if 'node-name' not in opts:
+options.append('node-name=drivenode%d' % self._num_drives)
 
 if opts:
 options.append(opts)
-- 
2.11.1

[Qemu-block] [PATCH v4] nvme: Add tracing

2017-10-30 Thread Doug Gale

>From 0e27b5dca8f4f32a1b194e1b3544be77dd4f45d9 Mon Sep 17 00:00:00 2001
From: Doug Gale 
Date: Mon, 30 Oct 2017 09:28:43 -0400
Subject: [PATCH] nvme: Add tracing

Add trace output for commands, errors, and undefined behavior.
Add guest error log output for undefined behavior.
Report invalid undefined accesses to MMIO.
Annotate unlikely error checks with unlikely.

Signed-off-by: Doug Gale 
---
 hw/block/nvme.c   | 349 ++
 hw/block/trace-events |  93 ++
 2 files changed, 390 insertions(+), 52 deletions(-)

diff --git a/hw/block/nvme.c b/hw/block/nvme.c
index 441e21ed1f..4d98ed9fba 100644
--- a/hw/block/nvme.c
+++ b/hw/block/nvme.c
@@ -34,8 +34,17 @@
 #include "qapi/visitor.h"
 #include "sysemu/block-backend.h"

+#include "qemu/log.h"
+#include "trace.h"
 #include "nvme.h"

+#define NVME_GUEST_ERR(trace, fmt, ...) \
+do { \
+(trace_##trace)(__VA_ARGS__); \
+qemu_log_mask(LOG_GUEST_ERROR, #trace \
+" in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
+} while (0)
+
 static void nvme_process_sq(void *opaque);

 static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
@@ -86,10 +95,14 @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
 {
 if (cq->irq_enabled) {
 if (msix_enabled(&(n->parent_obj))) {
+trace_nvme_irq_msix(cq->vector);
 msix_notify(&(n->parent_obj), cq->vector);
 } else {
+trace_nvme_irq_pin();
 pci_irq_pulse(>parent_obj);
 }
+} else {
+trace_nvme_irq_masked();
 }
 }

@@ -100,7 +113,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,
 trans_len = MIN(len, trans_len);
 int num_prps = (len >> n->page_bits) + 1;

-if (!prp1) {
+if (unlikely(!prp1)) {
+trace_nvme_err_invalid_prp();
 return NVME_INVALID_FIELD | NVME_DNR;
 } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
@@ -113,7 +127,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,
 }
 len -= trans_len;
 if (len) {
-if (!prp2) {
+if (unlikely(!prp2)) {
+trace_nvme_err_invalid_prp2_missing();
 goto unmap;
 }
 if (len > n->page_size) {
@@ -128,7 +143,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,
 uint64_t prp_ent = le64_to_cpu(prp_list[i]);

 if (i == n->max_prp_ents - 1 && len > n->page_size) {
-if (!prp_ent || prp_ent & (n->page_size - 1)) {
+if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
+trace_nvme_err_invalid_prplist_ent(prp_ent);
 goto unmap;
 }

@@ -140,7 +156,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,
 prp_ent = le64_to_cpu(prp_list[i]);
 }

-if (!prp_ent || prp_ent & (n->page_size - 1)) {
+if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
+trace_nvme_err_invalid_prplist_ent(prp_ent);
 goto unmap;
 }

@@ -154,7 +171,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
QEMUIOVector *iov, uint64_t prp1,
 i++;
 }
 } else {
-if (prp2 & (n->page_size - 1)) {
+if (unlikely(prp2 & (n->page_size - 1))) {
+trace_nvme_err_invalid_prp2_align(prp2);
 goto unmap;
 }
 if (qsg->nsg) {
@@ -178,16 +196,20 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n,
uint8_t *ptr, uint32_t len,
 QEMUIOVector iov;
 uint16_t status = NVME_SUCCESS;

+trace_nvme_dma_read(prp1, prp2);
+
 if (nvme_map_prp(, , prp1, prp2, len, n)) {
 return NVME_INVALID_FIELD | NVME_DNR;
 }
 if (qsg.nsg > 0) {
-if (dma_buf_read(ptr, len, )) {
+if (unlikely(dma_buf_read(ptr, len, ))) {
+trace_nvme_err_invalid_dma();
 status = NVME_INVALID_FIELD | NVME_DNR;
 }
 qemu_sglist_destroy();
 } else {
-if (qemu_iovec_to_buf(, 0, ptr, len) != len) {
+if (unlikely(qemu_iovec_to_buf(, 0, ptr, len) != len)) {
+trace_nvme_err_invalid_dma();
 status = NVME_INVALID_FIELD | NVME_DNR;
 }
 qemu_iovec_destroy();
@@ -273,7 +295,8 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n,
NvmeNamespace *ns, NvmeCmd *cmd,
 uint64_t aio_slba = slba << (data_shift - BDRV_SECTOR_BITS);
 uint32_t aio_nlb = nlb << (data_shift - BDRV_SECTOR_BITS);

-if (slba + nlb > ns->id_ns.nsze) {
+if (unlikely(slba + nlb > ns->id_ns.nsze)) {
+trace_nvme_err_invalid_lba_range(slba, nlb, ns->id_ns.nsze);
 return

Re: [Qemu-block] [PATCH] qemu-iotests: Use -nographic in 182

2017-10-30 Thread Jeff Cody

On Sun, Oct 29, 2017 at 01:53:38PM +0100, Kevin Wolf wrote:
> This avoids that random UI frontend error messages end up in the output.
> In particular, we were seeing this line in CI error logs:
> 
> +Unable to init server: Could not connect: Connection refused
> 
> Signed-off-by: Kevin Wolf 
> ---
>  tests/qemu-iotests/182 | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/tests/qemu-iotests/182 b/tests/qemu-iotests/182
> index 2e078ceed8..4b31592fb8 100755
> --- a/tests/qemu-iotests/182
> +++ b/tests/qemu-iotests/182
> @@ -62,7 +62,7 @@ _launch_qemu -drive 
> file=$TEST_IMG,if=none,id=drive0,file.locking=on \
>  
>  echo
>  echo "Starting a second QEMU using the same image should fail"
> -echo 'quit' | $QEMU -monitor stdio \
> +echo 'quit' | $QEMU -nographic -monitor stdio \
>  -drive file=$TEST_IMG,if=none,id=drive0,file.locking=on \
>  -device $virtioblk,drive=drive0 2>&1 | _filter_testdir 2>&1 |
>  _filter_qemu |
> -- 
> 2.13.6
> 
>

Reviewed-by: Jeff Cody

[Qemu-block] ping Re: loading bitmaps in invalidate_cache fails

2017-10-30 Thread Vladimir Sementsov-Ogievskiy


Hi!

Any ideas?

05.10.2017 12:41, Vladimir Sementsov-Ogievskiy wrote:

12.09.2017 12:46, Kevin Wolf wrote:

Am 11.09.2017 um 18:51 hat Vladimir Sementsov-Ogievskiy geschrieben:

Hi Kevin!

I'm confused with relations of permissions and invalidation, can you 
please

help?

Now dirty bitmaps are loaded in invalidate_cache. Here is a problem 
with

migration:

1. destination starts (inactive)

2. load bitmaps readonly

...

3. invalidate_cache: here we should make our loaded bitmaps RW, ie set
BdrvDirtyBitmap->readonly

   to false and set IN_USE bit in the image. But the latter goes into
"bdrv_aligned_pwritev: Assertion `child->perm & BLK_PERM_WRITE' 
failed",


   because in bdrv_invalidate_cache we call bdrv_set_perm after
drv->bdrv_invalidate_cache.


What is the true way of fixing this?

It's all still a bit of a mess. :-(

I think it makes a lot of sense that we should activate the lower layers
first, so the order in bdrv_invalidate_cache() looks wrong. It should be
something like this:

1. invalidate_cache() for the children
2. Update permissions for non-BDRV_O_INACTIVE
3. Call drv->bdrv_invalidate_cache()

I'm currently working on some fixes related to bdrv_reopen() where
things become tricky because the updated permissions shouldn't depend on
the current state, but on the state after the operation has finished.

You get something similar here, but I think just making sure that we
clear BDRV_O_INACTIVE before updating the permissions is enough here.
The only thing to be careful is that in error cases, we need to revert
both the flag and the permissions then.

Kevin


I have experimented a bit. Permission for qcow2 bs is actually set in 
"parent->role->activate(parent, &local_err);",


so a patch like this fixes the bug (of course, some roll back of 
set_perm and activate should be added to error path)


@@ -4126,21 +4129,6 @@ void bdrv_invalidate_cache(BlockDriverState 
*bs, Error **errp)

 }

 bs->open_flags &= ~BDRV_O_INACTIVE;
-    if (bs->drv->bdrv_invalidate_cache) {
-    bs->drv->bdrv_invalidate_cache(bs, _err);
-    if (local_err) {
-    bs->open_flags |= BDRV_O_INACTIVE;
-    error_propagate(errp, local_err);
-    return;
-    }
-    }
-
-    ret = refresh_total_sectors(bs, bs->total_sectors);
-    if (ret < 0) {
-    bs->open_flags |= BDRV_O_INACTIVE;
-    error_setg_errno(errp, -ret, "Could not refresh total sector 
count");

-    return;
-    }

 /* Update permissions, they may differ for inactive nodes */
 bdrv_get_cumulative_perm(bs, , _perm);
@@ -4161,6 +4149,22 @@ void bdrv_invalidate_cache(BlockDriverState 
*bs, Error **errp)

 }
 }
 }
+
+    if (bs->drv->bdrv_invalidate_cache) {
+    bs->drv->bdrv_invalidate_cache(bs, _err);
+    if (local_err) {
+    bs->open_flags |= BDRV_O_INACTIVE;
+    error_propagate(errp, local_err);
+    return;
+    }
+    }
+
+    ret = refresh_total_sectors(bs, bs->total_sectors);
+    if (ret < 0) {
+    bs->open_flags |= BDRV_O_INACTIVE;
+    error_setg_errno(errp, -ret, "Could not refresh total sector 
count");

+    return;
+    }
 }


however, I doubt that it is safe to call ->activate before 
invalidate_cache... I have no more ideas, just a patch that shows a 
regression :(





--
Best regards,
Vladimir

Re: [Qemu-block] [PATCH v6 09/25] block: Fix bdrv_find_backing_image()

2017-10-30 Thread Alberto Garcia

On Fri 29 Sep 2017 06:53:31 PM CEST, Max Reitz wrote:
> @@ -4096,22 +4086,31 @@ BlockDriverState 
> *bdrv_find_backing_image(BlockDriverState *bs,
>  } else {
>  /* If not an absolute filename path, make it relative to the 
> current
>   * image's filename path */
> -path_combine_deprecated(filename_tmp, PATH_MAX, 
> curr_bs->filename,
> -backing_file);
> +filename_tmp = bdrv_make_absolute_filename(curr_bs, backing_file,
> +   NULL);
> +if (!filename_tmp) {
> +continue;
> +}
>  
>  /* We are going to compare absolute pathnames */
>  if (!realpath(filename_tmp, filename_full)) {
> +g_free(filename_tmp);
>  continue;
>  }
> +g_free(filename_tmp);

This feels a bit too verbose, doesn't it? (especially because you're
doing the same thing twice, see below). It could be made a bit shorter,
something like:

bool have_filename = filename_tmp && realpath(filename_tmp, filename_full);
g_free(filename_tmp);
if (!have_filename) {
 continue;
}

> -path_combine_deprecated(filename_tmp, PATH_MAX, 
> curr_bs->filename,
> -curr_bs->backing_file);
> +filename_tmp = bdrv_get_full_backing_filename(curr_bs, NULL);
> +if (!filename_tmp) {
> +continue;
> +}
>  
>  if (!realpath(filename_tmp, backing_file_full)) {
> +g_free(filename_tmp);
>  continue;
>  }
> +g_free(filename_tmp);

Berto

Re: [Qemu-block] [PATCH v6 08/25] block: Add bdrv_make_absolute_filename()

2017-10-30 Thread Alberto Garcia

On Fri 29 Sep 2017 06:53:30 PM CEST, Max Reitz wrote:
> This is a general function for making a filename that is relative to a
> certain BDS absolute.
>
> It calls bdrv_get_full_backing_filename_from_filename() for now, but
> that will be changed in a follow-up patch.
>
> Signed-off-by: Max Reitz 

Reviewed-by: Alberto Garcia 

Berto

Re: [Qemu-block] [Qemu-devel] [PATCH] qemu-iotests: Use -nographic in 182

2017-10-30 Thread Kashyap Chamarthy

On Mon, Oct 30, 2017 at 11:13:21AM +0100, Kevin Wolf wrote:
> Am 30.10.2017 um 09:39 hat Kashyap Chamarthy geschrieben:
> > On Sun, Oct 29, 2017 at 01:53:38PM +0100, Kevin Wolf wrote:

[...]

> > >  echo
> > >  echo "Starting a second QEMU using the same image should fail"
> > > -echo 'quit' | $QEMU -monitor stdio \
> > > +echo 'quit' | $QEMU -nographic -monitor stdio \
> > 
> > Isn't "-display none" preferred instead of "-nographic"?

[...]

> It's not the same thing, -nographic does more than just that, like
> redirecting the serial port to stdio.

True, because in this test case the monitor is being sent explicitly to
'stdio'.  To your point, I now vaguely recall Paolo saying on IRC that
'-nographic' is a shorthand for:

'-serial mon:stdio -machine graphics=off -display none'

> In this specific case, -display
> none should actually be enough, but for consistency I just did the same
> thing as other qemu-iotest cases are doing.

Ah, okay.  At any rate:

Reviewed-by: Kashyap Chamarthy 

-- 
/kashyap

[Qemu-block] [PATCH v2 3/3] qemu-iotests: update unsupported image formats in 194

2017-10-30 Thread Jeff Cody

Test 194 checks for 'luks' to exclude it as an unsupported format.
However, most formats are unsupported, due to migration blockers.

Rather than specifying a blacklist of unsupported formats, whitelist
supported formats (specifically, qcow2, qed, raw, dmg).

Signed-off-by: Jeff Cody 
---
 tests/qemu-iotests/194 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/194 b/tests/qemu-iotests/194
index 8d973b440f..1d4214aca3 100755
--- a/tests/qemu-iotests/194
+++ b/tests/qemu-iotests/194
@@ -21,7 +21,7 @@
 
 import iotests
 
-iotests.verify_image_format(unsupported_fmts=['luks'])
+iotests.verify_image_format(supported_fmts=['qcow2', 'qed', 'raw', 'dmg'])
 iotests.verify_platform(['linux'])
 
 with iotests.FilePath('source.img') as source_img_path, \
-- 
2.13.6

[Qemu-block] [PATCH v2 1/3] block/vhdx.c: Don't blindly update the header

2017-10-30 Thread Jeff Cody

The VHDX specification requires that before user data modification of
the vhdx image, the VHDX header file and data GUIDs need to be updated.
In vhdx_open(), if the image is set to RDWR, we go ahead and update the
header.

However, just because the image is set to RDWR does not mean we can go
ahead and write at this point - specifically, if the QEMU run state is
INMIGRATE, the underlying file BS may be set to inactive via the BDS
open flag of BDRV_O_INACTIVE.  Attempting to write under this condition
will cause an assert in bdrv_co_pwritev().

We can alternatively latch the first time the image is written.  And lo
and behold, we do just that, via vhdx_user_visible_write() in
vhdx_co_writev().  This means the call to vhdx_update_headers() in
vhdx_open() is likely just vestigial, and can be removed.

Reported-by: Alexey Kardashevskiy 
Signed-off-by: Jeff Cody 
---
 block/vhdx.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/block/vhdx.c b/block/vhdx.c
index 7ae4589879..9956933da6 100644
--- a/block/vhdx.c
+++ b/block/vhdx.c
@@ -1008,13 +1008,6 @@ static int vhdx_open(BlockDriverState *bs, QDict 
*options, int flags,
 goto fail;
 }
 
-if (flags & BDRV_O_RDWR) {
-ret = vhdx_update_headers(bs, s, false, NULL);
-if (ret < 0) {
-goto fail;
-}
-}
-
 /* TODO: differencing files */
 
 return 0;
-- 
2.13.6

[Qemu-block] [PATCH v2 2/3] block/parallels: Do not update header or truncate image when INMIGRATE

2017-10-30 Thread Jeff Cody

If we write or modify the image file while the QEMU run state is
INMIGRATE, then the BDRV_O_INACTIVE BDS flag is set.  This will cause
an assert, since the image is marked inactive.  Make sure we obey this
flag.

Signed-off-by: Jeff Cody 
---
 block/parallels.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/block/parallels.c b/block/parallels.c
index 2b6c6e5709..7b7a3efa1d 100644
--- a/block/parallels.c
+++ b/block/parallels.c
@@ -708,7 +708,7 @@ static int parallels_open(BlockDriverState *bs, QDict 
*options, int flags,
 s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE;
 }
 
-if (flags & BDRV_O_RDWR) {
+if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_INACTIVE)) {
 s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC);
 ret = parallels_update_header(bs);
 if (ret < 0) {
@@ -741,12 +741,9 @@ static void parallels_close(BlockDriverState *bs)
 {
 BDRVParallelsState *s = bs->opaque;
 
-if (bs->open_flags & BDRV_O_RDWR) {
+if ((bs->open_flags & BDRV_O_RDWR) && !(bs->open_flags & BDRV_O_INACTIVE)) 
{
 s->header->inuse = 0;
 parallels_update_header(bs);
-}
-
-if (bs->open_flags & BDRV_O_RDWR) {
 bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS,
   PREALLOC_MODE_OFF, NULL);
 }
-- 
2.13.6

[Qemu-block] [PATCH v2 0/3] Don't write headers if BDS is INACTIVE

2017-10-30 Thread Jeff Cody


Changes from v1->v2:

* Drop previous parallels patches, just check BDRV_O_INACTIVE now
  (Kevin)

git-backport-diff -r qemu/master.. -u github/master
Key:
[] : patches are identical
[] : number of functional differences between upstream/downstream patch
[down] : patch is downstream-only
The flags [FC] indicate (F)unctional and (C)ontextual differences, respectively

001/3:[] [--] 'block/vhdx.c: Don't blindly update the header'
002/3:[down] 'block/parallels: Do not update header or truncate image when 
INMIGRATE'
003/3:[] [--] 'qemu-iotests: update unsupported image formats in 194'


v1:

VHDX and Parallels both blindly write headers to the image file
if the images are opened R/W.  This causes an assert if the QEMU run
state is INMIGRATE.

Jeff Cody (3):
  block/vhdx.c: Don't blindly update the header
  block/parallels: Do not update header or truncate image when INMIGRATE
  qemu-iotests: update unsupported image formats in 194

 block/parallels.c  | 7 ++-
 block/vhdx.c   | 7 ---
 tests/qemu-iotests/194 | 2 +-
 3 files changed, 3 insertions(+), 13 deletions(-)

-- 
2.13.6

Re: [Qemu-block] [RFC PATCH v6 13/12] tweak test 83 verbosity

2017-10-30 Thread Eric Blake

On 10/30/2017 01:34 PM, Vladimir Sementsov-Ogievskiy wrote:
> 27.10.2017 13:45, Eric Blake wrote:
>> Commenting these two lines is enough to avoid the change to 083.out
>> in 12/12.  That is evidence that we may want these two lines to be
>> trace points rather than error messages; or maybe we really do like
>> the extra verbosity in the case of an unexpected communication break.
>>
>> This patch does not meet coding guidelines, and I'm not proud enough
>> of it to give S-o-b, but I'm posting it for conversation.
>>

>>   if (ret < 0) {
>> -    error_report_err(local_err);
>> +    //error_report_err(local_err);
>>   }
>>   return ret;
>>   }
> 
> commenting is not ok, you should call error_free instead.

Indeed, and that's why I marked this RFC and used the wrong style
comment, to make it obvious that if we aren't going to print an error
message, there are better ways to go about doing that which don't leak
memory.  The question is whether the output is useful, or whether it
should be only via a trace (as it is, we aren't yet tracing anything in
block/nbd-client.c), or whether it is always just extra noise (in which
case passing the errp parameter around isn't helpful).

-- 
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

signature.asc
Description: OpenPGP digital signature

Re: [Qemu-block] [RFC PATCH v6 13/12] tweak test 83 verbosity

2017-10-30 Thread Vladimir Sementsov-Ogievskiy


27.10.2017 13:45, Eric Blake wrote:

Commenting these two lines is enough to avoid the change to 083.out
in 12/12.  That is evidence that we may want these two lines to be
trace points rather than error messages; or maybe we really do like
the extra verbosity in the case of an unexpected communication break.

This patch does not meet coding guidelines, and I'm not proud enough
of it to give S-o-b, but I'm posting it for conversation.

---
  block/nbd-client.c | 5 +++--
  1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/block/nbd-client.c b/block/nbd-client.c
index b44d4d4a01..e063b3fbc0 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -654,7 +654,8 @@ static int nbd_co_request(BlockDriverState *bs, NBDRequest 
*request,

  ret = nbd_co_receive_return_code(client, request->handle, _err);
  if (local_err) {
-error_report_err(local_err);
+assert(ret < 0);
+//error_report_err(local_err);
  }
  return ret;
  }
@@ -682,7 +683,7 @@ int nbd_client_co_preadv(BlockDriverState *bs, uint64_t 
offset,
  ret = nbd_co_receive_cmdread_reply(client, request.handle, offset, qiov,
 &local_err);
  if (ret < 0) {
-error_report_err(local_err);
+//error_report_err(local_err);
  }
  return ret;
  }


Commenting it out is not OK; you should call error_free() instead.

--
Best regards,
Vladimir

Re: [Qemu-block] [Qemu-devel] nvme: Add tracing v3

2017-10-30 Thread Philippe Mathieu-Daudé

Hi Doug,

On 10/12/2017 04:07 PM, Doug Gale wrote:
> From c7f12a5949458fdd195b5e0b52f158e8f114f203 Mon Sep 17 00:00:00 2001
> From: Doug Gale 
> Date: Thu, 12 Oct 2017 14:29:07 -0400
> Subject: [PATCH] nvme: Add tracing
> 
> Add trace output for commands, errors, and undefined behavior.
> Add guest error log output for undefined behavior.
> Report and ignore invalid undefined accesses to MMIO.
> Annotate unlikely error checks with unlikely.
> 
> Signed-off-by: Doug Gale 
> ---
>  hw/block/nvme.c   | 347 
> ++
>  hw/block/trace-events |  93 ++
>  2 files changed, 387 insertions(+), 53 deletions(-)
> 
> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
> index 9aa32692a3..adac19f5b0 100644
> --- a/hw/block/nvme.c
> +++ b/hw/block/nvme.c
> @@ -34,8 +34,17 @@
>  #include "qapi/visitor.h"
>  #include "sysemu/block-backend.h"
> 
> +#include "qemu/log.h"
> +#include "trace.h"
>  #include "nvme.h"
> 
> +#define NVME_GUEST_ERR(trace, fmt, ...) \
> +do { \
> +(trace_##trace)(__VA_ARGS__); \
> +qemu_log_mask(LOG_GUEST_ERROR, #trace \
> +" in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
> +} while (0)
> +
>  static void nvme_process_sq(void *opaque);
> 
>  static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
> @@ -86,10 +95,14 @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue *cq)
>  {
>  if (cq->irq_enabled) {
>  if (msix_enabled(&(n->parent_obj))) {
> +trace_nvme_irq_msix(cq->vector);
>  msix_notify(&(n->parent_obj), cq->vector);
>  } else {
> +trace_nvme_irq_pin();
>  pci_irq_pulse(&n->parent_obj);
>  }
> +} else {
> +trace_nvme_irq_masked();
>  }
>  }
> 
> @@ -100,7 +113,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
> QEMUIOVector *iov, uint64_t prp1,
>  trans_len = MIN(len, trans_len);
>  int num_prps = (len >> n->page_bits) + 1;
> 
> -if (!prp1) {
> +if (unlikely(!prp1)) {
> +trace_nvme_err_invalid_prp();
>  return NVME_INVALID_FIELD | NVME_DNR;
>  } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
> prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size)) {
> @@ -113,7 +127,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
> QEMUIOVector *iov, uint64_t prp1,
>  }
>  len -= trans_len;
>  if (len) {
> -if (!prp2) {
> +if (unlikely(!prp2)) {
> +trace_nvme_err_invalid_prp2_missing();
>  goto unmap;
>  }
>  if (len > n->page_size) {
> @@ -128,7 +143,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
> QEMUIOVector *iov, uint64_t prp1,
>  uint64_t prp_ent = le64_to_cpu(prp_list[i]);
> 
>  if (i == n->max_prp_ents - 1 && len > n->page_size) {
> -if (!prp_ent || prp_ent & (n->page_size - 1)) {
> +if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
> +trace_nvme_err_invalid_prplist_ent(prp_ent);
>  goto unmap;
>  }
> 
> @@ -140,7 +156,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
> QEMUIOVector *iov, uint64_t prp1,
>  prp_ent = le64_to_cpu(prp_list[i]);
>  }
> 
> -if (!prp_ent || prp_ent & (n->page_size - 1)) {
> +if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
> +trace_nvme_err_invalid_prplist_ent(prp_ent);
>  goto unmap;
>  }
> 
> @@ -154,7 +171,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
> QEMUIOVector *iov, uint64_t prp1,
>  i++;
>  }
>  } else {
> -if (prp2 & (n->page_size - 1)) {
> +if (unlikely(prp2 & (n->page_size - 1))) {
> +trace_nvme_err_invalid_prp2_align(prp2);
>  goto unmap;
>  }
>  if (qsg->nsg) {
> @@ -178,16 +196,20 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n,
> uint8_t *ptr, uint32_t len,
>  QEMUIOVector iov;
>  uint16_t status = NVME_SUCCESS;
> 
> +trace_nvme_dma_read(prp1, prp2);
> +
>  if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
>  return NVME_INVALID_FIELD | NVME_DNR;
>  }
>  if (qsg.nsg > 0) {
> -if (dma_buf_read(ptr, len, &qsg)) {
> +if (unlikely(dma_buf_read(ptr, len, &qsg))) {
> +trace_nvme_err_invalid_dma();
>  status = NVME_INVALID_FIELD | NVME_DNR;
>  }
>  qemu_sglist_destroy(&qsg);
>  } else {
> -if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
> +if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) {
> +trace_nvme_err_invalid_dma();
>  status = NVME_INVALID_FIELD | NVME_DNR;
>  }
>  qemu_iovec_destroy(&iov);
> @@ -273,7 +295,8 @@ static uint16_t nvme_write_zeros(NvmeCtrl *n,
>

Re: [Qemu-block] [Qemu-devel] [PATCH] qemu-iotests: Use -nographic in 182

2017-10-30 Thread Kevin Wolf

Am 30.10.2017 um 09:39 hat Kashyap Chamarthy geschrieben:
> On Sun, Oct 29, 2017 at 01:53:38PM +0100, Kevin Wolf wrote:
> > This prevents random UI frontend error messages from ending up in the output.
> > In particular, we were seeing this line in CI error logs:
> > 
> > +Unable to init server: Could not connect: Connection refused
> > 
> > Signed-off-by: Kevin Wolf 
> > ---
> >  tests/qemu-iotests/182 | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/tests/qemu-iotests/182 b/tests/qemu-iotests/182
> > index 2e078ceed8..4b31592fb8 100755
> > --- a/tests/qemu-iotests/182
> > +++ b/tests/qemu-iotests/182
> > @@ -62,7 +62,7 @@ _launch_qemu -drive 
> > file=$TEST_IMG,if=none,id=drive0,file.locking=on \
> >  
> >  echo
> >  echo "Starting a second QEMU using the same image should fail"
> > -echo 'quit' | $QEMU -monitor stdio \
> > +echo 'quit' | $QEMU -nographic -monitor stdio \
> 
> Isn't "-display none" preferred instead of "-nographic"?
> 
> Assuming you'll change it to "-display none":
> 
> Reviewed-by: Kashyap Chamarthy

It's not the same thing, -nographic does more than just that, like
redirecting the serial port to stdio. In this specific case, -display
none should actually be enough, but for consistency I just did the same
thing as other qemu-iotest cases are doing.

Kevin

Re: [Qemu-block] nvme: Add tracing v3

2017-10-30 Thread Doug Gale

On Fri, Oct 20, 2017 at 4:11 PM, Doug Gale  wrote:

> On Thu, Oct 12, 2017 at 3:07 PM, Doug Gale  wrote:
>
>> From c7f12a5949458fdd195b5e0b52f158e8f114f203 Mon Sep 17 00:00:00 2001
>> From: Doug Gale 
>> Date: Thu, 12 Oct 2017 14:29:07 -0400
>> Subject: [PATCH] nvme: Add tracing
>>
>> Add trace output for commands, errors, and undefined behavior.
>> Add guest error log output for undefined behavior.
>> Report and ignore invalid undefined accesses to MMIO.
>> Annotate unlikely error checks with unlikely.
>>
>> Signed-off-by: Doug Gale 
>> ---
>>  hw/block/nvme.c   | 347 ++
>> 
>>  hw/block/trace-events |  93 ++
>>  2 files changed, 387 insertions(+), 53 deletions(-)
>>
>> diff --git a/hw/block/nvme.c b/hw/block/nvme.c
>> index 9aa32692a3..adac19f5b0 100644
>> --- a/hw/block/nvme.c
>> +++ b/hw/block/nvme.c
>> @@ -34,8 +34,17 @@
>>  #include "qapi/visitor.h"
>>  #include "sysemu/block-backend.h"
>>
>> +#include "qemu/log.h"
>> +#include "trace.h"
>>  #include "nvme.h"
>>
>> +#define NVME_GUEST_ERR(trace, fmt, ...) \
>> +do { \
>> +(trace_##trace)(__VA_ARGS__); \
>> +qemu_log_mask(LOG_GUEST_ERROR, #trace \
>> +" in %s: " fmt "\n", __func__, ## __VA_ARGS__); \
>> +} while (0)
>> +
>>  static void nvme_process_sq(void *opaque);
>>
>>  static void nvme_addr_read(NvmeCtrl *n, hwaddr addr, void *buf, int size)
>> @@ -86,10 +95,14 @@ static void nvme_isr_notify(NvmeCtrl *n, NvmeCQueue
>> *cq)
>>  {
>>  if (cq->irq_enabled) {
>>  if (msix_enabled(&(n->parent_obj))) {
>> +trace_nvme_irq_msix(cq->vector);
>>  msix_notify(&(n->parent_obj), cq->vector);
>>  } else {
>> +trace_nvme_irq_pin();
>>  pci_irq_pulse(&n->parent_obj);
>>  }
>> +} else {
>> +trace_nvme_irq_masked();
>>  }
>>  }
>>
>> @@ -100,7 +113,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
>> QEMUIOVector *iov, uint64_t prp1,
>>  trans_len = MIN(len, trans_len);
>>  int num_prps = (len >> n->page_bits) + 1;
>>
>> -if (!prp1) {
>> +if (unlikely(!prp1)) {
>> +trace_nvme_err_invalid_prp();
>>  return NVME_INVALID_FIELD | NVME_DNR;
>>  } else if (n->cmbsz && prp1 >= n->ctrl_mem.addr &&
>> prp1 < n->ctrl_mem.addr + int128_get64(n->ctrl_mem.size))
>> {
>> @@ -113,7 +127,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
>> QEMUIOVector *iov, uint64_t prp1,
>>  }
>>  len -= trans_len;
>>  if (len) {
>> -if (!prp2) {
>> +if (unlikely(!prp2)) {
>> +trace_nvme_err_invalid_prp2_missing();
>>  goto unmap;
>>  }
>>  if (len > n->page_size) {
>> @@ -128,7 +143,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
>> QEMUIOVector *iov, uint64_t prp1,
>>  uint64_t prp_ent = le64_to_cpu(prp_list[i]);
>>
>>  if (i == n->max_prp_ents - 1 && len > n->page_size) {
>> -if (!prp_ent || prp_ent & (n->page_size - 1)) {
>> +if (unlikely(!prp_ent || prp_ent & (n->page_size -
>> 1))) {
>> +trace_nvme_err_invalid_prplist_ent(prp_ent);
>>  goto unmap;
>>  }
>>
>> @@ -140,7 +156,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
>> QEMUIOVector *iov, uint64_t prp1,
>>  prp_ent = le64_to_cpu(prp_list[i]);
>>  }
>>
>> -if (!prp_ent || prp_ent & (n->page_size - 1)) {
>> +if (unlikely(!prp_ent || prp_ent & (n->page_size - 1))) {
>> +trace_nvme_err_invalid_prplist_ent(prp_ent);
>>  goto unmap;
>>  }
>>
>> @@ -154,7 +171,8 @@ static uint16_t nvme_map_prp(QEMUSGList *qsg,
>> QEMUIOVector *iov, uint64_t prp1,
>>  i++;
>>  }
>>  } else {
>> -if (prp2 & (n->page_size - 1)) {
>> +if (unlikely(prp2 & (n->page_size - 1))) {
>> +trace_nvme_err_invalid_prp2_align(prp2);
>>  goto unmap;
>>  }
>>  if (qsg->nsg) {
>> @@ -178,16 +196,20 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n,
>> uint8_t *ptr, uint32_t len,
>>  QEMUIOVector iov;
>>  uint16_t status = NVME_SUCCESS;
>>
>> +trace_nvme_dma_read(prp1, prp2);
>> +
>>  if (nvme_map_prp(&qsg, &iov, prp1, prp2, len, n)) {
>>  return NVME_INVALID_FIELD | NVME_DNR;
>>  }
>>  if (qsg.nsg > 0) {
>> -if (dma_buf_read(ptr, len, &qsg)) {
>> +if (unlikely(dma_buf_read(ptr, len, &qsg))) {
>> +trace_nvme_err_invalid_dma();
>>  status = NVME_INVALID_FIELD | NVME_DNR;
>>  }
>>  qemu_sglist_destroy(&qsg);
>>  } else {
>> -if (qemu_iovec_to_buf(&iov, 0, ptr, len) != len) {
>> +if (unlikely(qemu_iovec_to_buf(&iov, 0, ptr, len) != len)) {
>> +

Re: [Qemu-block] [Qemu-devel] [PATCH] qemu-iotests: Use -nographic in 182

2017-10-30 Thread Kashyap Chamarthy

On Sun, Oct 29, 2017 at 01:53:38PM +0100, Kevin Wolf wrote:
> This prevents random UI frontend error messages from ending up in the output.
> In particular, we were seeing this line in CI error logs:
> 
> +Unable to init server: Could not connect: Connection refused
> 
> Signed-off-by: Kevin Wolf 
> ---
>  tests/qemu-iotests/182 | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/tests/qemu-iotests/182 b/tests/qemu-iotests/182
> index 2e078ceed8..4b31592fb8 100755
> --- a/tests/qemu-iotests/182
> +++ b/tests/qemu-iotests/182
> @@ -62,7 +62,7 @@ _launch_qemu -drive 
> file=$TEST_IMG,if=none,id=drive0,file.locking=on \
>  
>  echo
>  echo "Starting a second QEMU using the same image should fail"
> -echo 'quit' | $QEMU -monitor stdio \
> +echo 'quit' | $QEMU -nographic -monitor stdio \

Isn't "-display none" preferred instead of "-nographic"?

Assuming you'll change it to "-display none":

Reviewed-by: Kashyap Chamarthy

[...]

-- 
/kashyap

57 matches

Mail list logo