Re: [Qemu-block] [PATCH v3 10/11] block/nbd-client: nbd reconnect

2018-06-12 Thread Vladimir Sementsov-Ogievskiy

09.06.2018 18:32, Vladimir Sementsov-Ogievskiy wrote:

Implement reconnect. To achieve this:

1. Move from quit bool variable to state. 4 states are introduced:
connecting-wait: means, that reconnecting is in progress, and there
  were small number of reconnect attempts, so all requests are
  waiting for the connection.
connecting-nowait: reconnecting is in progress, there were a lot of
  attempts of reconnect, all requests will return errors.
connected: normal state
quit: exiting after fatal error or on close

Possible transitions are:

* -> quit
connecting-* -> connected
connecting-wait -> connecting-nowait
connected -> connecting-wait

2. Implement reconnect in connection_co. So, in connecting-* mode,
 connection_co, tries to reconnect every NBD_RECONNECT_NS.
 Configuring of this parameter (as well as NBD_RECONNECT_ATTEMPTS,
 which specifies bound of transition from connecting-wait to
 connecting-nowait) may be done as a follow-up patch.

3. Retry nbd queries on channel error, if we are in connecting-wait
 state.

4. In init, wait until for connection until transition to
 connecting-nowait. So, NBD_RECONNECT_ATTEMPTS is a bound of fail
 for initial connection too.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
  block/nbd-client.h |   2 +
  block/nbd-client.c | 170 ++---
  2 files changed, 123 insertions(+), 49 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index 2561e1ea42..1249f2eb52 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -44,6 +44,8 @@ typedef struct NBDClientSession {
  bool receiving;
  int connect_status;
  Error *connect_err;
+int connect_attempts;
+bool wait_in_flight;
  
  NBDClientRequest requests[MAX_NBD_REQUESTS];

  NBDReply reply;
diff --git a/block/nbd-client.c b/block/nbd-client.c
index f22ed7f404..49b1f67047 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -41,10 +41,16 @@ static int nbd_client_connect(BlockDriverState *bs,
const char *hostname,
Error **errp);
  
-/* @ret would be used for reconnect in future */

  static void nbd_channel_error(NBDClientSession *s, int ret)
  {
-s->state = NBD_CLIENT_QUIT;
+if (ret == -EIO) {
+if (s->state == NBD_CLIENT_CONNECTED) {
+s->state = NBD_CLIENT_CONNECTING_WAIT;
+s->connect_attempts = 0;
+}
+} else {
+s->state = NBD_CLIENT_QUIT;
+}
  }
  
  static void nbd_recv_coroutines_wake_all(NBDClientSession *s)

@@ -90,6 +96,19 @@ typedef struct NBDConnection {
  uint64_t reconnect_timeout;
  } NBDConnection;
  
+static bool nbd_client_connecting(NBDClientSession *client)

+{
+return client->state == NBD_CLIENT_CONNECTING_WAIT ||
+   client->state == NBD_CLIENT_CONNECTING_NOWAIT ||
+   client->state == NBD_CLIENT_CONNECTING_INIT;
+}
+
+static bool nbd_client_connecting_wait(NBDClientSession *client)
+{
+return client->state == NBD_CLIENT_CONNECTING_WAIT ||
+   client->state == NBD_CLIENT_CONNECTING_INIT;
+}
+
  static coroutine_fn void nbd_connection_entry(void *opaque)
  {
  NBDConnection *con = opaque;
@@ -98,26 +117,55 @@ static coroutine_fn void nbd_connection_entry(void *opaque)
  int ret = 0;
  Error *local_err = NULL;
  
-if (con->reconnect_attempts != 0) {

-error_setg(>connect_err, "Reconnect is not supported yet");
-s->connect_status = -EINVAL;
-nbd_channel_error(s, s->connect_status);
-return;
-}
+while (s->state != NBD_CLIENT_QUIT) {
+assert(s->reply.handle == 0);
  
-s->connect_status = nbd_client_connect(con->bs, con->saddr,

-   con->export, con->tlscreds,
-   con->hostname, >connect_err);
-if (s->connect_status < 0) {
-nbd_channel_error(s, s->connect_status);
-return;
-}
+if (nbd_client_connecting(s)) {
+if (s->connect_attempts == con->reconnect_attempts) {
+s->state = NBD_CLIENT_CONNECTING_NOWAIT;
+qemu_co_queue_restart_all(>free_sema);
+}
  
-/* successfully connected */

-s->state = NBD_CLIENT_CONNECTED;
+qemu_co_mutex_lock(>send_mutex);
+
+while (s->in_flight > 0) {
+qemu_co_mutex_unlock(>send_mutex);
+nbd_recv_coroutines_wake_all(s);
+s->wait_in_flight = true;
+qemu_coroutine_yield();
+s->wait_in_flight = false;
+qemu_co_mutex_lock(>send_mutex);
+}
+
+qemu_co_mutex_unlock(>send_mutex);
+
+/* Now we are sure, that nobody accessing the channel now and 
nobody
+ * will try to access the channel, until we set state to CONNECTED
+ */
+
+

Re: [Qemu-block] [PATCH v3 10/11] block/nbd-client: nbd reconnect

2018-06-12 Thread Vladimir Sementsov-Ogievskiy

09.06.2018 18:32, Vladimir Sementsov-Ogievskiy wrote:

Implement reconnect. To achieve this:

1. Move from quit bool variable to state. 4 states are introduced:
connecting-wait: means, that reconnecting is in progress, and there
  were small number of reconnect attempts, so all requests are
  waiting for the connection.
connecting-nowait: reconnecting is in progress, there were a lot of
  attempts of reconnect, all requests will return errors.
connected: normal state
quit: exiting after fatal error or on close

Possible transitions are:

* -> quit
connecting-* -> connected
connecting-wait -> connecting-nowait
connected -> connecting-wait

2. Implement reconnect in connection_co. So, in connecting-* mode,
 connection_co, tries to reconnect every NBD_RECONNECT_NS.
 Configuring of this parameter (as well as NBD_RECONNECT_ATTEMPTS,
 which specifies bound of transition from connecting-wait to
 connecting-nowait) may be done as a follow-up patch.

3. Retry nbd queries on channel error, if we are in connecting-wait
 state.

4. In init, wait until for connection until transition to
 connecting-nowait. So, NBD_RECONNECT_ATTEMPTS is a bound of fail
 for initial connection too.

Signed-off-by: Vladimir Sementsov-Ogievskiy 


squash:

@@ -616,7 +617,10 @@ static coroutine_fn int nbd_co_receive_one_chunk(
 s->reply.handle = 0;
 }

-    if (s->connection_co) {
+    if (s->connection_co && !s->wait_in_flight) {
+    /* We must check s->wait_in_flight, because we may entered by
+ * nbd_recv_coroutines_wake_all(), int this case we should not
+ * wake connection_co here, it will woken by last request. */
 aio_co_wake(s->connection_co);
 }



--
Best regards,
Vladimir




[Qemu-block] [PATCH v3 10/11] block/nbd-client: nbd reconnect

2018-06-09 Thread Vladimir Sementsov-Ogievskiy
Implement reconnect. To achieve this:

1. Move from quit bool variable to state. 4 states are introduced:
   connecting-wait: means, that reconnecting is in progress, and there
 were small number of reconnect attempts, so all requests are
 waiting for the connection.
   connecting-nowait: reconnecting is in progress, there were a lot of
 attempts of reconnect, all requests will return errors.
   connected: normal state
   quit: exiting after fatal error or on close

Possible transitions are:

   * -> quit
   connecting-* -> connected
   connecting-wait -> connecting-nowait
   connected -> connecting-wait

2. Implement reconnect in connection_co. So, in connecting-* mode,
connection_co, tries to reconnect every NBD_RECONNECT_NS.
Configuring of this parameter (as well as NBD_RECONNECT_ATTEMPTS,
which specifies bound of transition from connecting-wait to
connecting-nowait) may be done as a follow-up patch.

3. Retry nbd queries on channel error, if we are in connecting-wait
state.

4. In init, wait until for connection until transition to
connecting-nowait. So, NBD_RECONNECT_ATTEMPTS is a bound of fail
for initial connection too.

Signed-off-by: Vladimir Sementsov-Ogievskiy 
---
 block/nbd-client.h |   2 +
 block/nbd-client.c | 170 ++---
 2 files changed, 123 insertions(+), 49 deletions(-)

diff --git a/block/nbd-client.h b/block/nbd-client.h
index 2561e1ea42..1249f2eb52 100644
--- a/block/nbd-client.h
+++ b/block/nbd-client.h
@@ -44,6 +44,8 @@ typedef struct NBDClientSession {
 bool receiving;
 int connect_status;
 Error *connect_err;
+int connect_attempts;
+bool wait_in_flight;
 
 NBDClientRequest requests[MAX_NBD_REQUESTS];
 NBDReply reply;
diff --git a/block/nbd-client.c b/block/nbd-client.c
index f22ed7f404..49b1f67047 100644
--- a/block/nbd-client.c
+++ b/block/nbd-client.c
@@ -41,10 +41,16 @@ static int nbd_client_connect(BlockDriverState *bs,
   const char *hostname,
   Error **errp);
 
-/* @ret would be used for reconnect in future */
 static void nbd_channel_error(NBDClientSession *s, int ret)
 {
-s->state = NBD_CLIENT_QUIT;
+if (ret == -EIO) {
+if (s->state == NBD_CLIENT_CONNECTED) {
+s->state = NBD_CLIENT_CONNECTING_WAIT;
+s->connect_attempts = 0;
+}
+} else {
+s->state = NBD_CLIENT_QUIT;
+}
 }
 
 static void nbd_recv_coroutines_wake_all(NBDClientSession *s)
@@ -90,6 +96,19 @@ typedef struct NBDConnection {
 uint64_t reconnect_timeout;
 } NBDConnection;
 
+static bool nbd_client_connecting(NBDClientSession *client)
+{
+return client->state == NBD_CLIENT_CONNECTING_WAIT ||
+   client->state == NBD_CLIENT_CONNECTING_NOWAIT ||
+   client->state == NBD_CLIENT_CONNECTING_INIT;
+}
+
+static bool nbd_client_connecting_wait(NBDClientSession *client)
+{
+return client->state == NBD_CLIENT_CONNECTING_WAIT ||
+   client->state == NBD_CLIENT_CONNECTING_INIT;
+}
+
 static coroutine_fn void nbd_connection_entry(void *opaque)
 {
 NBDConnection *con = opaque;
@@ -98,26 +117,55 @@ static coroutine_fn void nbd_connection_entry(void *opaque)
 int ret = 0;
 Error *local_err = NULL;
 
-if (con->reconnect_attempts != 0) {
-error_setg(>connect_err, "Reconnect is not supported yet");
-s->connect_status = -EINVAL;
-nbd_channel_error(s, s->connect_status);
-return;
-}
+while (s->state != NBD_CLIENT_QUIT) {
+assert(s->reply.handle == 0);
 
-s->connect_status = nbd_client_connect(con->bs, con->saddr,
-   con->export, con->tlscreds,
-   con->hostname, >connect_err);
-if (s->connect_status < 0) {
-nbd_channel_error(s, s->connect_status);
-return;
-}
+if (nbd_client_connecting(s)) {
+if (s->connect_attempts == con->reconnect_attempts) {
+s->state = NBD_CLIENT_CONNECTING_NOWAIT;
+qemu_co_queue_restart_all(>free_sema);
+}
 
-/* successfully connected */
-s->state = NBD_CLIENT_CONNECTED;
+qemu_co_mutex_lock(>send_mutex);
+
+while (s->in_flight > 0) {
+qemu_co_mutex_unlock(>send_mutex);
+nbd_recv_coroutines_wake_all(s);
+s->wait_in_flight = true;
+qemu_coroutine_yield();
+s->wait_in_flight = false;
+qemu_co_mutex_lock(>send_mutex);
+}
+
+qemu_co_mutex_unlock(>send_mutex);
+
+/* Now we are sure, that nobody accessing the channel now and 
nobody
+ * will try to access the channel, until we set state to CONNECTED
+ */
+
+s->connect_status = nbd_client_connect(con->bs, con->saddr,
+