Re: [ovs-dev] [PATCH v2] ovsdb: provide raft and command interfaces with priority

2021-06-23 Thread Anton Ivanov

I just sent an updated version:

1. Logic fixed in the outer (remotes) loop. Inner (sessions) was mostly OK.

2. Appropriate measures have been taken to ensure that the "skip to" pointer is 
always valid.

3. Tested by forcing <10ms timeouts on processing and/or mandating "skip" on 
every iteration. All tests pass (see 4)

4. I had to disable skipping for replay. They seem to be incompatible.

Brgds,

A.

On 21/06/2021 20:29, Ilya Maximets wrote:

On 6/11/21 5:42 PM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Set a soft time limit of "raft election timer"/2 on ovsdb
processing.

This improves behaviour in large heavily loaded clusters.
While it cannot fully eliminate spurious raft elections
under heavy load, it significantly decreases their number.

Processing is (to the extent possible) restarted where it
stopped on the previous iteration to ensure that sessions
towards the tail of the session list are not starved.

Signed-off-by: Anton Ivanov 
---

Hey, Anton.  Thanks for the patch!
This is not a complete review, but a few things that I noticed.
See inline.

Best regards, Ilya Maximets.


  ovsdb/jsonrpc-server.c | 80 +++---
  ovsdb/jsonrpc-server.h |  2 +-
  ovsdb/ovsdb-server.c   | 15 +++-
  ovsdb/raft.c   |  5 +++
  ovsdb/raft.h   |  3 ++
  ovsdb/storage.c|  8 +
  ovsdb/storage.h|  2 ++
  7 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c
index 4e2dfc3d7..84e0f69b5 100644
--- a/ovsdb/jsonrpc-server.c
+++ b/ovsdb/jsonrpc-server.c
@@ -60,7 +60,8 @@ static struct ovsdb_jsonrpc_session 
*ovsdb_jsonrpc_session_create(
  struct ovsdb_jsonrpc_remote *, struct jsonrpc_session *, bool);
  static void ovsdb_jsonrpc_session_preremove_db(struct ovsdb_jsonrpc_remote *,
 struct ovsdb *);
-static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *);
+static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *,
+  uint64_t limit);
  static void ovsdb_jsonrpc_session_wait_all(struct ovsdb_jsonrpc_remote *);
  static void ovsdb_jsonrpc_session_get_memory_usage_all(
  const struct ovsdb_jsonrpc_remote *, struct simap *usage);
@@ -128,6 +129,8 @@ struct ovsdb_jsonrpc_server {
  bool read_only;/* This server is does not accept any
transactions that can modify the database. 
*/
  struct shash remotes;  /* Contains "struct ovsdb_jsonrpc_remote *"s. 
*/
+struct ovsdb_jsonrpc_remote *skip_to;
+bool yield_immediately;

'yield' doesn't seem to be a right word here.  Maybe 'wake_up' or something
similar?

Also, both fields above needs a comment.

OTOH, do we really need this filed?  I mean, if we didn't process some session,
shouldn't next session_wait() wake us up?


  };
  
  /* A configured remote.  This is either a passive stream listener plus a list

@@ -137,6 +140,7 @@ struct ovsdb_jsonrpc_remote {
  struct ovsdb_jsonrpc_server *server;
  struct pstream *listener;   /* Listener, if passive. */
  struct ovs_list sessions;   /* List of "struct ovsdb_jsonrpc_session"s. */
+struct ovsdb_jsonrpc_session *skip_to;
  uint8_t dscp;
  bool read_only;
  char *role;
@@ -159,6 +163,8 @@ ovsdb_jsonrpc_server_create(bool read_only)
  ovsdb_server_init(&server->up);
  shash_init(&server->remotes);
  server->read_only = read_only;
+server->yield_immediately = false;
+server->skip_to = NULL;
  return server;
  }
  
@@ -279,6 +285,7 @@ ovsdb_jsonrpc_server_add_remote(struct ovsdb_jsonrpc_server *svr,

  remote->dscp = options->dscp;
  remote->read_only = options->read_only;
  remote->role = nullable_xstrdup(options->role);
+remote->skip_to = NULL;
  shash_add(&svr->remotes, name, remote);
  
  if (!listener) {

@@ -378,12 +385,26 @@ ovsdb_jsonrpc_server_set_read_only(struct 
ovsdb_jsonrpc_server *svr,
  }
  
  void

-ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr)
+ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr, uint64_t limit)
  {
  struct shash_node *node;
+uint64_t elapsed = 0, start_time = 0;
+
+if (limit) {
+start_time = time_now();

Why this function uses time_now() while others are using time_msec() ?
time_now() returns seconds while 'limit' is in milliseconds.


+}
+
+svr->yield_immediately = false;
  
  SHASH_FOR_EACH (node, &svr->remotes) {

  struct ovsdb_jsonrpc_remote *remote = node->data;
+if (svr->skip_to) {
+if (remote != svr->skip_to) {
+continue;

What if 'skip_to' is already removed from the list?  We will, probably,
never process any remotes again.

Also, we didn't process first N remotes here and we're not setting
'yield_immediately'.  This is inconsistent, at least.  But, yes,
it's unclear if 'yield_immediate

Re: [ovs-dev] [PATCH v2] ovsdb: provide raft and command interfaces with priority

2021-06-22 Thread Anton Ivanov



On 21/06/2021 20:29, Ilya Maximets wrote:

On 6/11/21 5:42 PM, anton.iva...@cambridgegreys.com wrote:

From: Anton Ivanov 

Set a soft time limit of "raft election timer"/2 on ovsdb
processing.

This improves behaviour in large heavily loaded clusters.
While it cannot fully eliminate spurious raft elections
under heavy load, it significantly decreases their number.

Processing is (to the extent possible) restarted where it
stopped on the previous iteration to ensure that sessions
towards the tail of the session list are not starved.

Signed-off-by: Anton Ivanov 
---

Hey, Anton.  Thanks for the patch!
This is not a complete review, but a few things that I noticed.
See inline.

Best regards, Ilya Maximets.


  ovsdb/jsonrpc-server.c | 80 +++---
  ovsdb/jsonrpc-server.h |  2 +-
  ovsdb/ovsdb-server.c   | 15 +++-
  ovsdb/raft.c   |  5 +++
  ovsdb/raft.h   |  3 ++
  ovsdb/storage.c|  8 +
  ovsdb/storage.h|  2 ++
  7 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c
index 4e2dfc3d7..84e0f69b5 100644
--- a/ovsdb/jsonrpc-server.c
+++ b/ovsdb/jsonrpc-server.c
@@ -60,7 +60,8 @@ static struct ovsdb_jsonrpc_session 
*ovsdb_jsonrpc_session_create(
  struct ovsdb_jsonrpc_remote *, struct jsonrpc_session *, bool);
  static void ovsdb_jsonrpc_session_preremove_db(struct ovsdb_jsonrpc_remote *,
 struct ovsdb *);
-static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *);
+static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *,
+  uint64_t limit);
  static void ovsdb_jsonrpc_session_wait_all(struct ovsdb_jsonrpc_remote *);
  static void ovsdb_jsonrpc_session_get_memory_usage_all(
  const struct ovsdb_jsonrpc_remote *, struct simap *usage);
@@ -128,6 +129,8 @@ struct ovsdb_jsonrpc_server {
  bool read_only;/* This server is does not accept any
transactions that can modify the database. 
*/
  struct shash remotes;  /* Contains "struct ovsdb_jsonrpc_remote *"s. 
*/
+struct ovsdb_jsonrpc_remote *skip_to;
+bool yield_immediately;

'yield' doesn't seem to be a right word here.  Maybe 'wake_up' or something
similar?

Also, both fields above needs a comment.

OTOH, do we really need this filed?  I mean, if we didn't process some session,
shouldn't next session_wait() wake us up?


  };
  
  /* A configured remote.  This is either a passive stream listener plus a list

@@ -137,6 +140,7 @@ struct ovsdb_jsonrpc_remote {
  struct ovsdb_jsonrpc_server *server;
  struct pstream *listener;   /* Listener, if passive. */
  struct ovs_list sessions;   /* List of "struct ovsdb_jsonrpc_session"s. */
+struct ovsdb_jsonrpc_session *skip_to;
  uint8_t dscp;
  bool read_only;
  char *role;
@@ -159,6 +163,8 @@ ovsdb_jsonrpc_server_create(bool read_only)
  ovsdb_server_init(&server->up);
  shash_init(&server->remotes);
  server->read_only = read_only;
+server->yield_immediately = false;
+server->skip_to = NULL;
  return server;
  }
  
@@ -279,6 +285,7 @@ ovsdb_jsonrpc_server_add_remote(struct ovsdb_jsonrpc_server *svr,

  remote->dscp = options->dscp;
  remote->read_only = options->read_only;
  remote->role = nullable_xstrdup(options->role);
+remote->skip_to = NULL;
  shash_add(&svr->remotes, name, remote);
  
  if (!listener) {

@@ -378,12 +385,26 @@ ovsdb_jsonrpc_server_set_read_only(struct 
ovsdb_jsonrpc_server *svr,
  }
  
  void

-ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr)
+ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr, uint64_t limit)
  {
  struct shash_node *node;
+uint64_t elapsed = 0, start_time = 0;
+
+if (limit) {
+start_time = time_now();

Why this function uses time_now() while others are using time_msec() ?
time_now() returns seconds while 'limit' is in milliseconds.


That was daft.

Revised patch coming up shortly.




+}
+
+svr->yield_immediately = false;
  
  SHASH_FOR_EACH (node, &svr->remotes) {

  struct ovsdb_jsonrpc_remote *remote = node->data;
+if (svr->skip_to) {
+if (remote != svr->skip_to) {
+continue;

What if 'skip_to' is already removed from the list?  We will, probably,
never process any remotes again.


I tried to replicate the handling for list here which is correct (see below) 
and failed. Same as above actually - the loop in the session_list uses 
time_msec()

A revised patch with fixed handling coming up shortly.



Also, we didn't process first N remotes here and we're not setting
'yield_immediately'.  This is inconsistent, at least.  But, yes,
it's unclear if 'yield_immediately' needed at all.


+} else {
+svr->skip_to = NULL;
+}
+}
  
  if (remote->

Re: [ovs-dev] [PATCH v2] ovsdb: provide raft and command interfaces with priority

2021-06-21 Thread Ilya Maximets
On 6/11/21 5:42 PM, anton.iva...@cambridgegreys.com wrote:
> From: Anton Ivanov 
> 
> Set a soft time limit of "raft election timer"/2 on ovsdb
> processing.
> 
> This improves behaviour in large heavily loaded clusters.
> While it cannot fully eliminate spurious raft elections
> under heavy load, it significantly decreases their number.
> 
> Processing is (to the extent possible) restarted where it
> stopped on the previous iteration to ensure that sessions
> towards the tail of the session list are not starved.
> 
> Signed-off-by: Anton Ivanov 
> ---

Hey, Anton.  Thanks for the patch!
This is not a complete review, but a few things that I noticed.
See inline.

Best regards, Ilya Maximets.

>  ovsdb/jsonrpc-server.c | 80 +++---
>  ovsdb/jsonrpc-server.h |  2 +-
>  ovsdb/ovsdb-server.c   | 15 +++-
>  ovsdb/raft.c   |  5 +++
>  ovsdb/raft.h   |  3 ++
>  ovsdb/storage.c|  8 +
>  ovsdb/storage.h|  2 ++
>  7 files changed, 109 insertions(+), 6 deletions(-)
> 
> diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c
> index 4e2dfc3d7..84e0f69b5 100644
> --- a/ovsdb/jsonrpc-server.c
> +++ b/ovsdb/jsonrpc-server.c
> @@ -60,7 +60,8 @@ static struct ovsdb_jsonrpc_session 
> *ovsdb_jsonrpc_session_create(
>  struct ovsdb_jsonrpc_remote *, struct jsonrpc_session *, bool);
>  static void ovsdb_jsonrpc_session_preremove_db(struct ovsdb_jsonrpc_remote *,
> struct ovsdb *);
> -static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *);
> +static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *,
> +  uint64_t limit);
>  static void ovsdb_jsonrpc_session_wait_all(struct ovsdb_jsonrpc_remote *);
>  static void ovsdb_jsonrpc_session_get_memory_usage_all(
>  const struct ovsdb_jsonrpc_remote *, struct simap *usage);
> @@ -128,6 +129,8 @@ struct ovsdb_jsonrpc_server {
>  bool read_only;/* This server is does not accept any
>transactions that can modify the database. 
> */
>  struct shash remotes;  /* Contains "struct ovsdb_jsonrpc_remote *"s. 
> */
> +struct ovsdb_jsonrpc_remote *skip_to;
> +bool yield_immediately;

'yield' doesn't seem to be a right word here.  Maybe 'wake_up' or something
similar?

Also, both fields above needs a comment.

OTOH, do we really need this filed?  I mean, if we didn't process some session,
shouldn't next session_wait() wake us up?

>  };
>  
>  /* A configured remote.  This is either a passive stream listener plus a list
> @@ -137,6 +140,7 @@ struct ovsdb_jsonrpc_remote {
>  struct ovsdb_jsonrpc_server *server;
>  struct pstream *listener;   /* Listener, if passive. */
>  struct ovs_list sessions;   /* List of "struct ovsdb_jsonrpc_session"s. 
> */
> +struct ovsdb_jsonrpc_session *skip_to;
>  uint8_t dscp;
>  bool read_only;
>  char *role;
> @@ -159,6 +163,8 @@ ovsdb_jsonrpc_server_create(bool read_only)
>  ovsdb_server_init(&server->up);
>  shash_init(&server->remotes);
>  server->read_only = read_only;
> +server->yield_immediately = false;
> +server->skip_to = NULL;
>  return server;
>  }
>  
> @@ -279,6 +285,7 @@ ovsdb_jsonrpc_server_add_remote(struct 
> ovsdb_jsonrpc_server *svr,
>  remote->dscp = options->dscp;
>  remote->read_only = options->read_only;
>  remote->role = nullable_xstrdup(options->role);
> +remote->skip_to = NULL;
>  shash_add(&svr->remotes, name, remote);
>  
>  if (!listener) {
> @@ -378,12 +385,26 @@ ovsdb_jsonrpc_server_set_read_only(struct 
> ovsdb_jsonrpc_server *svr,
>  }
>  
>  void
> -ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr)
> +ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr, uint64_t limit)
>  {
>  struct shash_node *node;
> +uint64_t elapsed = 0, start_time = 0;
> +
> +if (limit) {
> +start_time = time_now();

Why this function uses time_now() while others are using time_msec() ?
time_now() returns seconds while 'limit' is in milliseconds.

> +}
> +
> +svr->yield_immediately = false;
>  
>  SHASH_FOR_EACH (node, &svr->remotes) {
>  struct ovsdb_jsonrpc_remote *remote = node->data;
> +if (svr->skip_to) {
> +if (remote != svr->skip_to) {
> +continue;

What if 'skip_to' is already removed from the list?  We will, probably,
never process any remotes again.

Also, we didn't process first N remotes here and we're not setting
'yield_immediately'.  This is inconsistent, at least.  But, yes,
it's unclear if 'yield_immediately' needed at all.

> +} else {
> +svr->skip_to = NULL;
> +}
> +}
>  
>  if (remote->listener) {
>  struct stream *stream;
> @@ -403,7 +424,16 @@ ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server 
> *svr)
>  }
>  }
> 

[ovs-dev] [PATCH v2] ovsdb: provide raft and command interfaces with priority

2021-06-11 Thread anton . ivanov
From: Anton Ivanov 

Set a soft time limit of "raft election timer"/2 on ovsdb
processing.

This improves behaviour in large heavily loaded clusters.
While it cannot fully eliminate spurious raft elections
under heavy load, it significantly decreases their number.

Processing is (to the extent possible) restarted where it
stopped on the previous iteration to ensure that sessions
towards the tail of the session list are not starved.

Signed-off-by: Anton Ivanov 
---
 ovsdb/jsonrpc-server.c | 80 +++---
 ovsdb/jsonrpc-server.h |  2 +-
 ovsdb/ovsdb-server.c   | 15 +++-
 ovsdb/raft.c   |  5 +++
 ovsdb/raft.h   |  3 ++
 ovsdb/storage.c|  8 +
 ovsdb/storage.h|  2 ++
 7 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c
index 4e2dfc3d7..84e0f69b5 100644
--- a/ovsdb/jsonrpc-server.c
+++ b/ovsdb/jsonrpc-server.c
@@ -60,7 +60,8 @@ static struct ovsdb_jsonrpc_session 
*ovsdb_jsonrpc_session_create(
 struct ovsdb_jsonrpc_remote *, struct jsonrpc_session *, bool);
 static void ovsdb_jsonrpc_session_preremove_db(struct ovsdb_jsonrpc_remote *,
struct ovsdb *);
-static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *);
+static void ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *,
+  uint64_t limit);
 static void ovsdb_jsonrpc_session_wait_all(struct ovsdb_jsonrpc_remote *);
 static void ovsdb_jsonrpc_session_get_memory_usage_all(
 const struct ovsdb_jsonrpc_remote *, struct simap *usage);
@@ -128,6 +129,8 @@ struct ovsdb_jsonrpc_server {
 bool read_only;/* This server is does not accept any
   transactions that can modify the database. */
 struct shash remotes;  /* Contains "struct ovsdb_jsonrpc_remote *"s. */
+struct ovsdb_jsonrpc_remote *skip_to;
+bool yield_immediately;
 };
 
 /* A configured remote.  This is either a passive stream listener plus a list
@@ -137,6 +140,7 @@ struct ovsdb_jsonrpc_remote {
 struct ovsdb_jsonrpc_server *server;
 struct pstream *listener;   /* Listener, if passive. */
 struct ovs_list sessions;   /* List of "struct ovsdb_jsonrpc_session"s. */
+struct ovsdb_jsonrpc_session *skip_to;
 uint8_t dscp;
 bool read_only;
 char *role;
@@ -159,6 +163,8 @@ ovsdb_jsonrpc_server_create(bool read_only)
 ovsdb_server_init(&server->up);
 shash_init(&server->remotes);
 server->read_only = read_only;
+server->yield_immediately = false;
+server->skip_to = NULL;
 return server;
 }
 
@@ -279,6 +285,7 @@ ovsdb_jsonrpc_server_add_remote(struct ovsdb_jsonrpc_server 
*svr,
 remote->dscp = options->dscp;
 remote->read_only = options->read_only;
 remote->role = nullable_xstrdup(options->role);
+remote->skip_to = NULL;
 shash_add(&svr->remotes, name, remote);
 
 if (!listener) {
@@ -378,12 +385,26 @@ ovsdb_jsonrpc_server_set_read_only(struct 
ovsdb_jsonrpc_server *svr,
 }
 
 void
-ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr)
+ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr, uint64_t limit)
 {
 struct shash_node *node;
+uint64_t elapsed = 0, start_time = 0;
+
+if (limit) {
+start_time = time_now();
+}
+
+svr->yield_immediately = false;
 
 SHASH_FOR_EACH (node, &svr->remotes) {
 struct ovsdb_jsonrpc_remote *remote = node->data;
+if (svr->skip_to) {
+if (remote != svr->skip_to) {
+continue;
+} else {
+svr->skip_to = NULL;
+}
+}
 
 if (remote->listener) {
 struct stream *stream;
@@ -403,7 +424,16 @@ ovsdb_jsonrpc_server_run(struct ovsdb_jsonrpc_server *svr)
 }
 }
 
-ovsdb_jsonrpc_session_run_all(remote);
+ovsdb_jsonrpc_session_run_all(remote, limit - elapsed);
+
+if (limit) {
+elapsed = start_time - time_now();
+if (elapsed >= limit) {
+svr->yield_immediately = true;
+svr->skip_to = remote;
+break;
+}
+}
 }
 }
 
@@ -412,6 +442,12 @@ ovsdb_jsonrpc_server_wait(struct ovsdb_jsonrpc_server *svr)
 {
 struct shash_node *node;
 
+if (svr->yield_immediately) {
+poll_immediate_wake();
+svr->yield_immediately = false;
+return;
+}
+
 SHASH_FOR_EACH (node, &svr->remotes) {
 struct ovsdb_jsonrpc_remote *remote = node->data;
 
@@ -583,15 +619,51 @@ ovsdb_jsonrpc_session_set_options(struct 
ovsdb_jsonrpc_session *session,
 }
 
 static void
-ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *remote)
+ovsdb_jsonrpc_session_run_all(struct ovsdb_jsonrpc_remote *remote,
+  uint64_t limit)
 {
 struct ovsdb_jsonrpc_session *s, *next;
+uint64_t start_time;
+
+