Re: [ovs-dev] [PATCH 3/5] raft: Set threshold on backlog for raft connections.
On 10/28/20 11:49 AM, Dumitru Ceara wrote: > On 10/26/20 2:42 AM, Ilya Maximets wrote: >> RAFT messages could be fairly big. If something abnormal happens to >> one of the servers in a cluster it may not be able to process all the >> incoming messages in a timely manner. This results in jsonrpc backlog >> growth on the sender's side. For example if follower gets many new >> clients at once that it needs to serve, or it decides to take a >> snapshot in a period of high number of database changes. >> If backlog grows large enough it becomes harder and harder for follower >> to process incoming raft messages, it sends outdated replies and >> starts receiving snapshots and the whole raft log from the leader. >> Sometimes backlog grows too high (60GB in this example): >> >> jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:, >>num of msgs: 15370, backlog: 61731060773. >> >> In this case OS might actually decide to kill the sender to free some >> memory. Anyway, It could take a lot of time for such a server to catch >> up with the rest of the cluster if it has so much data to receive and >> process. >> >> Introducing backlog thresholds for jsonrpc connections. >> If sending backlog will exceed particular values (500 messages or >> 4GB in size), connection will be dropped and re-created. This will >> allow to drop all the current backlog and start over increasing >> chances of cluster recovery. >> >> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=129 >> Signed-off-by: Ilya Maximets >> --- >> NEWS | 2 ++ >> lib/jsonrpc.c | 57 ++- >> lib/jsonrpc.h | 6 ++ >> ovsdb/raft.c | 5 + >> 4 files changed, 69 insertions(+), 1 deletion(-) >> >> diff --git a/NEWS b/NEWS >> index 2860a8e9c..ebdf8758b 100644 >> --- a/NEWS >> +++ b/NEWS >> @@ -6,6 +6,8 @@ Post-v2.14.0 >> * New unixctl command 'ovsdb-server/memory-trim-on-compaction on|off'. >> If turned on, ovsdb-server will try to reclaim all the unused memory >> after every DB compaction back to OS. Disabled by default. 
>> + * Maximum backlog on RAFT connections limited to 500 messages or 4GB. >> + Once threshold reached, connection is dropped (and re-established). >> - DPDK: >> * Removed support for vhost-user dequeue zero-copy. >> - The environment variable OVS_UNBOUND_CONF, if set, is now used >> diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c >> index ecbc939fe..435824844 100644 >> --- a/lib/jsonrpc.c >> +++ b/lib/jsonrpc.c >> @@ -50,6 +50,10 @@ struct jsonrpc { >> struct ovs_list output; /* Contains "struct ofpbuf"s. */ >> size_t output_count;/* Number of elements in "output". */ >> size_t backlog; >> + >> +/* Limits. */ >> +size_t max_output; /* 'output_count' disconnection threshold. >> */ >> +size_t max_backlog; /* 'backlog' disconnection threshold. */ >> }; >> >> /* Rate limit for error messages. */ >> @@ -178,6 +182,17 @@ jsonrpc_get_backlog(const struct jsonrpc *rpc) >> return rpc->status ? 0 : rpc->backlog; >> } >> >> +/* Sets thresholds for send backlog. If send backlog contains more than >> + * 'max_n_msgs' messages or larger than 'max_backlog_bytes' bytes, >> connection >> + * will be dropped. */ >> +void >> +jsonrpc_set_backlog_threshold(struct jsonrpc *rpc, >> + size_t max_n_msgs, size_t max_backlog_bytes) >> +{ >> +rpc->max_output = max_n_msgs; >> +rpc->max_backlog = max_backlog_bytes; >> +} >> + >> /* Returns the number of bytes that have been received on 'rpc''s underlying >> * stream. (The value wraps around if it exceeds UINT_MAX.) 
*/ >> unsigned int >> @@ -261,9 +276,26 @@ jsonrpc_send(struct jsonrpc *rpc, struct jsonrpc_msg >> *msg) >> rpc->backlog += length; >> >> if (rpc->output_count >= 50) { >> -VLOG_INFO_RL(&rl, "excessive sending backlog, jsonrpc: %s, num of" >> +static struct vlog_rate_limit bl_rl = VLOG_RATE_LIMIT_INIT(5, 5); >> +bool disconnect = false; >> + >> +VLOG_INFO_RL(&bl_rl, "excessive sending backlog, jsonrpc: %s, num >> of" >> " msgs: %"PRIuSIZE", backlog: %"PRIuSIZE".", rpc->name, >> rpc->output_count, rpc->backlog); >> +if (rpc->max_output && rpc->output_count > rpc->max_output) { >> +disconnect = true; >> +VLOG_WARN("sending backlog exceeded maximum number of messages >> (%" >> + PRIuSIZE" > %"PRIuSIZE"), disconnecting, jsonrpc: >> %s.", >> + rpc->output_count, rpc->max_output, rpc->name); >> +} else if (rpc->max_backlog && rpc->backlog > rpc->max_backlog) { >> +disconnect = true; >> +VLOG_WARN("sending backlog exceeded maximum size (%"PRIuSIZE" > >> %" >> + PRIuSIZE" bytes), disconnecting, jsonrpc: %s.", >> + rpc->backlog,
Re: [ovs-dev] [PATCH 3/5] raft: Set threshold on backlog for raft connections.
On 10/26/20 2:42 AM, Ilya Maximets wrote: > RAFT messages could be fairly big. If something abnormal happens to > one of the servers in a cluster it may not be able to process all the > incoming messages in a timely manner. This results in jsonrpc backlog > growth on the sender's side. For example if follower gets many new > clients at once that it needs to serve, or it decides to take a > snapshot in a period of high number of database changes. > If backlog grows large enough it becomes harder and harder for follower > to process incoming raft messages, it sends outdated replies and > starts receiving snapshots and the whole raft log from the leader. > Sometimes backlog grows too high (60GB in this example): > > jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:, >num of msgs: 15370, backlog: 61731060773. > > In this case OS might actually decide to kill the sender to free some > memory. Anyway, It could take a lot of time for such a server to catch > up with the rest of the cluster if it has so much data to receive and > process. > > Introducing backlog thresholds for jsonrpc connections. > If sending backlog will exceed particular values (500 messages or > 4GB in size), connection will be dropped and re-created. This will > allow to drop all the current backlog and start over increasing > chances of cluster recovery. > > Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=129 > Signed-off-by: Ilya Maximets > --- > NEWS | 2 ++ > lib/jsonrpc.c | 57 ++- > lib/jsonrpc.h | 6 ++ > ovsdb/raft.c | 5 + > 4 files changed, 69 insertions(+), 1 deletion(-) > > diff --git a/NEWS b/NEWS > index 2860a8e9c..ebdf8758b 100644 > --- a/NEWS > +++ b/NEWS > @@ -6,6 +6,8 @@ Post-v2.14.0 > * New unixctl command 'ovsdb-server/memory-trim-on-compaction on|off'. > If turned on, ovsdb-server will try to reclaim all the unused memory > after every DB compaction back to OS. Disabled by default. > + * Maximum backlog on RAFT connections limited to 500 messages or 4GB. 
> + Once threshold reached, connection is dropped (and re-established). > - DPDK: > * Removed support for vhost-user dequeue zero-copy. > - The environment variable OVS_UNBOUND_CONF, if set, is now used > diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c > index ecbc939fe..435824844 100644 > --- a/lib/jsonrpc.c > +++ b/lib/jsonrpc.c > @@ -50,6 +50,10 @@ struct jsonrpc { > struct ovs_list output; /* Contains "struct ofpbuf"s. */ > size_t output_count;/* Number of elements in "output". */ > size_t backlog; > + > +/* Limits. */ > +size_t max_output; /* 'output_count' disconnection threshold. */ > +size_t max_backlog; /* 'backlog' disconnection threshold. */ > }; > > /* Rate limit for error messages. */ > @@ -178,6 +182,17 @@ jsonrpc_get_backlog(const struct jsonrpc *rpc) > return rpc->status ? 0 : rpc->backlog; > } > > +/* Sets thresholds for send backlog. If send backlog contains more than > + * 'max_n_msgs' messages or larger than 'max_backlog_bytes' bytes, connection > + * will be dropped. */ > +void > +jsonrpc_set_backlog_threshold(struct jsonrpc *rpc, > + size_t max_n_msgs, size_t max_backlog_bytes) > +{ > +rpc->max_output = max_n_msgs; > +rpc->max_backlog = max_backlog_bytes; > +} > + > /* Returns the number of bytes that have been received on 'rpc''s underlying > * stream. (The value wraps around if it exceeds UINT_MAX.) 
*/ > unsigned int > @@ -261,9 +276,26 @@ jsonrpc_send(struct jsonrpc *rpc, struct jsonrpc_msg > *msg) > rpc->backlog += length; > > if (rpc->output_count >= 50) { > -VLOG_INFO_RL(&rl, "excessive sending backlog, jsonrpc: %s, num of" > +static struct vlog_rate_limit bl_rl = VLOG_RATE_LIMIT_INIT(5, 5); > +bool disconnect = false; > + > +VLOG_INFO_RL(&bl_rl, "excessive sending backlog, jsonrpc: %s, num of" > " msgs: %"PRIuSIZE", backlog: %"PRIuSIZE".", rpc->name, > rpc->output_count, rpc->backlog); > +if (rpc->max_output && rpc->output_count > rpc->max_output) { > +disconnect = true; > +VLOG_WARN("sending backlog exceeded maximum number of messages > (%" > + PRIuSIZE" > %"PRIuSIZE"), disconnecting, jsonrpc: %s.", > + rpc->output_count, rpc->max_output, rpc->name); > +} else if (rpc->max_backlog && rpc->backlog > rpc->max_backlog) { > +disconnect = true; > +VLOG_WARN("sending backlog exceeded maximum size (%"PRIuSIZE" > > %" > + PRIuSIZE" bytes), disconnecting, jsonrpc: %s.", > + rpc->backlog, rpc->max_backlog, rpc->name); > +} > +if (disconnect) { > +jsonrpc_error(rpc, E2BIG); > +} > } > > if (rpc->backlog