Re: [ovs-dev] [PATCH 3/5] raft: Set threshold on backlog for raft connections.

2020-11-03 Thread Ilya Maximets
On 10/28/20 11:49 AM, Dumitru Ceara wrote:
> On 10/26/20 2:42 AM, Ilya Maximets wrote:
>> RAFT messages could be fairly big.  If something abnormal happens to
>> one of the servers in a cluster it may not be able to process all the
>> incoming messages in a timely manner.  This results in jsonrpc backlog
>> growth on the sender's side.  For example if follower gets many new
>> clients at once that it needs to serve, or it decides to take a
>> snapshot in a period of high number of database changes.
>> If backlog grows large enough it becomes harder and harder for follower
>> to process incoming raft messages, it sends outdated replies and
>> starts receiving snapshots and the whole raft log from the leader.
>> Sometimes backlog grows too high (60GB in this example):
>>
>>   jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:,
>>num of msgs: 15370, backlog: 61731060773.
>>
>> In this case OS might actually decide to kill the sender to free some
>> memory.  Anyway, it could take a lot of time for such a server to catch
>> up with the rest of the cluster if it has so much data to receive and
>> process.
>>
>> Introducing backlog thresholds for jsonrpc connections.
>> If the sending backlog exceeds particular values (500 messages or
>> 4GB in size), the connection will be dropped and re-created.  This
>> allows dropping all of the current backlog and starting over, increasing
>> the chances of cluster recovery.
>>
>> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=129
>> Signed-off-by: Ilya Maximets 
>> ---
>>  NEWS  |  2 ++
>>  lib/jsonrpc.c | 57 ++-
>>  lib/jsonrpc.h |  6 ++
>>  ovsdb/raft.c  |  5 +
>>  4 files changed, 69 insertions(+), 1 deletion(-)
>>
>> diff --git a/NEWS b/NEWS
>> index 2860a8e9c..ebdf8758b 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -6,6 +6,8 @@ Post-v2.14.0
>>   * New unixctl command 'ovsdb-server/memory-trim-on-compaction on|off'.
>> If turned on, ovsdb-server will try to reclaim all the unused memory
>> after every DB compaction back to OS.  Disabled by default.
>> + * Maximum backlog on RAFT connections limited to 500 messages or 4GB.
>> +   Once threshold reached, connection is dropped (and re-established).
>> - DPDK:
>>   * Removed support for vhost-user dequeue zero-copy.
>> - The environment variable OVS_UNBOUND_CONF, if set, is now used
>> diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
>> index ecbc939fe..435824844 100644
>> --- a/lib/jsonrpc.c
>> +++ b/lib/jsonrpc.c
>> @@ -50,6 +50,10 @@ struct jsonrpc {
>>  struct ovs_list output; /* Contains "struct ofpbuf"s. */
>>  size_t output_count;/* Number of elements in "output". */
>>  size_t backlog;
>> +
>> +/* Limits. */
>> +size_t max_output;  /* 'output_count' disconnection threshold. */
>> +size_t max_backlog; /* 'backlog' disconnection threshold. */
>>  };
>>  
>>  /* Rate limit for error messages. */
>> @@ -178,6 +182,17 @@ jsonrpc_get_backlog(const struct jsonrpc *rpc)
>>  return rpc->status ? 0 : rpc->backlog;
>>  }
>>  
>> +/* Sets thresholds for send backlog.  If send backlog contains more than
>> + * 'max_n_msgs' messages or larger than 'max_backlog_bytes' bytes, connection
>> + * will be dropped. */
>> +void
>> +jsonrpc_set_backlog_threshold(struct jsonrpc *rpc,
>> +  size_t max_n_msgs, size_t max_backlog_bytes)
>> +{
>> +rpc->max_output = max_n_msgs;
>> +rpc->max_backlog = max_backlog_bytes;
>> +}
>> +
>>  /* Returns the number of bytes that have been received on 'rpc''s underlying
>>   * stream.  (The value wraps around if it exceeds UINT_MAX.) */
>>  unsigned int
>> @@ -261,9 +276,26 @@ jsonrpc_send(struct jsonrpc *rpc, struct jsonrpc_msg *msg)
>>  rpc->backlog += length;
>>  
>>  if (rpc->output_count >= 50) {
>> -VLOG_INFO_RL(&rl, "excessive sending backlog, jsonrpc: %s, num of"
>> +static struct vlog_rate_limit bl_rl = VLOG_RATE_LIMIT_INIT(5, 5);
>> +bool disconnect = false;
>> +
>> +VLOG_INFO_RL(&bl_rl, "excessive sending backlog, jsonrpc: %s, num of"
>>   " msgs: %"PRIuSIZE", backlog: %"PRIuSIZE".", rpc->name,
>>   rpc->output_count, rpc->backlog);
>> +if (rpc->max_output && rpc->output_count > rpc->max_output) {
>> +disconnect = true;
>> +VLOG_WARN("sending backlog exceeded maximum number of messages (%"
>> +  PRIuSIZE" > %"PRIuSIZE"), disconnecting, jsonrpc: %s.",
>> +  rpc->output_count, rpc->max_output, rpc->name);
>> +} else if (rpc->max_backlog && rpc->backlog > rpc->max_backlog) {
>> +disconnect = true;
>> +VLOG_WARN("sending backlog exceeded maximum size (%"PRIuSIZE" > %"
>> +  PRIuSIZE" bytes), disconnecting, jsonrpc: %s.",
>> +  rpc->backlog, 

Re: [ovs-dev] [PATCH 3/5] raft: Set threshold on backlog for raft connections.

2020-10-28 Thread Dumitru Ceara
On 10/26/20 2:42 AM, Ilya Maximets wrote:
> RAFT messages could be fairly big.  If something abnormal happens to
> one of the servers in a cluster it may not be able to process all the
> incoming messages in a timely manner.  This results in jsonrpc backlog
> growth on the sender's side.  For example if follower gets many new
> clients at once that it needs to serve, or it decides to take a
> snapshot in a period of high number of database changes.
> If backlog grows large enough it becomes harder and harder for follower
> to process incoming raft messages, it sends outdated replies and
> starts receiving snapshots and the whole raft log from the leader.
> Sometimes backlog grows too high (60GB in this example):
> 
>   jsonrpc|INFO|excessive sending backlog, jsonrpc: ssl:,
>num of msgs: 15370, backlog: 61731060773.
> 
> In this case OS might actually decide to kill the sender to free some
> memory.  Anyway, it could take a lot of time for such a server to catch
> up with the rest of the cluster if it has so much data to receive and
> process.
> 
> Introducing backlog thresholds for jsonrpc connections.
> If the sending backlog exceeds particular values (500 messages or
> 4GB in size), the connection will be dropped and re-created.  This
> allows dropping all of the current backlog and starting over, increasing
> the chances of cluster recovery.
> 
> Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=129
> Signed-off-by: Ilya Maximets 
> ---
>  NEWS  |  2 ++
>  lib/jsonrpc.c | 57 ++-
>  lib/jsonrpc.h |  6 ++
>  ovsdb/raft.c  |  5 +
>  4 files changed, 69 insertions(+), 1 deletion(-)
> 
> diff --git a/NEWS b/NEWS
> index 2860a8e9c..ebdf8758b 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -6,6 +6,8 @@ Post-v2.14.0
>   * New unixctl command 'ovsdb-server/memory-trim-on-compaction on|off'.
> If turned on, ovsdb-server will try to reclaim all the unused memory
> after every DB compaction back to OS.  Disabled by default.
> + * Maximum backlog on RAFT connections limited to 500 messages or 4GB.
> +   Once threshold reached, connection is dropped (and re-established).
> - DPDK:
>   * Removed support for vhost-user dequeue zero-copy.
> - The environment variable OVS_UNBOUND_CONF, if set, is now used
> diff --git a/lib/jsonrpc.c b/lib/jsonrpc.c
> index ecbc939fe..435824844 100644
> --- a/lib/jsonrpc.c
> +++ b/lib/jsonrpc.c
> @@ -50,6 +50,10 @@ struct jsonrpc {
>  struct ovs_list output; /* Contains "struct ofpbuf"s. */
>  size_t output_count;/* Number of elements in "output". */
>  size_t backlog;
> +
> +/* Limits. */
> +size_t max_output;  /* 'output_count' disconnection threshold. */
> +size_t max_backlog; /* 'backlog' disconnection threshold. */
>  };
>  
>  /* Rate limit for error messages. */
> @@ -178,6 +182,17 @@ jsonrpc_get_backlog(const struct jsonrpc *rpc)
>  return rpc->status ? 0 : rpc->backlog;
>  }
>  
> +/* Sets thresholds for send backlog.  If send backlog contains more than
> + * 'max_n_msgs' messages or larger than 'max_backlog_bytes' bytes, connection
> + * will be dropped. */
> +void
> +jsonrpc_set_backlog_threshold(struct jsonrpc *rpc,
> +  size_t max_n_msgs, size_t max_backlog_bytes)
> +{
> +rpc->max_output = max_n_msgs;
> +rpc->max_backlog = max_backlog_bytes;
> +}
> +
>  /* Returns the number of bytes that have been received on 'rpc''s underlying
>   * stream.  (The value wraps around if it exceeds UINT_MAX.) */
>  unsigned int
> @@ -261,9 +276,26 @@ jsonrpc_send(struct jsonrpc *rpc, struct jsonrpc_msg *msg)
>  rpc->backlog += length;
>  
>  if (rpc->output_count >= 50) {
> -VLOG_INFO_RL(&rl, "excessive sending backlog, jsonrpc: %s, num of"
> +static struct vlog_rate_limit bl_rl = VLOG_RATE_LIMIT_INIT(5, 5);
> +bool disconnect = false;
> +
> +VLOG_INFO_RL(&bl_rl, "excessive sending backlog, jsonrpc: %s, num of"
>   " msgs: %"PRIuSIZE", backlog: %"PRIuSIZE".", rpc->name,
>   rpc->output_count, rpc->backlog);
> +if (rpc->max_output && rpc->output_count > rpc->max_output) {
> +disconnect = true;
> +VLOG_WARN("sending backlog exceeded maximum number of messages (%"
> +  PRIuSIZE" > %"PRIuSIZE"), disconnecting, jsonrpc: %s.",
> +  rpc->output_count, rpc->max_output, rpc->name);
> +} else if (rpc->max_backlog && rpc->backlog > rpc->max_backlog) {
> +disconnect = true;
> +VLOG_WARN("sending backlog exceeded maximum size (%"PRIuSIZE" > %"
> +  PRIuSIZE" bytes), disconnecting, jsonrpc: %s.",
> +  rpc->backlog, rpc->max_backlog, rpc->name);
> +}
> +if (disconnect) {
> +jsonrpc_error(rpc, E2BIG);
> +}
>  }
>  
>  if (rpc->backlog