Re: [ovs-dev] [PATCH v2] dpif-netlink: distribute polling to discreet handlers

2020-08-07 Thread Aaron Conole
Flavio Leitner  writes:

> Hi Aaron,
>
> Thanks for the patch. I ran some basic tests here
> and they passed. I could see only one handler thread
> becoming active with a single upcall.
>
> See my comment below.
>
> On Tue, Jul 21, 2020 at 07:27:41PM -0400, Aaron Conole wrote:
>> Currently, the channel handlers are polled globally.  On some
>> systems, this causes a thundering herd issue where multiple
>> handler threads become active, only to do no work and immediately
>> sleep.
>> 
>> The approach here is to push the netlink socket channels to discreet
>> handler threads to process, rather than polling on every thread.
>> This will eliminate the need to wake multiple threads.
>> 
>> To check:
>> 
>>   ip netns add left
>>   ip netns add right
>>   ip link add center-left type veth peer name left0
>>   ip link add center-right type veth peer name right0
>>   ip link set left0 netns left
>>   ip link set right0 netns right
>>   ip link set center-left up
>>   ip link set center-right up
>>   ip -n left ip link set left0 up
>>   ip -n left ip addr add 172.31.110.10/24 dev left0
>>   ip -n right ip link set right0 up
>>   ip -n right ip addr add 172.31.110.11/24 dev right0
>> 
>>   ovs-vsctl add-br br0
>>   ovs-vsctl add-port br0 center-right
>>   ovs-vsctl add-port br0 center-left
>> 
>>   # in one terminal
>>   perf record -e sched:sched_wakeup,irq:softirq_entry -ag
>> 
>>   # in a separate terminal
>>   ip netns exec left arping -I left0 -c 1 172.31.110.11
>> 
>>   # in the perf terminal after exiting
>>   perf script
>> 
>> Look for the number of 'handler' threads which were made active.
>> 
>> Suggested-by: Ben Pfaff 
>> Reported-by: David Ahern 
>> Reported-at: 
>> https://mail.openvswitch.org/pipermail/ovs-dev/2019-December/365857.html
>> Cc: Matteo Croce 
>> Cc: Flavio Leitner 
>> Fixes: 69c51582f ("dpif-netlink: don't allocate per thread netlink sockets")
>> Signed-off-by: Aaron Conole 
>> ---
>> v2: Oops - forgot to commit my whitespace cleanups.
>> 
>> lib/dpif-netlink.c | 289 -
>>  1 file changed, 179 insertions(+), 110 deletions(-)
>> 
>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>> index 7da4fb54d9..71d2805427 100644
>> --- a/lib/dpif-netlink.c
>> +++ b/lib/dpif-netlink.c
>> @@ -183,6 +183,10 @@ struct dpif_handler {
>>  int n_events; /* Num events returned by epoll_wait(). */
>>  int event_offset; /* Offset into 'epoll_events'. */
>>  
>> +struct dpif_channel *channels; /* Array of channels for each port. */
>> +uint32_t *port_idx_map;/* Port idx to channel idx. */
>> +size_t n_channels;
>> +
>>  #ifdef _WIN32
>>  /* Pool of sockets. */
>>  struct dpif_windows_vport_sock *vport_sock_pool;
>> @@ -202,8 +206,6 @@ struct dpif_netlink {
>>  struct dpif_handler *handlers;
>>  uint32_t n_handlers;   /* Num of upcall handlers. */
>>  struct dpif_channel *channels; /* Array of channels for each port. */
>
>
> Shouldn't the above channels be removed?

Yes.

>> -int uc_array_size; /* Size of 'handler->channels' and */
>> -   /* 'handler->epoll_events'. */
>>  
>>  /* Change notification. */
>>  struct nl_sock *port_notifier; /* vport multicast group subscriber. */
>> @@ -287,6 +289,55 @@ close_nl_sock(struct nl_sock *sock)
>>  #endif
>>  }
>>  
>> +static uint32_t
>> +dpif_handler_port_idx_max(const struct dpif_handler *handler,
>> +  struct dpif_channel **out_chn)
>
> Looks like there is no caller passing out_chn, so why
> that is needed?

It was from an earlier rework of the patch.  I dropped it.

>> +{
>> +uint32_t max = 0;
>> +size_t i;
>> +
>> +for (i = 0; i < handler->n_channels; ++i) {
>
> OVS does have some code using '++i', but the majority of
> it uses 'i++', can you please swap them?

Sure.

>
>> +if (handler->channels[i].sock &&
>> +handler->port_idx_map[i] > max) {
>> +max = handler->port_idx_map[i];
>> +if (out_chn) {
>> +*out_chn = >channels[i];
>> +}
>> +}
>> +}
>> +
>> +return max;
>> +}
>> +
>> +static size_t
>> +dpif_handler_active_channels(const struct dpif_handler *handler)
>> +{
>> +size_t i, ret = 0;
>
> Perhaps 'count' or 'num'  would be a better name.

'chans'

>> +
>> +for (i = 0; i < handler->n_channels; ++i) {
>> +if (handler->channels[i].sock) {
>> +ret++;
>> +}
>> +}
>> +
>> +return ret;
>> +}
>> +
>> +static struct dpif_channel *
>> +dpif_handler_has_port_idx(const struct dpif_handler *handler, uint32_t idx)
>> +{
>> +size_t i;
>> +
>> +for (i = 0; i < handler->n_channels; ++i) {
>> +if (handler->channels[i].sock &&
>> +handler->port_idx_map[i] == idx) {
>> +return >channels[i];
>> +}
>> +}
>> +
>> +return NULL;
>> +}
>> +
>>  static struct dpif_netlink *
>>  

Re: [ovs-dev] [PATCH v2] dpif-netlink: distribute polling to discreet handlers

2020-08-06 Thread Flavio Leitner


Hi Aaron,

Thanks for the patch. I ran some basic tests here
and they passed. I could see only one handler thread
becoming active with a single upcall.

See my comment below.

On Tue, Jul 21, 2020 at 07:27:41PM -0400, Aaron Conole wrote:
> Currently, the channel handlers are polled globally.  On some
> systems, this causes a thundering herd issue where multiple
> handler threads become active, only to do no work and immediately
> sleep.
> 
> The approach here is to push the netlink socket channels to discreet
> handler threads to process, rather than polling on every thread.
> This will eliminate the need to wake multiple threads.
> 
> To check:
> 
>   ip netns add left
>   ip netns add right
>   ip link add center-left type veth peer name left0
>   ip link add center-right type veth peer name right0
>   ip link set left0 netns left
>   ip link set right0 netns right
>   ip link set center-left up
>   ip link set center-right up
>   ip -n left ip link set left0 up
>   ip -n left ip addr add 172.31.110.10/24 dev left0
>   ip -n right ip link set right0 up
>   ip -n right ip addr add 172.31.110.11/24 dev right0
> 
>   ovs-vsctl add-br br0
>   ovs-vsctl add-port br0 center-right
>   ovs-vsctl add-port br0 center-left
> 
>   # in one terminal
>   perf record -e sched:sched_wakeup,irq:softirq_entry -ag
> 
>   # in a separate terminal
>   ip netns exec left arping -I left0 -c 1 172.31.110.11
> 
>   # in the perf terminal after exiting
>   perf script
> 
> Look for the number of 'handler' threads which were made active.
> 
> Suggested-by: Ben Pfaff 
> Reported-by: David Ahern 
> Reported-at: 
> https://mail.openvswitch.org/pipermail/ovs-dev/2019-December/365857.html
> Cc: Matteo Croce 
> Cc: Flavio Leitner 
> Fixes: 69c51582f ("dpif-netlink: don't allocate per thread netlink sockets")
> Signed-off-by: Aaron Conole 
> ---
> v2: Oops - forgot to commit my whitespace cleanups.
> 
> lib/dpif-netlink.c | 289 -
>  1 file changed, 179 insertions(+), 110 deletions(-)
> 
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 7da4fb54d9..71d2805427 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -183,6 +183,10 @@ struct dpif_handler {
>  int n_events; /* Num events returned by epoll_wait(). */
>  int event_offset; /* Offset into 'epoll_events'. */
>  
> +struct dpif_channel *channels; /* Array of channels for each port. */
> +uint32_t *port_idx_map;/* Port idx to channel idx. */
> +size_t n_channels;
> +
>  #ifdef _WIN32
>  /* Pool of sockets. */
>  struct dpif_windows_vport_sock *vport_sock_pool;
> @@ -202,8 +206,6 @@ struct dpif_netlink {
>  struct dpif_handler *handlers;
>  uint32_t n_handlers;   /* Num of upcall handlers. */
>  struct dpif_channel *channels; /* Array of channels for each port. */


Shouldn't the above channels be removed?

> -int uc_array_size; /* Size of 'handler->channels' and */
> -   /* 'handler->epoll_events'. */
>  
>  /* Change notification. */
>  struct nl_sock *port_notifier; /* vport multicast group subscriber. */
> @@ -287,6 +289,55 @@ close_nl_sock(struct nl_sock *sock)
>  #endif
>  }
>  
> +static uint32_t
> +dpif_handler_port_idx_max(const struct dpif_handler *handler,
> +  struct dpif_channel **out_chn)

Looks like there is no caller passing out_chn, so why
that is needed?


> +{
> +uint32_t max = 0;
> +size_t i;
> +
> +for (i = 0; i < handler->n_channels; ++i) {

OVS does have some code using '++i', but the majority of
it uses 'i++', can you please swap them?


> +if (handler->channels[i].sock &&
> +handler->port_idx_map[i] > max) {
> +max = handler->port_idx_map[i];
> +if (out_chn) {
> +*out_chn = >channels[i];
> +}
> +}
> +}
> +
> +return max;
> +}
> +
> +static size_t
> +dpif_handler_active_channels(const struct dpif_handler *handler)
> +{
> +size_t i, ret = 0;

Perhaps 'count' or 'num'  would be a better name.

> +
> +for (i = 0; i < handler->n_channels; ++i) {
> +if (handler->channels[i].sock) {
> +ret++;
> +}
> +}
> +
> +return ret;
> +}
> +
> +static struct dpif_channel *
> +dpif_handler_has_port_idx(const struct dpif_handler *handler, uint32_t idx)
> +{
> +size_t i;
> +
> +for (i = 0; i < handler->n_channels; ++i) {
> +if (handler->channels[i].sock &&
> +handler->port_idx_map[i] == idx) {
> +return >channels[i];
> +}
> +}
> +
> +return NULL;
> +}
> +
>  static struct dpif_netlink *
>  dpif_netlink_cast(const struct dpif *dpif)
>  {
> @@ -452,90 +503,86 @@ static bool
>  vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
>uint32_t *upcall_pid)
>  {
> -/* Since the nl_sock can only be assigned in either all
> - * or 

Re: [ovs-dev] [PATCH v2] dpif-netlink: distribute polling to discreet handlers

2020-07-22 Thread Matteo Croce
On Wed, Jul 22, 2020 at 1:27 AM Aaron Conole  wrote:
> To check:
>
>   ip netns add left
>   ip netns add right
>   ip link add center-left type veth peer name left0
>   ip link add center-right type veth peer name right0
>   ip link set left0 netns left
>   ip link set right0 netns right

Nit: ip can set the peer netns upon veth creation:

  ip link add center-left type veth peer name left0 netns left
  ip link add center-right type veth peer name right0 netns right

> +static uint32_t
> +dpif_handler_port_idx_max(const struct dpif_handler *handler,
> +  struct dpif_channel **out_chn)
> +{
> +uint32_t max = 0;
> +size_t i;
> +
> +for (i = 0; i < handler->n_channels; ++i) {

Any reason for using the prefix operator everywhere?
They should be equal nowadays.

> --
> 2.25.4
>

Good job!
Did you have the chance to run the same tests I did in 69c51582ff78?
I'm curious to see how much it improves.

Bye,
-- 
Matteo Croce

perl -e 'for($t=0;;$t++){print chr($t*($t>>8|$t>>13)&255)}' |aplay
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev