Re: [ovs-dev] [PATCH v2] dpif-netlink: distribute polling to discrete handlers

2020-08-07 Thread Aaron Conole
Flavio Leitner  writes:

> Hi Aaron,
>
> Thanks for the patch. I ran some basic tests here
> and they passed. I could see only one handler thread
> becoming active with a single upcall.
>
> See my comment below.
>
> On Tue, Jul 21, 2020 at 07:27:41PM -0400, Aaron Conole wrote:
>> Currently, the channel handlers are polled globally.  On some
>> systems, this causes a thundering herd issue where multiple
>> handler threads become active, only to do no work and immediately
>> sleep.
>> 
>> The approach here is to push the netlink socket channels to discrete
>> handler threads to process, rather than polling on every thread.
>> This will eliminate the need to wake multiple threads.
>> 
>> To check:
>> 
>>   ip netns add left
>>   ip netns add right
>>   ip link add center-left type veth peer name left0
>>   ip link add center-right type veth peer name right0
>>   ip link set left0 netns left
>>   ip link set right0 netns right
>>   ip link set center-left up
>>   ip link set center-right up
>>   ip -n left link set left0 up
>>   ip -n left addr add 172.31.110.10/24 dev left0
>>   ip -n right link set right0 up
>>   ip -n right addr add 172.31.110.11/24 dev right0
>> 
>>   ovs-vsctl add-br br0
>>   ovs-vsctl add-port br0 center-right
>>   ovs-vsctl add-port br0 center-left
>> 
>>   # in one terminal
>>   perf record -e sched:sched_wakeup,irq:softirq_entry -ag
>> 
>>   # in a separate terminal
>>   ip netns exec left arping -I left0 -c 1 172.31.110.11
>> 
>>   # in the perf terminal after exiting
>>   perf script
>> 
>> Look for the number of 'handler' threads which were made active.
>> 
>> Suggested-by: Ben Pfaff 
>> Reported-by: David Ahern 
>> Reported-at: 
>> https://mail.openvswitch.org/pipermail/ovs-dev/2019-December/365857.html
>> Cc: Matteo Croce 
>> Cc: Flavio Leitner 
>> Fixes: 69c51582f ("dpif-netlink: don't allocate per thread netlink sockets")
>> Signed-off-by: Aaron Conole 
>> ---
>> v2: Oops - forgot to commit my whitespace cleanups.
>> 
>> lib/dpif-netlink.c | 289 -
>>  1 file changed, 179 insertions(+), 110 deletions(-)
>> 
>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>> index 7da4fb54d9..71d2805427 100644
>> --- a/lib/dpif-netlink.c
>> +++ b/lib/dpif-netlink.c
>> @@ -183,6 +183,10 @@ struct dpif_handler {
>>  int n_events; /* Num events returned by epoll_wait(). */
>>  int event_offset; /* Offset into 'epoll_events'. */
>>  
>> +struct dpif_channel *channels; /* Array of channels for each port. */
>> +uint32_t *port_idx_map;/* Port idx to channel idx. */
>> +size_t n_channels;
>> +
>>  #ifdef _WIN32
>>  /* Pool of sockets. */
>>  struct dpif_windows_vport_sock *vport_sock_pool;
>> @@ -202,8 +206,6 @@ struct dpif_netlink {
>>  struct dpif_handler *handlers;
>>  uint32_t n_handlers;   /* Num of upcall handlers. */
>>  struct dpif_channel *channels; /* Array of channels for each port. */
>
>
> Shouldn't the above channels be removed?

Yes.

>> -int uc_array_size; /* Size of 'handler->channels' and */
>> -   /* 'handler->epoll_events'. */
>>  
>>  /* Change notification. */
>>  struct nl_sock *port_notifier; /* vport multicast group subscriber. */
>> @@ -287,6 +289,55 @@ close_nl_sock(struct nl_sock *sock)
>>  #endif
>>  }
>>  
>> +static uint32_t
>> +dpif_handler_port_idx_max(const struct dpif_handler *handler,
>> +  struct dpif_channel **out_chn)
>
> Looks like there is no caller passing out_chn, so why
> that is needed?

It was from an earlier rework of the patch.  I dropped it.

>> +{
>> +uint32_t max = 0;
>> +size_t i;
>> +
>> +for (i = 0; i < handler->n_channels; ++i) {
>
> OVS does have some code using '++i', but the majority of
> it uses 'i++', can you please swap them?

Sure.

>
>> +if (handler->channels[i].sock &&
>> +handler->port_idx_map[i] > max) {
>> +max = handler->port_idx_map[i];
>> +if (out_chn) {
>> +*out_chn = &handler->channels[i];
>> +}
>> +}
>> +}
>> +
>> +return max;
>> +}
>> +
>> +static size_t
>> +dpif_handler_active_channels(const struct dpif_handler *handler)
>> +{
>> +size_t i, ret = 0;
>
> Perhaps 'count' or 'num'  would be a better name.

'chans'

>> +
>> +for (i = 0; i < handler->n_channels; ++i) {
>> +if (handler->channels[i].sock) {
>> +ret++;
>> +}
>> +}
>> +
>> +return ret;
>> +}
>> +
>> +static struct dpif_channel *
>> +dpif_handler_has_port_idx(const struct dpif_handler *handler, uint32_t idx)
>> +{
>> +size_t i;
>> +
>> +for (i = 0; i < handler->n_channels; ++i) {
>> +if (handler->channels[i].sock &&
>> +handler->port_idx_map[i] == idx) {
>> +return &handler->channels[i];
>> +}
>> +}
>> +
>> +return NULL;
>> +}
>> +
>>  static struct dpi

Re: [ovs-dev] [PATCH v2] dpif-netlink: distribute polling to discrete handlers

2020-08-06 Thread Flavio Leitner


Hi Aaron,

Thanks for the patch. I ran some basic tests here
and they passed. I could see only one handler thread
becoming active with a single upcall.

See my comment below.

On Tue, Jul 21, 2020 at 07:27:41PM -0400, Aaron Conole wrote:
> Currently, the channel handlers are polled globally.  On some
> systems, this causes a thundering herd issue where multiple
> handler threads become active, only to do no work and immediately
> sleep.
> 
> The approach here is to push the netlink socket channels to discrete
> handler threads to process, rather than polling on every thread.
> This will eliminate the need to wake multiple threads.
> 
> To check:
> 
>   ip netns add left
>   ip netns add right
>   ip link add center-left type veth peer name left0
>   ip link add center-right type veth peer name right0
>   ip link set left0 netns left
>   ip link set right0 netns right
>   ip link set center-left up
>   ip link set center-right up
>   ip -n left link set left0 up
>   ip -n left addr add 172.31.110.10/24 dev left0
>   ip -n right link set right0 up
>   ip -n right addr add 172.31.110.11/24 dev right0
> 
>   ovs-vsctl add-br br0
>   ovs-vsctl add-port br0 center-right
>   ovs-vsctl add-port br0 center-left
> 
>   # in one terminal
>   perf record -e sched:sched_wakeup,irq:softirq_entry -ag
> 
>   # in a separate terminal
>   ip netns exec left arping -I left0 -c 1 172.31.110.11
> 
>   # in the perf terminal after exiting
>   perf script
> 
> Look for the number of 'handler' threads which were made active.
> 
> Suggested-by: Ben Pfaff 
> Reported-by: David Ahern 
> Reported-at: 
> https://mail.openvswitch.org/pipermail/ovs-dev/2019-December/365857.html
> Cc: Matteo Croce 
> Cc: Flavio Leitner 
> Fixes: 69c51582f ("dpif-netlink: don't allocate per thread netlink sockets")
> Signed-off-by: Aaron Conole 
> ---
> v2: Oops - forgot to commit my whitespace cleanups.
> 
> lib/dpif-netlink.c | 289 -
>  1 file changed, 179 insertions(+), 110 deletions(-)
> 
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 7da4fb54d9..71d2805427 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -183,6 +183,10 @@ struct dpif_handler {
>  int n_events; /* Num events returned by epoll_wait(). */
>  int event_offset; /* Offset into 'epoll_events'. */
>  
> +struct dpif_channel *channels; /* Array of channels for each port. */
> +uint32_t *port_idx_map;/* Port idx to channel idx. */
> +size_t n_channels;
> +
>  #ifdef _WIN32
>  /* Pool of sockets. */
>  struct dpif_windows_vport_sock *vport_sock_pool;
> @@ -202,8 +206,6 @@ struct dpif_netlink {
>  struct dpif_handler *handlers;
>  uint32_t n_handlers;   /* Num of upcall handlers. */
>  struct dpif_channel *channels; /* Array of channels for each port. */


Shouldn't the above channels be removed?

> -int uc_array_size; /* Size of 'handler->channels' and */
> -   /* 'handler->epoll_events'. */
>  
>  /* Change notification. */
>  struct nl_sock *port_notifier; /* vport multicast group subscriber. */
> @@ -287,6 +289,55 @@ close_nl_sock(struct nl_sock *sock)
>  #endif
>  }
>  
> +static uint32_t
> +dpif_handler_port_idx_max(const struct dpif_handler *handler,
> +  struct dpif_channel **out_chn)

Looks like there is no caller passing out_chn, so why
that is needed?


> +{
> +uint32_t max = 0;
> +size_t i;
> +
> +for (i = 0; i < handler->n_channels; ++i) {

OVS does have some code using '++i', but the majority of
it uses 'i++', can you please swap them?


> +if (handler->channels[i].sock &&
> +handler->port_idx_map[i] > max) {
> +max = handler->port_idx_map[i];
> +if (out_chn) {
> +*out_chn = &handler->channels[i];
> +}
> +}
> +}
> +
> +return max;
> +}
> +
> +static size_t
> +dpif_handler_active_channels(const struct dpif_handler *handler)
> +{
> +size_t i, ret = 0;

Perhaps 'count' or 'num'  would be a better name.

> +
> +for (i = 0; i < handler->n_channels; ++i) {
> +if (handler->channels[i].sock) {
> +ret++;
> +}
> +}
> +
> +return ret;
> +}
> +
> +static struct dpif_channel *
> +dpif_handler_has_port_idx(const struct dpif_handler *handler, uint32_t idx)
> +{
> +size_t i;
> +
> +for (i = 0; i < handler->n_channels; ++i) {
> +if (handler->channels[i].sock &&
> +handler->port_idx_map[i] == idx) {
> +return &handler->channels[i];
> +}
> +}
> +
> +return NULL;
> +}
> +
>  static struct dpif_netlink *
>  dpif_netlink_cast(const struct dpif *dpif)
>  {
> @@ -452,90 +503,86 @@ static bool
>  vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
>uint32_t *upcall_pid)
>  {
> -/* Since the nl_sock can only be assigned in either all

Re: [ovs-dev] [PATCH v2] dpif-netlink: distribute polling to discrete handlers

2020-07-22 Thread Matteo Croce
On Wed, Jul 22, 2020 at 1:27 AM Aaron Conole  wrote:
> To check:
>
>   ip netns add left
>   ip netns add right
>   ip link add center-left type veth peer name left0
>   ip link add center-right type veth peer name right0
>   ip link set left0 netns left
>   ip link set right0 netns right

Nit: ip can set the peer netns upon veth creation:

  ip link add center-left type veth peer name left0 netns left
  ip link add center-right type veth peer name right0 netns right

> +static uint32_t
> +dpif_handler_port_idx_max(const struct dpif_handler *handler,
> +  struct dpif_channel **out_chn)
> +{
> +uint32_t max = 0;
> +size_t i;
> +
> +for (i = 0; i < handler->n_channels; ++i) {

Any reason for using the prefix operator everywhere?
They should be equal nowadays.

> --
> 2.25.4
>

Good job!
Did you have the chance to run the same tests I did in 69c51582ff78?
I'm curious to see how much it improves.

Bye,
-- 
Matteo Croce

perl -e 'for($t=0;;$t++){print chr($t*($t>>8|$t>>13)&255)}' |aplay
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] dpif-netlink: distribute polling to discrete handlers

2020-07-21 Thread Aaron Conole
Currently, the channel handlers are polled globally.  On some
systems, this causes a thundering herd issue where multiple
handler threads become active, only to do no work and immediately
sleep.

The approach here is to push the netlink socket channels to discrete
handler threads to process, rather than polling on every thread.
This will eliminate the need to wake multiple threads.

To check:

  ip netns add left
  ip netns add right
  ip link add center-left type veth peer name left0
  ip link add center-right type veth peer name right0
  ip link set left0 netns left
  ip link set right0 netns right
  ip link set center-left up
  ip link set center-right up
  ip -n left link set left0 up
  ip -n left addr add 172.31.110.10/24 dev left0
  ip -n right link set right0 up
  ip -n right addr add 172.31.110.11/24 dev right0

  ovs-vsctl add-br br0
  ovs-vsctl add-port br0 center-right
  ovs-vsctl add-port br0 center-left

  # in one terminal
  perf record -e sched:sched_wakeup,irq:softirq_entry -ag

  # in a separate terminal
  ip netns exec left arping -I left0 -c 1 172.31.110.11

  # in the perf terminal after exiting
  perf script

Look for the number of 'handler' threads which were made active.

Suggested-by: Ben Pfaff 
Reported-by: David Ahern 
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-dev/2019-December/365857.html
Cc: Matteo Croce 
Cc: Flavio Leitner 
Fixes: 69c51582f ("dpif-netlink: don't allocate per thread netlink sockets")
Signed-off-by: Aaron Conole 
---
v2: Oops - forgot to commit my whitespace cleanups.

lib/dpif-netlink.c | 289 -
 1 file changed, 179 insertions(+), 110 deletions(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 7da4fb54d9..71d2805427 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -183,6 +183,10 @@ struct dpif_handler {
 int n_events; /* Num events returned by epoll_wait(). */
 int event_offset; /* Offset into 'epoll_events'. */
 
+struct dpif_channel *channels; /* Array of channels for each port. */
+uint32_t *port_idx_map;/* Port idx to channel idx. */
+size_t n_channels;
+
 #ifdef _WIN32
 /* Pool of sockets. */
 struct dpif_windows_vport_sock *vport_sock_pool;
@@ -202,8 +206,6 @@ struct dpif_netlink {
 struct dpif_handler *handlers;
 uint32_t n_handlers;   /* Num of upcall handlers. */
 struct dpif_channel *channels; /* Array of channels for each port. */
-int uc_array_size; /* Size of 'handler->channels' and */
-   /* 'handler->epoll_events'. */
 
 /* Change notification. */
 struct nl_sock *port_notifier; /* vport multicast group subscriber. */
@@ -287,6 +289,55 @@ close_nl_sock(struct nl_sock *sock)
 #endif
 }
 
+static uint32_t
+dpif_handler_port_idx_max(const struct dpif_handler *handler,
+  struct dpif_channel **out_chn)
+{
+uint32_t max = 0;
+size_t i;
+
+for (i = 0; i < handler->n_channels; ++i) {
+if (handler->channels[i].sock &&
+handler->port_idx_map[i] > max) {
+max = handler->port_idx_map[i];
+if (out_chn) {
+*out_chn = &handler->channels[i];
+}
+}
+}
+
+return max;
+}
+
+static size_t
+dpif_handler_active_channels(const struct dpif_handler *handler)
+{
+size_t i, ret = 0;
+
+for (i = 0; i < handler->n_channels; ++i) {
+if (handler->channels[i].sock) {
+ret++;
+}
+}
+
+return ret;
+}
+
+static struct dpif_channel *
+dpif_handler_has_port_idx(const struct dpif_handler *handler, uint32_t idx)
+{
+size_t i;
+
+for (i = 0; i < handler->n_channels; ++i) {
+if (handler->channels[i].sock &&
+handler->port_idx_map[i] == idx) {
+return &handler->channels[i];
+}
+}
+
+return NULL;
+}
+
 static struct dpif_netlink *
 dpif_netlink_cast(const struct dpif *dpif)
 {
@@ -452,90 +503,86 @@ static bool
 vport_get_pid(struct dpif_netlink *dpif, uint32_t port_idx,
   uint32_t *upcall_pid)
 {
-/* Since the nl_sock can only be assigned in either all
- * or none "dpif" channels, the following check
- * would suffice. */
-if (!dpif->channels[port_idx].sock) {
-return false;
-}
+size_t i;
+
 ovs_assert(!WINDOWS || dpif->n_handlers <= 1);
 
-*upcall_pid = nl_sock_pid(dpif->channels[port_idx].sock);
+/* The 'port_idx' should only be valid for a single handler. */
+for (i = 0; i < dpif->n_handlers; ++i) {
+struct dpif_channel *channel =
+dpif_handler_has_port_idx(&dpif->handlers[i], port_idx);
+
+if (channel) {
+*upcall_pid = nl_sock_pid(channel->sock);
+return true;
+}
+}
 
-return true;
+return false;
 }
 
 static int
 vport_add_channel(struct dpif_netlink *dpif, odp_port_t port_no,
   struct n