Re: [ovs-dev] [PATCH 3/3] dpif-netlink: Introduce per-cpu upcall dispatch

2021-07-05 Thread Mark Gray
On 01/07/2021 22:34, Flavio Leitner wrote:
> 
> Hi Mark,
> 
> One more thing, this seems a relevant change to mention in the
> NEWS file.
> 

Done

> Thanks,
> fbl
> 
> On Wed, Jun 30, 2021 at 05:56:11AM -0400, Mark Gray wrote:
>> The Open vSwitch kernel module uses the upcall mechanism to send
>> packets from kernel space to user space when it misses in the kernel
>> space flow table. The upcall sends packets via a Netlink socket.
>> Currently, a Netlink socket is created for every vport. In this way,
>> there is a 1:1 mapping between a vport and a Netlink socket.
>> When a packet is received by a vport, if it needs to be sent to
>> user space, it is sent via the corresponding Netlink socket.
>>
>> This mechanism, with various iterations of the corresponding user
>> space code, has seen some limitations and issues:
>>
>> * On systems with a large number of vports, there is correspondingly
>> a large number of Netlink sockets which can limit scaling.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
>> * Packet reordering on upcalls.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
>> * A thundering herd issue.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=183)
>>
>> This patch introduces an alternative, feature-negotiated, upcall
>> mode using a per-cpu dispatch rather than a per-vport dispatch.
>>
>> In this mode, the Netlink socket to be used for the upcall is
>> selected based on the CPU of the thread that is executing the upcall.
>> In this way, it resolves the issues above as:
>>
>> a) The number of Netlink sockets scales with the number of CPUs
>> rather than the number of vports.
>> b) Ordering per-flow is maintained as packets are distributed to
>> CPUs based on mechanisms such as RSS and flows are distributed
>> to a single user space thread.
>> c) Packets from a flow can only wake up one user space thread.
>>
>> Reported-at: https://bugzilla.redhat.com/1844576
>> Signed-off-by: Mark Gray 
>> ---
>>
>> Notes:
>> v1 - Reworked based on Flavio's comments:
>>  * change DISPATCH_MODE_PER_CPU() to inline function
>>  * add `ovs-appctl` command to check dispatch mode for datapaths
>>  * fixed issue with userspace actions (tested using `ovs-ofctl 
>> monitor br0 65534 -P nxt_packet_in`)
>>  * update documentation as requested
>>
>>  .../linux/compat/include/linux/openvswitch.h  |   7 +
>>  lib/dpif-netdev.c |   1 +
>>  lib/dpif-netlink.c| 456 --
>>  lib/dpif-provider.h   |  32 +-
>>  lib/dpif.c|  17 +
>>  lib/dpif.h|   1 +
>>  ofproto/ofproto-dpif-upcall.c |  51 +-
>>  ofproto/ofproto.c |  12 -
>>  vswitchd/vswitch.xml  |  23 +-
>>  9 files changed, 504 insertions(+), 96 deletions(-)
>>
>> diff --git a/datapath/linux/compat/include/linux/openvswitch.h 
>> b/datapath/linux/compat/include/linux/openvswitch.h
>> index 875de20250ce..f29265df055e 100644
>> --- a/datapath/linux/compat/include/linux/openvswitch.h
>> +++ b/datapath/linux/compat/include/linux/openvswitch.h
>> @@ -89,6 +89,8 @@ enum ovs_datapath_cmd {
>>   * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
>>   * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
>>   * not be sent.
>> + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
>> + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
>>   * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
>>   * datapath.  Always present in notifications.
>>   * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for 
>> the
>> @@ -105,6 +107,8 @@ enum ovs_datapath_attr {
>>  OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */
>>  OVS_DP_ATTR_USER_FEATURES,  /* OVS_DP_F_*  */
>>  OVS_DP_ATTR_PAD,
>> +OVS_DP_ATTR_PAD2,
>> +OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls */
>>  __OVS_DP_ATTR_MAX
>>  };
>>  
>> @@ -146,6 +150,9 @@ struct ovs_vport_stats {
>>  /* Allow tc offload recirc sharing */
>>  #define OVS_DP_F_TC_RECIRC_SHARING  (1 << 2)
>>  
>> +/* Allow per-cpu dispatch of upcalls */
>> +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3)
>> +
>>  /* Fixed logical ports. */
>>  #define OVSP_LOCAL  ((__u32)0)
>>  
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index c5ab35d2a5a5..b2c2baadf4f3 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -8562,6 +8562,7 @@ const struct dpif_class dpif_netdev_class = {
>>  dpif_netdev_operate,
>>  NULL,   /* recv_set */
>>  NULL,   /* handlers_set */
>> +NULL,   /* number_handlers_required */
>>  dpif_netdev_set_config,
>>  dpif_netdev_queue_to_priority,
>>  NULL,   /* recv */
>> diff 

Re: [ovs-dev] [PATCH 3/3] dpif-netlink: Introduce per-cpu upcall dispatch

2021-07-05 Thread Mark Gray
On 01/07/2021 21:51, Flavio Leitner wrote:
> 
> Hi Mark,
> 
> I've not tested this yet.
> See some comments below.
> 
> On Wed, Jun 30, 2021 at 05:56:11AM -0400, Mark Gray wrote:
>> The Open vSwitch kernel module uses the upcall mechanism to send
>> packets from kernel space to user space when it misses in the kernel
>> space flow table. The upcall sends packets via a Netlink socket.
>> Currently, a Netlink socket is created for every vport. In this way,
>> there is a 1:1 mapping between a vport and a Netlink socket.
>> When a packet is received by a vport, if it needs to be sent to
>> user space, it is sent via the corresponding Netlink socket.
>>
>> This mechanism, with various iterations of the corresponding user
>> space code, has seen some limitations and issues:
>>
>> * On systems with a large number of vports, there is correspondingly
>> a large number of Netlink sockets which can limit scaling.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
>> * Packet reordering on upcalls.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
>> * A thundering herd issue.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=183)
>>
>> This patch introduces an alternative, feature-negotiated, upcall
>> mode using a per-cpu dispatch rather than a per-vport dispatch.
>>
>> In this mode, the Netlink socket to be used for the upcall is
>> selected based on the CPU of the thread that is executing the upcall.
>> In this way, it resolves the issues above as:
>>
>> a) The number of Netlink sockets scales with the number of CPUs
>> rather than the number of vports.
>> b) Ordering per-flow is maintained as packets are distributed to
>> CPUs based on mechanisms such as RSS and flows are distributed
>> to a single user space thread.
>> c) Packets from a flow can only wake up one user space thread.
>>
>> Reported-at: https://bugzilla.redhat.com/1844576
>> Signed-off-by: Mark Gray 
>> ---
>>
>> Notes:
>> v1 - Reworked based on Flavio's comments:
>>  * change DISPATCH_MODE_PER_CPU() to inline function
>>  * add `ovs-appctl` command to check dispatch mode for datapaths
>>  * fixed issue with userspace actions (tested using `ovs-ofctl 
>> monitor br0 65534 -P nxt_packet_in`)
>>  * update documentation as requested
>>
>>  .../linux/compat/include/linux/openvswitch.h  |   7 +
>>  lib/dpif-netdev.c |   1 +
>>  lib/dpif-netlink.c| 456 --
>>  lib/dpif-provider.h   |  32 +-
>>  lib/dpif.c|  17 +
>>  lib/dpif.h|   1 +
>>  ofproto/ofproto-dpif-upcall.c |  51 +-
>>  ofproto/ofproto.c |  12 -
>>  vswitchd/vswitch.xml  |  23 +-
>>  9 files changed, 504 insertions(+), 96 deletions(-)
>>
>> diff --git a/datapath/linux/compat/include/linux/openvswitch.h 
>> b/datapath/linux/compat/include/linux/openvswitch.h
>> index 875de20250ce..f29265df055e 100644
>> --- a/datapath/linux/compat/include/linux/openvswitch.h
>> +++ b/datapath/linux/compat/include/linux/openvswitch.h
>> @@ -89,6 +89,8 @@ enum ovs_datapath_cmd {
>>   * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
>>   * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
>>   * not be sent.
>> + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
>> + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
>>   * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
>>   * datapath.  Always present in notifications.
>>   * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for 
>> the
>> @@ -105,6 +107,8 @@ enum ovs_datapath_attr {
>>  OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */
>>  OVS_DP_ATTR_USER_FEATURES,  /* OVS_DP_F_*  */
>>  OVS_DP_ATTR_PAD,
>> +OVS_DP_ATTR_PAD2,
>> +OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls */
>>  __OVS_DP_ATTR_MAX
>>  };
>>  
>> @@ -146,6 +150,9 @@ struct ovs_vport_stats {
>>  /* Allow tc offload recirc sharing */
>>  #define OVS_DP_F_TC_RECIRC_SHARING  (1 << 2)
>>  
>> +/* Allow per-cpu dispatch of upcalls */
>> +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3)
>> +
>>  /* Fixed logical ports. */
>>  #define OVSP_LOCAL  ((__u32)0)
>>  
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index c5ab35d2a5a5..b2c2baadf4f3 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -8562,6 +8562,7 @@ const struct dpif_class dpif_netdev_class = {
>>  dpif_netdev_operate,
>>  NULL,   /* recv_set */
>>  NULL,   /* handlers_set */
>> +NULL,   /* number_handlers_required */
>>  dpif_netdev_set_config,
>>  dpif_netdev_queue_to_priority,
>>  NULL,   /* recv */
>> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
>

Re: [ovs-dev] [PATCH 3/3] dpif-netlink: Introduce per-cpu upcall dispatch

2021-07-01 Thread Flavio Leitner


Hi Mark,

One more thing, this seems a relevant change to mention in the
NEWS file.

Thanks,
fbl

On Wed, Jun 30, 2021 at 05:56:11AM -0400, Mark Gray wrote:
> The Open vSwitch kernel module uses the upcall mechanism to send
> packets from kernel space to user space when it misses in the kernel
> space flow table. The upcall sends packets via a Netlink socket.
> Currently, a Netlink socket is created for every vport. In this way,
> there is a 1:1 mapping between a vport and a Netlink socket.
> When a packet is received by a vport, if it needs to be sent to
> user space, it is sent via the corresponding Netlink socket.
> 
> This mechanism, with various iterations of the corresponding user
> space code, has seen some limitations and issues:
> 
> * On systems with a large number of vports, there is correspondingly
> a large number of Netlink sockets which can limit scaling.
> (https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
> * Packet reordering on upcalls.
> (https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
> * A thundering herd issue.
> (https://bugzilla.redhat.com/show_bug.cgi?id=183)
> 
> This patch introduces an alternative, feature-negotiated, upcall
> mode using a per-cpu dispatch rather than a per-vport dispatch.
> 
> In this mode, the Netlink socket to be used for the upcall is
> selected based on the CPU of the thread that is executing the upcall.
> In this way, it resolves the issues above as:
> 
> a) The number of Netlink sockets scales with the number of CPUs
> rather than the number of vports.
> b) Ordering per-flow is maintained as packets are distributed to
> CPUs based on mechanisms such as RSS and flows are distributed
> to a single user space thread.
> c) Packets from a flow can only wake up one user space thread.
> 
> Reported-at: https://bugzilla.redhat.com/1844576
> Signed-off-by: Mark Gray 
> ---
> 
> Notes:
> v1 - Reworked based on Flavio's comments:
>  * change DISPATCH_MODE_PER_CPU() to inline function
>  * add `ovs-appctl` command to check dispatch mode for datapaths
>  * fixed issue with userspace actions (tested using `ovs-ofctl 
> monitor br0 65534 -P nxt_packet_in`)
>  * update documentation as requested
> 
>  .../linux/compat/include/linux/openvswitch.h  |   7 +
>  lib/dpif-netdev.c |   1 +
>  lib/dpif-netlink.c| 456 --
>  lib/dpif-provider.h   |  32 +-
>  lib/dpif.c|  17 +
>  lib/dpif.h|   1 +
>  ofproto/ofproto-dpif-upcall.c |  51 +-
>  ofproto/ofproto.c |  12 -
>  vswitchd/vswitch.xml  |  23 +-
>  9 files changed, 504 insertions(+), 96 deletions(-)
> 
> diff --git a/datapath/linux/compat/include/linux/openvswitch.h 
> b/datapath/linux/compat/include/linux/openvswitch.h
> index 875de20250ce..f29265df055e 100644
> --- a/datapath/linux/compat/include/linux/openvswitch.h
> +++ b/datapath/linux/compat/include/linux/openvswitch.h
> @@ -89,6 +89,8 @@ enum ovs_datapath_cmd {
>   * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
>   * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
>   * not be sent.
> + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
> + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
>   * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
>   * datapath.  Always present in notifications.
>   * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for 
> the
> @@ -105,6 +107,8 @@ enum ovs_datapath_attr {
>   OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */
>   OVS_DP_ATTR_USER_FEATURES,  /* OVS_DP_F_*  */
>   OVS_DP_ATTR_PAD,
> + OVS_DP_ATTR_PAD2,
> + OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls */
>   __OVS_DP_ATTR_MAX
>  };
>  
> @@ -146,6 +150,9 @@ struct ovs_vport_stats {
>  /* Allow tc offload recirc sharing */
>  #define OVS_DP_F_TC_RECIRC_SHARING  (1 << 2)
>  
> +/* Allow per-cpu dispatch of upcalls */
> +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3)
> +
>  /* Fixed logical ports. */
>  #define OVSP_LOCAL  ((__u32)0)
>  
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index c5ab35d2a5a5..b2c2baadf4f3 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -8562,6 +8562,7 @@ const struct dpif_class dpif_netdev_class = {
>  dpif_netdev_operate,
>  NULL,   /* recv_set */
>  NULL,   /* handlers_set */
> +NULL,   /* number_handlers_required */
>  dpif_netdev_set_config,
>  dpif_netdev_queue_to_priority,
>  NULL,   /* recv */
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index f92905dd83fd..2399879aea3e 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -98,6 +9

Re: [ovs-dev] [PATCH 3/3] dpif-netlink: Introduce per-cpu upcall dispatch

2021-07-01 Thread Flavio Leitner


Hi Mark,

I've not tested this yet.
See some comments below.

On Wed, Jun 30, 2021 at 05:56:11AM -0400, Mark Gray wrote:
> The Open vSwitch kernel module uses the upcall mechanism to send
> packets from kernel space to user space when it misses in the kernel
> space flow table. The upcall sends packets via a Netlink socket.
> Currently, a Netlink socket is created for every vport. In this way,
> there is a 1:1 mapping between a vport and a Netlink socket.
> When a packet is received by a vport, if it needs to be sent to
> user space, it is sent via the corresponding Netlink socket.
> 
> This mechanism, with various iterations of the corresponding user
> space code, has seen some limitations and issues:
> 
> * On systems with a large number of vports, there is correspondingly
> a large number of Netlink sockets which can limit scaling.
> (https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
> * Packet reordering on upcalls.
> (https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
> * A thundering herd issue.
> (https://bugzilla.redhat.com/show_bug.cgi?id=183)
> 
> This patch introduces an alternative, feature-negotiated, upcall
> mode using a per-cpu dispatch rather than a per-vport dispatch.
> 
> In this mode, the Netlink socket to be used for the upcall is
> selected based on the CPU of the thread that is executing the upcall.
> In this way, it resolves the issues above as:
> 
> a) The number of Netlink sockets scales with the number of CPUs
> rather than the number of vports.
> b) Ordering per-flow is maintained as packets are distributed to
> CPUs based on mechanisms such as RSS and flows are distributed
> to a single user space thread.
> c) Packets from a flow can only wake up one user space thread.
> 
> Reported-at: https://bugzilla.redhat.com/1844576
> Signed-off-by: Mark Gray 
> ---
> 
> Notes:
> v1 - Reworked based on Flavio's comments:
>  * change DISPATCH_MODE_PER_CPU() to inline function
>  * add `ovs-appctl` command to check dispatch mode for datapaths
>  * fixed issue with userspace actions (tested using `ovs-ofctl 
> monitor br0 65534 -P nxt_packet_in`)
>  * update documentation as requested
> 
>  .../linux/compat/include/linux/openvswitch.h  |   7 +
>  lib/dpif-netdev.c |   1 +
>  lib/dpif-netlink.c| 456 --
>  lib/dpif-provider.h   |  32 +-
>  lib/dpif.c|  17 +
>  lib/dpif.h|   1 +
>  ofproto/ofproto-dpif-upcall.c |  51 +-
>  ofproto/ofproto.c |  12 -
>  vswitchd/vswitch.xml  |  23 +-
>  9 files changed, 504 insertions(+), 96 deletions(-)
> 
> diff --git a/datapath/linux/compat/include/linux/openvswitch.h 
> b/datapath/linux/compat/include/linux/openvswitch.h
> index 875de20250ce..f29265df055e 100644
> --- a/datapath/linux/compat/include/linux/openvswitch.h
> +++ b/datapath/linux/compat/include/linux/openvswitch.h
> @@ -89,6 +89,8 @@ enum ovs_datapath_cmd {
>   * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
>   * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
>   * not be sent.
> + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
> + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
>   * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
>   * datapath.  Always present in notifications.
>   * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for 
> the
> @@ -105,6 +107,8 @@ enum ovs_datapath_attr {
>   OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */
>   OVS_DP_ATTR_USER_FEATURES,  /* OVS_DP_F_*  */
>   OVS_DP_ATTR_PAD,
> + OVS_DP_ATTR_PAD2,
> + OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls */
>   __OVS_DP_ATTR_MAX
>  };
>  
> @@ -146,6 +150,9 @@ struct ovs_vport_stats {
>  /* Allow tc offload recirc sharing */
>  #define OVS_DP_F_TC_RECIRC_SHARING  (1 << 2)
>  
> +/* Allow per-cpu dispatch of upcalls */
> +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3)
> +
>  /* Fixed logical ports. */
>  #define OVSP_LOCAL  ((__u32)0)
>  
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index c5ab35d2a5a5..b2c2baadf4f3 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -8562,6 +8562,7 @@ const struct dpif_class dpif_netdev_class = {
>  dpif_netdev_operate,
>  NULL,   /* recv_set */
>  NULL,   /* handlers_set */
> +NULL,   /* number_handlers_required */
>  dpif_netdev_set_config,
>  dpif_netdev_queue_to_priority,
>  NULL,   /* recv */
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index f92905dd83fd..2399879aea3e 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -98,6 +98,8 @@ struct dpif_netlink_dp {
>   

[ovs-dev] [PATCH 3/3] dpif-netlink: Introduce per-cpu upcall dispatch

2021-06-30 Thread Mark Gray
The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.

This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:

* On systems with a large number of vports, there is a correspondingly
  large number of Netlink sockets, which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=1834444)

This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.

In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:

a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.

Reported-at: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray 
---

Notes:
v1 - Reworked based on Flavio's comments:
 * change DISPATCH_MODE_PER_CPU() to inline function
 * add `ovs-appctl` command to check dispatch mode for datapaths
 * fixed issue with userspace actions (tested using
   `ovs-ofctl monitor br0 65534 -P nxt_packet_in`)
 * update documentation as requested

 .../linux/compat/include/linux/openvswitch.h  |   7 +
 lib/dpif-netdev.c |   1 +
 lib/dpif-netlink.c| 456 --
 lib/dpif-provider.h   |  32 +-
 lib/dpif.c|  17 +
 lib/dpif.h|   1 +
 ofproto/ofproto-dpif-upcall.c |  51 +-
 ofproto/ofproto.c |  12 -
 vswitchd/vswitch.xml  |  23 +-
 9 files changed, 504 insertions(+), 96 deletions(-)

diff --git a/datapath/linux/compat/include/linux/openvswitch.h b/datapath/linux/compat/include/linux/openvswitch.h
index 875de20250ce..f29265df055e 100644
--- a/datapath/linux/compat/include/linux/openvswitch.h
+++ b/datapath/linux/compat/include/linux/openvswitch.h
@@ -89,6 +89,8 @@ enum ovs_datapath_cmd {
  * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
  * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
  * not be sent.
+ * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
+ * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
  * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
  * datapath.  Always present in notifications.
  * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
@@ -105,6 +107,8 @@ enum ovs_datapath_attr {
OVS_DP_ATTR_MEGAFLOW_STATS, /* struct ovs_dp_megaflow_stats */
OVS_DP_ATTR_USER_FEATURES,  /* OVS_DP_F_*  */
OVS_DP_ATTR_PAD,
+   OVS_DP_ATTR_PAD2,
+   OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls */
__OVS_DP_ATTR_MAX
 };
 
@@ -146,6 +150,9 @@ struct ovs_vport_stats {
 /* Allow tc offload recirc sharing */
 #define OVS_DP_F_TC_RECIRC_SHARING  (1 << 2)
 
+/* Allow per-cpu dispatch of upcalls */
+#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3)
+
 /* Fixed logical ports. */
 #define OVSP_LOCAL  ((__u32)0)
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c5ab35d2a5a5..b2c2baadf4f3 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -8562,6 +8562,7 @@ const struct dpif_class dpif_netdev_class = {
 dpif_netdev_operate,
 NULL,   /* recv_set */
 NULL,   /* handlers_set */
+NULL,   /* number_handlers_required */
 dpif_netdev_set_config,
 dpif_netdev_queue_to_priority,
 NULL,   /* recv */
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index f92905dd83fd..2399879aea3e 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -98,6 +98,8 @@ struct dpif_netlink_dp {
 const struct ovs_dp_stats *stats;  /* OVS_DP_ATTR_STATS. */
 const struct ovs_dp_megaflow_stats *megaflow_stats;
/* OVS_DP_ATTR_MEGAFLOW_STATS.*/
+const uint32_t *upcall_pids;   /* OVS_DP_ATTR_PER_CPU_PIDS */
+uint32_t n_upcall_pids;
 };
 
 static void dpif_netlink_dp_