Re: [ovs-dev] [RFC net-next] openvswitch: Introduce per-cpu upcall dispatch

2021-06-30 Thread Mark Gray
On 19/05/2021 22:47, Pravin Shelar wrote:

Thanks for the review and sorry for the delay. I will be more responsive
to any further changes from this point on.

> On Fri, Apr 30, 2021 at 8:33 AM Mark Gray  wrote:
>>
>> The Open vSwitch kernel module uses the upcall mechanism to send
>> packets from kernel space to user space when it misses in the kernel
>> space flow table. The upcall sends packets via a Netlink socket.
>> Currently, a Netlink socket is created for every vport. In this way,
>> there is a 1:1 mapping between a vport and a Netlink socket.
>> When a packet is received by a vport, if it needs to be sent to
>> user space, it is sent via the corresponding Netlink socket.
>>
>> This mechanism, with various iterations of the corresponding user
>> space code, has seen some limitations and issues:
>>
>> * On systems with a large number of vports, there is a correspondingly
>> large number of Netlink sockets which can limit scaling.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
>> * Packet reordering on upcalls.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
>> * A thundering herd issue.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=183)
>>
>> This patch introduces an alternative, feature-negotiated, upcall
>> mode using a per-cpu dispatch rather than a per-vport dispatch.
>>
>> In this mode, the Netlink socket to be used for the upcall is
>> selected based on the CPU of the thread that is executing the upcall.
>> In this way, it resolves the issues above as:
>>
>> a) The number of Netlink sockets scales with the number of CPUs
>> rather than the number of vports.
>> b) Ordering per-flow is maintained as packets are distributed to
>> CPUs based on mechanisms such as RSS and flows are distributed
>> to a single user space thread.
>> c) Packets from a flow can only wake up one user space thread.
>>
>> The corresponding user space code can be found at:
>> https://mail.openvswitch.org/pipermail/ovs-dev/2021-April/382618.html
>>
>> Bugzilla: https://bugzilla.redhat.com/1844576
>> Signed-off-by: Mark Gray 
>> ---
>>  include/uapi/linux/openvswitch.h |  8 
>>  net/openvswitch/datapath.c   | 70 +++-
>>  net/openvswitch/datapath.h   | 18 
>>  net/openvswitch/flow_netlink.c   |  4 --
>>  4 files changed, 94 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>> index 8d16744edc31..6571b57b2268 100644
>> --- a/include/uapi/linux/openvswitch.h
>> +++ b/include/uapi/linux/openvswitch.h
>> @@ -70,6 +70,8 @@ enum ovs_datapath_cmd {
>>   * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
>>   * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
>>   * not be sent.
>> + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
>> + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
>>   * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
>>   * datapath.  Always present in notifications.
>>   * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
>> @@ -87,6 +89,9 @@ enum ovs_datapath_attr {
>> OVS_DP_ATTR_USER_FEATURES,  /* OVS_DP_F_*  */
>> OVS_DP_ATTR_PAD,
>> OVS_DP_ATTR_MASKS_CACHE_SIZE,
>> +   OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls in
>> +                                * per-cpu dispatch mode
>> +                                */
>> __OVS_DP_ATTR_MAX
>>  };
>>
>> @@ -127,6 +132,9 @@ struct ovs_vport_stats {
>>  /* Allow tc offload recirc sharing */
>>  #define OVS_DP_F_TC_RECIRC_SHARING (1 << 2)
>>
>> +/* Allow per-cpu dispatch of upcalls */
>> +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU   (1 << 3)
>> +
>>  /* Fixed logical ports. */
>>  #define OVSP_LOCAL  ((__u32)0)
>>
>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>> index 9d6ef6cb9b26..98d54f41fdaa 100644
>> --- a/net/openvswitch/datapath.c
>> +++ b/net/openvswitch/datapath.c
>> @@ -121,6 +121,8 @@ int lockdep_ovsl_is_held(void)
>>  #endif
>>
>>  static struct vport *new_vport(const struct vport_parms *);
>> +static u32 ovs_dp_get_upcall_portid(const struct datapath *, uint32_t);
>> +static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
>>  static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
>>  const struct sw_flow_key *,
>>  const struct dp_upcall_info *,
>> @@ -238,7 +240,12 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
>>
>> memset(&upcall, 0, sizeof(upcall));
>> upcall.cmd = OVS_PACKET_CMD_MISS;
>> -   upcall.portid = ovs_vport_find_upcall_portid(p, skb);
>> +
>> +   if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
>> +   upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id());
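
The branch quoted above is the core of the change: with the feature bit set,
the upcall portid is chosen by the executing CPU rather than by the vport. A
minimal, stand-alone C model of that lookup is sketched below; the table
layout and helper name are assumptions for illustration, since the body of
ovs_dp_get_upcall_portid() is truncated in this archive.

/* Stand-alone model of per-cpu upcall dispatch; compile with cc.
 * The struct layout is an illustrative assumption, not the patch's. */
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

struct upcall_portids {
	uint32_t n_pids;           /* number of portids supplied */
	uint32_t pids[NR_CPUS];    /* one Netlink portid per CPU */
};

/* Mirrors the idea of ovs_dp_get_upcall_portid(): index by CPU id. */
static uint32_t get_upcall_portid(const struct upcall_portids *p, uint32_t cpu)
{
	if (p->n_pids == 0)
		return 0;          /* no sockets registered */
	return p->pids[cpu % p->n_pids];
}

int main(void)
{
	struct upcall_portids p = { .n_pids = 4, .pids = { 101, 102, 103, 104 } };
	uint32_t cpu;

	/* Every packet processed on a given CPU maps to the same portid,
	 * so a flow steered to one CPU by RSS wakes exactly one thread. */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		printf("cpu %u -> portid %u\n", cpu, get_upcall_portid(&p, cpu));
	return 0;
}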

Re: [ovs-dev] [RFC net-next] openvswitch: Introduce per-cpu upcall dispatch

2021-06-30 Thread Mark Gray
On 28/05/2021 20:49, Flavio Leitner wrote:
> 
> Hi Mark,
> 
> I think this patch is going in the right direction but there
> are some points that I think we should address. See below.
> 
> On Fri, Apr 30, 2021 at 11:33:25AM -0400, Mark Gray wrote:
>> The Open vSwitch kernel module uses the upcall mechanism to send
>> packets from kernel space to user space when it misses in the kernel
>> space flow table. The upcall sends packets via a Netlink socket.
>> Currently, a Netlink socket is created for every vport. In this way,
>> there is a 1:1 mapping between a vport and a Netlink socket.
>> When a packet is received by a vport, if it needs to be sent to
>> user space, it is sent via the corresponding Netlink socket.
>>
>> This mechanism, with various iterations of the corresponding user
>> space code, has seen some limitations and issues:
>>
>> * On systems with a large number of vports, there is a correspondingly
>> large number of Netlink sockets which can limit scaling.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
>> * Packet reordering on upcalls.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
>> * A thundering herd issue.
>> (https://bugzilla.redhat.com/show_bug.cgi?id=183)
>>
>> This patch introduces an alternative, feature-negotiated, upcall
>> mode using a per-cpu dispatch rather than a per-vport dispatch.
>>
>> In this mode, the Netlink socket to be used for the upcall is
>> selected based on the CPU of the thread that is executing the upcall.
>> In this way, it resolves the issues above as:
>>
>> a) The number of Netlink sockets scales with the number of CPUs
>> rather than the number of vports.
>> b) Ordering per-flow is maintained as packets are distributed to
>> CPUs based on mechanisms such as RSS and flows are distributed
>> to a single user space thread.
>> c) Packets from a flow can only wake up one user space thread.
>>
>> The corresponding user space code can be found at:
>> https://mail.openvswitch.org/pipermail/ovs-dev/2021-April/382618.html
> 
> Thanks for writing a nice commit description.
> 
>>
>> Bugzilla: https://bugzilla.redhat.com/1844576
>> Signed-off-by: Mark Gray 
>> ---
>>  include/uapi/linux/openvswitch.h |  8 
>>  net/openvswitch/datapath.c   | 70 +++-
>>  net/openvswitch/datapath.h   | 18 
>>  net/openvswitch/flow_netlink.c   |  4 --
>>  4 files changed, 94 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
>> index 8d16744edc31..6571b57b2268 100644
>> --- a/include/uapi/linux/openvswitch.h
>> +++ b/include/uapi/linux/openvswitch.h
>> @@ -70,6 +70,8 @@ enum ovs_datapath_cmd {
>>   * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
>>   * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
>>   * not be sent.
>> + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
>> + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
>>   * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
>>   * datapath.  Always present in notifications.
>>   * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
>> @@ -87,6 +89,9 @@ enum ovs_datapath_attr {
>>  OVS_DP_ATTR_USER_FEATURES,  /* OVS_DP_F_*  */
>>  OVS_DP_ATTR_PAD,
>>  OVS_DP_ATTR_MASKS_CACHE_SIZE,
>> +OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls in
>> +                             * per-cpu dispatch mode
>> +                             */
>>  __OVS_DP_ATTR_MAX
>>  };
>>  
>> @@ -127,6 +132,9 @@ struct ovs_vport_stats {
>>  /* Allow tc offload recirc sharing */
>>  #define OVS_DP_F_TC_RECIRC_SHARING  (1 << 2)
>>  
>> +/* Allow per-cpu dispatch of upcalls */
>> +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU(1 << 3)
>> +
>>  /* Fixed logical ports. */
>>  #define OVSP_LOCAL  ((__u32)0)
>>  
>> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
>> index 9d6ef6cb9b26..98d54f41fdaa 100644
>> --- a/net/openvswitch/datapath.c
>> +++ b/net/openvswitch/datapath.c
>> @@ -121,6 +121,8 @@ int lockdep_ovsl_is_held(void)
>>  #endif
>>  
>>  static struct vport *new_vport(const struct vport_parms *);
>> +static u32 ovs_dp_get_upcall_portid(const struct datapath *, uint32_t);
>> +static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
>>  static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
>>   const struct sw_flow_key *,
>>   const struct dp_upcall_info *,
>> @@ -238,7 +240,12 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
>>  
>>  memset(&upcall, 0, sizeof(upcall));
>>  upcall.cmd = OVS_PACKET_CMD_MISS;
>> -upcall.portid = ovs_vport_find_upcall_portid(p, skb);
>> +
>> +if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
>> +upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id());
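
While reading the replies it helps to keep the user-space side in view: the
handler threads must pass the kernel one Netlink portid per CPU through
OVS_DP_ATTR_PER_CPU_PIDS. The sketch below assembles that flat u32 array;
the fake portid values and the shape of the loop are hypothetical, not taken
from the linked ovs-dev user space patch.

/* Hypothetical sketch: build the flat array of u32 Netlink portids that
 * OVS_DP_ATTR_PER_CPU_PIDS would carry, one slot per online CPU. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int main(void)
{
	long n_cpus = sysconf(_SC_NPROCESSORS_ONLN);
	uint32_t *pids;
	long i;

	if (n_cpus < 1)
		return 1;
	pids = calloc((size_t)n_cpus, sizeof(*pids));
	if (!pids)
		return 1;

	/* In real code each slot holds the nl_pid of the Netlink socket
	 * owned by the handler thread for that CPU; fake IDs here. */
	for (i = 0; i < n_cpus; i++)
		pids[i] = 1000 + (uint32_t)i;

	/* The attribute payload is just this array: n_cpus * sizeof(u32)
	 * bytes, which the kernel can index with smp_processor_id(). */
	printf("payload: %zu bytes for %ld CPUs\n",
	       (size_t)n_cpus * sizeof(*pids), n_cpus);
	free(pids);
	return 0;
}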

Re: [ovs-dev] [RFC net-next] openvswitch: Introduce per-cpu upcall dispatch

2021-05-28 Thread Flavio Leitner


Hi Mark,

I think this patch is going in the right direction but there
are some points that I think we should address. See below.

On Fri, Apr 30, 2021 at 11:33:25AM -0400, Mark Gray wrote:
> The Open vSwitch kernel module uses the upcall mechanism to send
> packets from kernel space to user space when it misses in the kernel
> space flow table. The upcall sends packets via a Netlink socket.
> Currently, a Netlink socket is created for every vport. In this way,
> there is a 1:1 mapping between a vport and a Netlink socket.
> When a packet is received by a vport, if it needs to be sent to
> user space, it is sent via the corresponding Netlink socket.
> 
> This mechanism, with various iterations of the corresponding user
> space code, has seen some limitations and issues:
> 
> * On systems with a large number of vports, there is a correspondingly
> large number of Netlink sockets which can limit scaling.
> (https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
> * Packet reordering on upcalls.
> (https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
> * A thundering herd issue.
> (https://bugzilla.redhat.com/show_bug.cgi?id=183)
> 
> This patch introduces an alternative, feature-negotiated, upcall
> mode using a per-cpu dispatch rather than a per-vport dispatch.
> 
> In this mode, the Netlink socket to be used for the upcall is
> selected based on the CPU of the thread that is executing the upcall.
> In this way, it resolves the issues above as:
> 
> a) The number of Netlink sockets scales with the number of CPUs
> rather than the number of vports.
> b) Ordering per-flow is maintained as packets are distributed to
> CPUs based on mechanisms such as RSS and flows are distributed
> to a single user space thread.
> c) Packets from a flow can only wake up one user space thread.
> 
> The corresponding user space code can be found at:
> https://mail.openvswitch.org/pipermail/ovs-dev/2021-April/382618.html

Thanks for writing a nice commit description.

> 
> Bugzilla: https://bugzilla.redhat.com/1844576
> Signed-off-by: Mark Gray 
> ---
>  include/uapi/linux/openvswitch.h |  8 
>  net/openvswitch/datapath.c   | 70 +++-
>  net/openvswitch/datapath.h   | 18 
>  net/openvswitch/flow_netlink.c   |  4 --
>  4 files changed, 94 insertions(+), 6 deletions(-)
> 
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index 8d16744edc31..6571b57b2268 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -70,6 +70,8 @@ enum ovs_datapath_cmd {
>   * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
>   * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
>   * not be sent.
> + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
> + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
>   * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
>   * datapath.  Always present in notifications.
>   * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
> @@ -87,6 +89,9 @@ enum ovs_datapath_attr {
>   OVS_DP_ATTR_USER_FEATURES,  /* OVS_DP_F_*  */
>   OVS_DP_ATTR_PAD,
>   OVS_DP_ATTR_MASKS_CACHE_SIZE,
> + OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls in
> +                              * per-cpu dispatch mode
> +                              */
>   __OVS_DP_ATTR_MAX
>  };
>  
> @@ -127,6 +132,9 @@ struct ovs_vport_stats {
>  /* Allow tc offload recirc sharing */
>  #define OVS_DP_F_TC_RECIRC_SHARING   (1 << 2)
>  
> +/* Allow per-cpu dispatch of upcalls */
> +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3)
> +
>  /* Fixed logical ports. */
>  #define OVSP_LOCAL  ((__u32)0)
>  
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index 9d6ef6cb9b26..98d54f41fdaa 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -121,6 +121,8 @@ int lockdep_ovsl_is_held(void)
>  #endif
>  
>  static struct vport *new_vport(const struct vport_parms *);
> +static u32 ovs_dp_get_upcall_portid(const struct datapath *, uint32_t);
> +static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
>  static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
>const struct sw_flow_key *,
>const struct dp_upcall_info *,
> @@ -238,7 +240,12 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
>  
>   memset(&upcall, 0, sizeof(upcall));
>   upcall.cmd = OVS_PACKET_CMD_MISS;
> - upcall.portid = ovs_vport_find_upcall_portid(p, skb);
> +
> + if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
> + upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id());
> + else
> + upcall.portid = ovs_vport_find_upcall_portid(p, skb);
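
The mode is strictly opt-in: user space must set
OVS_DP_F_DISPATCH_UPCALL_PER_CPU in OVS_DP_ATTR_USER_FEATURES, and the kernel
consults the per-cpu table only when the bit is present, which keeps old
user space on the per-vport path. A small model of that gate follows; the
datapath struct here is a stand-in, not the kernel's.

/* Model of the feature-bit gate in ovs_dp_process_packet(). */
#include <stdint.h>
#include <stdio.h>

#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU (1 << 3)	/* from the patch */

struct fake_dp {			/* stand-in for struct datapath */
	uint32_t user_features;
};

static const char *dispatch_mode(const struct fake_dp *dp)
{
	if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
		return "per-cpu dispatch";
	return "per-vport dispatch";	/* legacy default */
}

int main(void)
{
	struct fake_dp legacy = { .user_features = 0 };
	struct fake_dp percpu = {
		.user_features = OVS_DP_F_DISPATCH_UPCALL_PER_CPU,
	};

	printf("legacy user space:     %s\n", dispatch_mode(&legacy));
	printf("negotiated user space: %s\n", dispatch_mode(&percpu));
	return 0;
}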

Re: [ovs-dev] [RFC net-next] openvswitch: Introduce per-cpu upcall dispatch

2021-05-19 Thread Pravin Shelar
On Fri, Apr 30, 2021 at 8:33 AM Mark Gray  wrote:
>
> The Open vSwitch kernel module uses the upcall mechanism to send
> packets from kernel space to user space when it misses in the kernel
> space flow table. The upcall sends packets via a Netlink socket.
> Currently, a Netlink socket is created for every vport. In this way,
> there is a 1:1 mapping between a vport and a Netlink socket.
> When a packet is received by a vport, if it needs to be sent to
> user space, it is sent via the corresponding Netlink socket.
>
> This mechanism, with various iterations of the corresponding user
> space code, has seen some limitations and issues:
>
> * On systems with a large number of vports, there is a correspondingly
> large number of Netlink sockets which can limit scaling.
> (https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
> * Packet reordering on upcalls.
> (https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
> * A thundering herd issue.
> (https://bugzilla.redhat.com/show_bug.cgi?id=183)
>
> This patch introduces an alternative, feature-negotiated, upcall
> mode using a per-cpu dispatch rather than a per-vport dispatch.
>
> In this mode, the Netlink socket to be used for the upcall is
> selected based on the CPU of the thread that is executing the upcall.
> In this way, it resolves the issues above as:
>
> a) The number of Netlink sockets scales with the number of CPUs
> rather than the number of vports.
> b) Ordering per-flow is maintained as packets are distributed to
> CPUs based on mechanisms such as RSS and flows are distributed
> to a single user space thread.
> c) Packets from a flow can only wake up one user space thread.
>
> The corresponding user space code can be found at:
> https://mail.openvswitch.org/pipermail/ovs-dev/2021-April/382618.html
>
> Bugzilla: https://bugzilla.redhat.com/1844576
> Signed-off-by: Mark Gray 
> ---
>  include/uapi/linux/openvswitch.h |  8 
>  net/openvswitch/datapath.c   | 70 +++-
>  net/openvswitch/datapath.h   | 18 
>  net/openvswitch/flow_netlink.c   |  4 --
>  4 files changed, 94 insertions(+), 6 deletions(-)
>
> diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
> index 8d16744edc31..6571b57b2268 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -70,6 +70,8 @@ enum ovs_datapath_cmd {
>   * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
>   * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
>   * not be sent.
> + * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
> + * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
>   * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
>   * datapath.  Always present in notifications.
>   * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
> @@ -87,6 +89,9 @@ enum ovs_datapath_attr {
> OVS_DP_ATTR_USER_FEATURES,  /* OVS_DP_F_*  */
> OVS_DP_ATTR_PAD,
> OVS_DP_ATTR_MASKS_CACHE_SIZE,
> +   OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls in
> +                                * per-cpu dispatch mode
> +                                */
> __OVS_DP_ATTR_MAX
>  };
>
> @@ -127,6 +132,9 @@ struct ovs_vport_stats {
>  /* Allow tc offload recirc sharing */
>  #define OVS_DP_F_TC_RECIRC_SHARING (1 << 2)
>
> +/* Allow per-cpu dispatch of upcalls */
> +#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU   (1 << 3)
> +
>  /* Fixed logical ports. */
>  #define OVSP_LOCAL  ((__u32)0)
>
> diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
> index 9d6ef6cb9b26..98d54f41fdaa 100644
> --- a/net/openvswitch/datapath.c
> +++ b/net/openvswitch/datapath.c
> @@ -121,6 +121,8 @@ int lockdep_ovsl_is_held(void)
>  #endif
>
>  static struct vport *new_vport(const struct vport_parms *);
> +static u32 ovs_dp_get_upcall_portid(const struct datapath *, uint32_t);
> +static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
>  static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
>  const struct sw_flow_key *,
>  const struct dp_upcall_info *,
> @@ -238,7 +240,12 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
>
> memset(&upcall, 0, sizeof(upcall));
> upcall.cmd = OVS_PACKET_CMD_MISS;
> -   upcall.portid = ovs_vport_find_upcall_portid(p, skb);
> +
> +   if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
> +   upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id());
> +   else
> +   upcall.portid = ovs_vport_find_upcall_portid(p, skb);
> +
> upcall.mru = OVS_CB(skb)->mru;
> error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
> if (unlikely(error))
> @@ -1590,16 +1597,67 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
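
Point (b) of the commit message, that per-flow ordering survives, follows
from the composition of two stable mappings: RSS hashes a flow's 5-tuple to
one CPU, and the per-cpu table maps that CPU to one socket. The toy program
below walks that chain with a stand-in hash; real NICs use Toeplitz hashing,
not this.

/* Toy demonstration: same 5-tuple -> same CPU -> same portid, so all
 * upcalls of a flow stay ordered behind one user space thread. */
#include <stdint.h>
#include <stdio.h>

#define NR_CPUS 4

struct tuple {
	uint32_t saddr, daddr;
	uint16_t sport, dport;
	uint8_t proto;
};

static uint32_t toy_hash(const struct tuple *t)
{
	uint32_t h = 2166136261u;	/* FNV-style mix, illustrative only */

	h = (h ^ t->saddr) * 16777619u;
	h = (h ^ t->daddr) * 16777619u;
	h = (h ^ t->sport) * 16777619u;
	h = (h ^ t->dport) * 16777619u;
	h = (h ^ t->proto) * 16777619u;
	return h;
}

int main(void)
{
	static const uint32_t portids[NR_CPUS] = { 101, 102, 103, 104 };
	struct tuple flow = { 0x0a000001, 0x0a000002, 12345, 80, 6 };
	int pkt;

	for (pkt = 0; pkt < 3; pkt++) {
		uint32_t cpu = toy_hash(&flow) % NR_CPUS;

		/* Identical output every iteration: one flow, one thread. */
		printf("packet %d of flow -> cpu %u -> portid %u\n",
		       pkt, cpu, portids[cpu]);
	}
	return 0;
}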

[ovs-dev] [RFC net-next] openvswitch: Introduce per-cpu upcall dispatch

2021-04-30 Thread Mark Gray
The Open vSwitch kernel module uses the upcall mechanism to send
packets from kernel space to user space when it misses in the kernel
space flow table. The upcall sends packets via a Netlink socket.
Currently, a Netlink socket is created for every vport. In this way,
there is a 1:1 mapping between a vport and a Netlink socket.
When a packet is received by a vport, if it needs to be sent to
user space, it is sent via the corresponding Netlink socket.

This mechanism, with various iterations of the corresponding user
space code, has seen some limitations and issues:

* On systems with a large number of vports, there is a correspondingly
large number of Netlink sockets which can limit scaling.
(https://bugzilla.redhat.com/show_bug.cgi?id=1526306)
* Packet reordering on upcalls.
(https://bugzilla.redhat.com/show_bug.cgi?id=1844576)
* A thundering herd issue.
(https://bugzilla.redhat.com/show_bug.cgi?id=183)

This patch introduces an alternative, feature-negotiated, upcall
mode using a per-cpu dispatch rather than a per-vport dispatch.

In this mode, the Netlink socket to be used for the upcall is
selected based on the CPU of the thread that is executing the upcall.
In this way, it resolves the issues above as:

a) The number of Netlink sockets scales with the number of CPUs
rather than the number of vports.
b) Ordering per-flow is maintained as packets are distributed to
CPUs based on mechanisms such as RSS and flows are distributed
to a single user space thread.
c) Packets from a flow can only wake up one user space thread.

The corresponding user space code can be found at:
https://mail.openvswitch.org/pipermail/ovs-dev/2021-April/382618.html

Bugzilla: https://bugzilla.redhat.com/1844576
Signed-off-by: Mark Gray 
---
 include/uapi/linux/openvswitch.h |  8 
 net/openvswitch/datapath.c   | 70 +++-
 net/openvswitch/datapath.h   | 18 
 net/openvswitch/flow_netlink.c   |  4 --
 4 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 8d16744edc31..6571b57b2268 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -70,6 +70,8 @@ enum ovs_datapath_cmd {
  * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
  * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
  * not be sent.
+ * OVS_DP_ATTR_PER_CPU_PIDS: Per-cpu array of PIDs for upcalls when
+ * OVS_DP_F_DISPATCH_UPCALL_PER_CPU feature is set.
  * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
  * datapath.  Always present in notifications.
  * @OVS_DP_ATTR_MEGAFLOW_STATS: Statistics about mega flow masks usage for the
@@ -87,6 +89,9 @@ enum ovs_datapath_attr {
OVS_DP_ATTR_USER_FEATURES,  /* OVS_DP_F_*  */
OVS_DP_ATTR_PAD,
OVS_DP_ATTR_MASKS_CACHE_SIZE,
+   OVS_DP_ATTR_PER_CPU_PIDS,   /* Netlink PIDS to receive upcalls in
+                                * per-cpu dispatch mode
+                                */
__OVS_DP_ATTR_MAX
 };
 
@@ -127,6 +132,9 @@ struct ovs_vport_stats {
 /* Allow tc offload recirc sharing */
 #define OVS_DP_F_TC_RECIRC_SHARING (1 << 2)
 
+/* Allow per-cpu dispatch of upcalls */
+#define OVS_DP_F_DISPATCH_UPCALL_PER_CPU   (1 << 3)
+
 /* Fixed logical ports. */
 #define OVSP_LOCAL  ((__u32)0)
 
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9d6ef6cb9b26..98d54f41fdaa 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -121,6 +121,8 @@ int lockdep_ovsl_is_held(void)
 #endif
 
 static struct vport *new_vport(const struct vport_parms *);
+static u32 ovs_dp_get_upcall_portid(const struct datapath *, uint32_t);
+static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);
 static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
 const struct sw_flow_key *,
 const struct dp_upcall_info *,
@@ -238,7 +240,12 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 
memset(&upcall, 0, sizeof(upcall));
upcall.cmd = OVS_PACKET_CMD_MISS;
-   upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+
+   if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
+   upcall.portid = ovs_dp_get_upcall_portid(dp, smp_processor_id());
+   else
+   upcall.portid = ovs_vport_find_upcall_portid(p, skb);
+
upcall.mru = OVS_CB(skb)->mru;
error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
if (unlikely(error))
@@ -1590,16 +1597,67 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb,
 
 DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
 
+static int ovs_dp_set_upcall_portids(struct datapath *dp,
+                                    const struct nlattr *ids)
+{
+
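
The archive truncates ovs_dp_set_upcall_portids() at its opening brace. One
plausible shape for such a setter, modeled in user-space C, is sketched
below: validate the attribute length, copy the u32 array, and publish the
new table. This is an assumption for illustration, not the patch's actual
body; in the kernel, publication would additionally need RCU, with the old
table freed only after a grace period.

/* Hypothetical user-space model of a portid-table setter, loosely
 * following the kernel function's signature. Not the patch's code. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct upcall_portids {
	uint32_t n_pids;
	uint32_t pids[];		/* one entry per CPU */
};

struct fake_dp {			/* stand-in for struct datapath */
	struct upcall_portids *upcall_portids;
};

static int set_upcall_portids(struct fake_dp *dp,
			      const uint32_t *ids, size_t len)
{
	struct upcall_portids *table, *old;
	size_t n = len / sizeof(uint32_t);

	/* Reject an empty or misaligned payload up front. */
	if (n == 0 || len % sizeof(uint32_t))
		return -1;

	table = malloc(sizeof(*table) + n * sizeof(uint32_t));
	if (!table)
		return -1;
	table->n_pids = (uint32_t)n;
	memcpy(table->pids, ids, len);

	old = dp->upcall_portids;	/* kernel: rcu_replace_pointer() */
	dp->upcall_portids = table;
	free(old);			/* kernel: free after RCU grace period */
	return 0;
}

int main(void)
{
	struct fake_dp dp = { 0 };
	uint32_t ids[] = { 101, 102, 103, 104 };
	int err = set_upcall_portids(&dp, ids, sizeof(ids));

	free(dp.upcall_portids);
	return err ? 1 : 0;
}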