On 08.12.2019 10:38, Eli Britstein wrote:
> 
> On 12/5/2019 6:46 PM, Sriharsha Basavapatna wrote:
>> On Wed, Dec 4, 2019 at 8:55 PM Eli Britstein <el...@mellanox.com> wrote:
>>>
>>> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote:
>>>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <el...@mellanox.com> wrote:
>>>>> Signed-off-by: Eli Britstein <el...@mellanox.com>
>>>>> Reviewed-by: Oz Shlomo <o...@mellanox.com>
>>>>> ---
>>>>>    NEWS                           |  2 +
>>>>>    lib/netdev-offload-dpdk-flow.c | 87 
>>>>> ++++++++++++++++++++++++++++++++++++++++--
>>>>>    2 files changed, 85 insertions(+), 4 deletions(-)
>>>>>
>>>>> diff --git a/NEWS b/NEWS
>>>>> index 88b818948..ca9c2b230 100644
>>>>> --- a/NEWS
>>>>> +++ b/NEWS
>>>>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>>>>           if supported by libbpf.
>>>>>         * Add option to enable, disable and query TCP sequence checking in
>>>>>           conntrack.
>>>>> +   - DPDK:
>>>>> +     * Add hardware offload support for output actions.
>>>>>
>>>>>    v2.12.0 - 03 Sep 2019
>>>>>    ---------------------
>>>>> diff --git a/lib/netdev-offload-dpdk-flow.c 
>>>>> b/lib/netdev-offload-dpdk-flow.c
>>>>> index dbbc45e99..6e7efb315 100644
>>>>> --- a/lib/netdev-offload-dpdk-flow.c
>>>>> +++ b/lib/netdev-offload-dpdk-flow.c
>>>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct 
>>>>> rte_flow_action *actions)
>>>>>            } else {
>>>>>                ds_put_cstr(s, "  RSS = null\n");
>>>>>            }
>>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>>>>> +        const struct rte_flow_action_count *count = actions->conf;
>>>>> +
>>>>> +        ds_put_cstr(s, "rte flow count action:\n");
>>>>> +        if (count) {
>>>>> +            ds_put_format(s,
>>>>> +                          "  Count: shared=%d, id=%d\n",
>>>>> +                          count->shared, count->id);
>>>>> +        } else {
>>>>> +            ds_put_cstr(s, "  Count = null\n");
>>>>> +        }
>>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>>>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>>>>> +
>>>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>>>>> +        if (port_id) {
>>>>> +            ds_put_format(s,
>>>>> +                          "  Port-id: original=%d, id=%d\n",
>>>>> +                          port_id->original, port_id->id);
>>>>> +        } else {
>>>>> +            ds_put_cstr(s, "  Port-id = null\n");
>>>>> +        }
>>>>>        } else {
>>>>>            ds_put_format(s, "unknown rte flow action (%d)\n", 
>>>>> actions->type);
>>>>>        }
>>>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns 
>>>>> *patterns,
>>>>>        return 0;
>>>>>    }
>>>>>
>>>>> +static void
>>>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>>>>> +{
>>>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>>>>> +
>>>>> +    count->shared = 0;
>>>>> +    /* Each flow has a single count action, so no need of id */
>>>>> +    count->id = 0;
>>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>>>>> +}
>>>>> +
>>>>> +static void
>>>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>>>>> +                                    struct netdev *outdev)
>>>>> +{
>>>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>>>>> +
>>>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>>>>> +}
>>>>> +
>>>>> +static int
>>>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>>>>> +                                   const struct nlattr *nla,
>>>>> +                                   struct offload_info *info)
>>>>> +{
>>>>> +    struct netdev *outdev;
>>>>> +    odp_port_t port;
>>>>> +
>>>>> +    port = nl_attr_get_odp_port(nla);
>>>>> +    outdev = netdev_ports_get(port, info->dpif_class);
>>>>> +    if (outdev == NULL) {
>>>>> +        VLOG_DBG_RL(&error_rl,
>>>>> +                    "Cannot find netdev for odp port %d", port);
>>>>> +        return -1;
>>>>> +    }
>>>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
>>>>> +        VLOG_DBG_RL(&error_rl,
>>>>> +                    "Output to %s cannot be offloaded",
>>>>> +                    netdev_get_name(outdev));
>>>>> +        return -1;
>>>>> +    }
>>>>> +
>>>>> +    netdev_dpdk_flow_add_count_action(actions);
>>>>> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
>>>>> +    netdev_close(outdev);
>>>>> +
>>>>> +    return 0;
>>>>> +}
>>>>> +
>>>>>    int
>>>>>    netdev_dpdk_flow_actions_add(struct flow_actions *actions,
>>>>>                                 struct nlattr *nl_actions,
>>>>>                                 size_t nl_actions_len,
>>>>> -                             struct offload_info *info OVS_UNUSED)
>>>>> +                             struct offload_info *info)
>>>>>    {
>>>>>        struct nlattr *nla;
>>>>>        size_t left;
>>>>>
>>>>>        NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
>>>>> -        VLOG_DBG_RL(&error_rl,
>>>>> -                    "Unsupported action type %d", nl_attr_type(nla));
>>>>> -        return -1;
>>>>> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
>>>>> +            left <= NLA_ALIGN(nla->nla_len)) {
>>>>> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) 
>>>>> {
>>>>> +                return -1;
>>>>> +            }
>>>> The check in netdev_dpdk_flow_add_output_action() is insufficient to
>>>> decide whether the device is capable of full-offload, since it assumes
>>>> that the output action is supported if netdev_dpdk type is
>>>> DPDK_DEV_ETH.
>>>>
>>>> There are a couple of issues with this approach:
>>>>
>>>> 1. For a device that is not capable of full-offload, it results in 2
>>>> calls to the PMD for every flow-offload request. The first attempt for
>>>> full-offload fails, followed by another call for partial-offload.
>>> We cannot avoid this, as we want to do full offload, and only fallback
>>> to partial if it doesn't work.
>>>> 2. Even for devices that support full-offload, the offload might fail
>>>> if the in-port and out-port are not in the same switch-domain. For
>>>> example, if only partial-offload is configured (vhost-user ports) in
>>>> the vSwitch, then the PMD should check if the in-port and out-port are
>>>> on the same switch (domain-id must match) and fail the request
>>>> otherwise. This check is important to alert the user of
>>>> misconfigurations. If OVS doesn't do this check, all pmd drivers will
>>>> have to do this check and that duplication is not desirable.
>>> I don't think it's in the OVS scope to do such checks, but it should be
>>> in the driver's level (PMD), the same way it is in the kernel offloading
>>> using TC.
>>>
>> IMO, it is not out-of-scope for OVS to add these checks to make better
>> offload decisions:
>> - This layer of OVS (netdev-offload-dpdk) is already offload-aware and
>> it is using a set of offload APIs (rte_flow)
> It is aware of the offload APIs, but not on specific HW capabilities, so 
> in some cases
> 
> trying to make such restrictions won't cover all use cases in best case and 
> result in
> redundant code.
> 
> For example, Mellanox ConnectX-5/ConnectX-6 are dual port NICs, with
> embedded eswitch. In ConnectX-5 we cannot forward from pf0vf0 to pf1vf0, but
> we can in ConnectX-6.
> 
> It is not something exposed at all to the user, specifically to OVS.
> 
> So, I think those kind of decisions that involve HW specifics should be done 
> in the
> layer that has the HW knowledge, e.g. - the driver.
> 
>> - Full offload may not be the preferred configuration for a customer
>> (since it requires additional config like SRIOV). That is, the code
>> should not make the distinction that full-offload is always preferred
>> and partial-offload is only a fallback option.
> At the moment, any kind of offload requires SRIOV.


I don't think that SRIOV required to use Flow Director on Intel NICs.
So, partial offloading should work on Intel NICs wihtout enabling SRIOV.


> BTW, to resolve this, 
> after full
> 
> offload is accepted, we plan to resubmit vDPA (see
> https://patchwork.ozlabs.org/cover/1178472/) that will allow using offloads 
> for
> virtio as well.
> 
>> - Without such checks, offload rate of partial-offload configuration
>> could be potentially impacted with a large number of flows.
> I don't think failing in the PMD level will be much slower than failing 
> in OVS level.
>> - Similar improvement (checking/logging in OVS as opposed to
>> individual drivers) could be made in TC kernel offloads too
> In order to add such checks, we can maybe think in the future of a 
> capability
> 
> querying mechanism, to query each device (at init time, maybe using
> rte_flow_validate) and make decisions based on that.
> 
> However, I think this mechanism is neither a simple one, nor should be a
> prerequisite for this patch-set.
> 
>>
>>> I agree that the argument of logging is important, but this is something
>>> that I think out of scope of this series. All prints in DPDK go to
>>> /dev/null, and not logged into OVS' log.
>>>
>>> BTW, in kernel, there is a similar issue with drivers that logs using
>>> extack. Those messages are not dumped to dmesg, and OVS also doesn't
>>> read them to its log. Also out of scope of this series...
>>>
>>>> Here's how this could be addressed, by identifying whether the device
>>>> is capable of full-offload or partial-offload.
>>>>
>>>> 1. Check if the target device (in-port) is full-offload capable. This
>>>> could be done by checking if rte_flow api is supported, along with a
>>>> valid switch-domain-id. We could even improve this further, similar to
>>>> the kernel switchdev framework in which the device mode is
>>>> configurable (legacy/eswitch modes) and available to consumers, but
>>>> this might require some more DPDK framework changes.
>>> I think we can enhance to check if both are ETH. However, As I wrote
>>> above, I think checking other HW properties should not be in OVS scope,
>>> but in driver's domain.
>>>> 2. If the device is full-offload capable, and if output action is
>>>> specified,  check if in-port and out-port are on the same switch
>>>> (switch-domain-id match mentioned above).
>>>>
>>>> If both 1 and 2 succeed, then the device is capable of full-offload
>>>> and we can proceed with full-offload item/action setup, followed by
>>>> rte_flow_create().
>>>>
>>>> Otherwise, we fallback to partial-offload code path. Here, we can
>>>> handle the case where a vhost-user interface is either the in-port or
>>>> out-port of the flow being offloaded. Depending on whether the
>>>> vhost-user interface is the in-port or out-port, if the other port
>>>> associated with the flow is offload capable (DPDK_DEV_ETH), then we
>>>> offload the flow to that device. This would help us build further
>>>> extensions to the existing partial-offload framework by adding more
>>>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
>>>> the output action that still needs to be processed in software.
>>>>
>>>> Thanks,
>>>> -Harsha
>>>>
>>>>> +        } else {
>>>>> +            VLOG_DBG_RL(&error_rl,
>>>>> +                        "Unsupported action type %d", nl_attr_type(nla));
>>>>> +            return -1;
>>>>> +        }
>>>>>        }
>>>>>
>>>>>        add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>>>> --
>>>>> 2.14.5
>>>>>
>>>>> _______________________________________________
>>>>> dev mailing list
>>>>> d...@openvswitch.org
>>>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&amp;data=02%7C01%7Celibr%40mellanox.com%7Cc11727da2a894b08f61608d779a2c470%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637111612310807084&amp;sdata=kturtC%2BTMsvvgZ1iaBa9WqHfS60CjaJ8Oqp%2BGDvbfgY%3D&amp;reserved=0
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to