On 12/4/2019 5:42 PM, Ilya Maximets wrote:
> On 04.12.2019 16:25, Eli Britstein wrote:
>> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote:
>>> On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <el...@mellanox.com> wrote:
>>>> Signed-off-by: Eli Britstein <el...@mellanox.com>
>>>> Reviewed-by: Oz Shlomo <o...@mellanox.com>
>>>> ---
>>>>    NEWS                           |  2 +
>>>>    lib/netdev-offload-dpdk-flow.c | 87 
>>>> ++++++++++++++++++++++++++++++++++++++++--
>>>>    2 files changed, 85 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/NEWS b/NEWS
>>>> index 88b818948..ca9c2b230 100644
>>>> --- a/NEWS
>>>> +++ b/NEWS
>>>> @@ -10,6 +10,8 @@ Post-v2.12.0
>>>>           if supported by libbpf.
>>>>         * Add option to enable, disable and query TCP sequence checking in
>>>>           conntrack.
>>>> +   - DPDK:
>>>> +     * Add hardware offload support for output actions.
>>>>
>>>>    v2.12.0 - 03 Sep 2019
>>>>    ---------------------
>>>> diff --git a/lib/netdev-offload-dpdk-flow.c 
>>>> b/lib/netdev-offload-dpdk-flow.c
>>>> index dbbc45e99..6e7efb315 100644
>>>> --- a/lib/netdev-offload-dpdk-flow.c
>>>> +++ b/lib/netdev-offload-dpdk-flow.c
>>>> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct 
>>>> rte_flow_action *actions)
>>>>            } else {
>>>>                ds_put_cstr(s, "  RSS = null\n");
>>>>            }
>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
>>>> +        const struct rte_flow_action_count *count = actions->conf;
>>>> +
>>>> +        ds_put_cstr(s, "rte flow count action:\n");
>>>> +        if (count) {
>>>> +            ds_put_format(s,
>>>> +                          "  Count: shared=%d, id=%d\n",
>>>> +                          count->shared, count->id);
>>>> +        } else {
>>>> +            ds_put_cstr(s, "  Count = null\n");
>>>> +        }
>>>> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
>>>> +        const struct rte_flow_action_port_id *port_id = actions->conf;
>>>> +
>>>> +        ds_put_cstr(s, "rte flow port-id action:\n");
>>>> +        if (port_id) {
>>>> +            ds_put_format(s,
>>>> +                          "  Port-id: original=%d, id=%d\n",
>>>> +                          port_id->original, port_id->id);
>>>> +        } else {
>>>> +            ds_put_cstr(s, "  Port-id = null\n");
>>>> +        }
>>>>        } else {
>>>>            ds_put_format(s, "unknown rte flow action (%d)\n", 
>>>> actions->type);
>>>>        }
>>>> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns 
>>>> *patterns,
>>>>        return 0;
>>>>    }
>>>>
>>>> +static void
>>>> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
>>>> +{
>>>> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
>>>> +
>>>> +    count->shared = 0;
>>>> +    /* Each flow has a single count action, so no need of id */
>>>> +    count->id = 0;
>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
>>>> +}
>>>> +
>>>> +static void
>>>> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
>>>> +                                    struct netdev *outdev)
>>>> +{
>>>> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
>>>> +
>>>> +    port_id->id = netdev_dpdk_get_port_id(outdev);
>>>> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
>>>> +}
>>>> +
>>>> +static int
>>>> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
>>>> +                                   const struct nlattr *nla,
>>>> +                                   struct offload_info *info)
>>>> +{
>>>> +    struct netdev *outdev;
>>>> +    odp_port_t port;
>>>> +
>>>> +    port = nl_attr_get_odp_port(nla);
>>>> +    outdev = netdev_ports_get(port, info->dpif_class);
>>>> +    if (outdev == NULL) {
>>>> +        VLOG_DBG_RL(&error_rl,
>>>> +                    "Cannot find netdev for odp port %d", port);
>>>> +        return -1;
>>>> +    }
>>>> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
>>>> +        VLOG_DBG_RL(&error_rl,
>>>> +                    "Output to %s cannot be offloaded",
>>>> +                    netdev_get_name(outdev));
>>>> +        return -1;
>>>> +    }
>>>> +
>>>> +    netdev_dpdk_flow_add_count_action(actions);
>>>> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
>>>> +    netdev_close(outdev);
>>>> +
>>>> +    return 0;
>>>> +}
>>>> +
>>>>    int
>>>>    netdev_dpdk_flow_actions_add(struct flow_actions *actions,
>>>>                                 struct nlattr *nl_actions,
>>>>                                 size_t nl_actions_len,
>>>> -                             struct offload_info *info OVS_UNUSED)
>>>> +                             struct offload_info *info)
>>>>    {
>>>>        struct nlattr *nla;
>>>>        size_t left;
>>>>
>>>>        NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
>>>> -        VLOG_DBG_RL(&error_rl,
>>>> -                    "Unsupported action type %d", nl_attr_type(nla));
>>>> -        return -1;
>>>> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
>>>> +            left <= NLA_ALIGN(nla->nla_len)) {
>>>> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
>>>> +                return -1;
>>>> +            }
>>> The check in netdev_dpdk_flow_add_output_action() is insufficient to
>>> decide whether the device is capable of full-offload, since it assumes
>>> that the output action is supported if netdev_dpdk type is
>>> DPDK_DEV_ETH.
>>>
>>> There are a couple of issues with this approach:
>>>
>>> 1. For a device that is not capable of full-offload, it results in 2
>>> calls to the PMD for every flow-offload request. The first attempt for
>>> full-offload fails, followed by another call for partial-offload.
>> We cannot avoid this, as we want to do full offload, and only fallback
>> to partial if it doesn't work.
>>> 2. Even for devices that support full-offload, the offload might fail
>>> if the in-port and out-port are not in the same switch-domain. For
>>> example, if only partial-offload is configured (vhost-user ports) in
>>> the vSwitch, then the PMD should check if the in-port and out-port are
>>> on the same switch (domain-id must match) and fail the request
>>> otherwise. This check is important to alert the user of
>>> misconfigurations. If OVS doesn't do this check, all pmd drivers will
>>> have to do this check and that duplication is not desirable.
>> I don't think it's in the OVS scope to do such checks, but it should be
>> in the driver's level (PMD), the same way it is in the kernel offloading
>> using TC.
>>
>> I agree that the argument of logging is important, but this is something
>> that I think out of scope of this series. All prints in DPDK go to
>> /dev/null, and not logged into OVS' log.
> This is not correct.  DPDK logs are redirected to OVS log.  So, if DPDK driver
> uses DPDK logging properly, all the messages should go to ovs-vswitchd.log
> depending on the configured log levels.
OK, so in relevance to the previous comment, if the DPDK PMD logs 
(properly) a message that it is not the same HW, the user will be 
alerted of possible invalid configuration.
>
>> BTW, in kernel, there is a similar issue with drivers that logs using
>> extack. Those messages are not dumped to dmesg, and OVS also doesn't
>> read them to its log. Also out of scope of this series...
>>
>>> Here's how this could be addressed, by identifying whether the device
>>> is capable of full-offload or partial-offload.
>>>
>>> 1. Check if the target device (in-port) is full-offload capable. This
>>> could be done by checking if rte_flow api is supported, along with a
>>> valid switch-domain-id. We could even improve this further, similar to
>>> the kernel switchdev framework in which the device mode is
>>> configurable (legacy/eswitch modes) and available to consumers, but
>>> this might require some more DPDK framework changes.
>> I think we can enhance to check if both are ETH. However, As I wrote
>> above, I think checking other HW properties should not be in OVS scope,
>> but in driver's domain.
When I try to implement it, I see the netdev_dpdk_flow_api_supported API 
checks exactly this. Could you please explain again why its usage is 
restricted to init phase only?
>>> 2. If the device is full-offload capable, and if output action is
>>> specified,  check if in-port and out-port are on the same switch
>>> (switch-domain-id match mentioned above).
>>>
>>> If both 1 and 2 succeed, then the device is capable of full-offload
>>> and we can proceed with full-offload item/action setup, followed by
>>> rte_flow_create().
>>>
>>> Otherwise, we fallback to partial-offload code path. Here, we can
>>> handle the case where a vhost-user interface is either the in-port or
>>> out-port of the flow being offloaded. Depending on whether the
>>> vhost-user interface is the in-port or out-port, if the other port
>>> associated with the flow is offload capable (DPDK_DEV_ETH), then we
>>> offload the flow to that device. This would help us build further
>>> extensions to the existing partial-offload framework by adding more
>>> actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
>>> the output action that still needs to be processed in software.
>>>
>>> Thanks,
>>> -Harsha
>>>
>>>> +        } else {
>>>> +            VLOG_DBG_RL(&error_rl,
>>>> +                        "Unsupported action type %d", nl_attr_type(nla));
>>>> +            return -1;
>>>> +        }
>>>>        }
>>>>
>>>>        add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
>>>> --
>>>> 2.14.5
>>>>
>>>> _______________________________________________
>>>> dev mailing list
>>>> d...@openvswitch.org
>>>> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&amp;data=02%7C01%7Celibr%40mellanox.com%7Ccddf11ececd44fe5046c08d778d08a98%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637110709393981707&amp;sdata=rTGgfKo%2F6%2Faf7BIwBZi962tbyBBR97WGWgQunYhyGek%3D&amp;reserved=0
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to