On Wed, Dec 4, 2019 at 8:55 PM Eli Britstein <el...@mellanox.com> wrote:
>
>
> On 12/3/2019 5:19 PM, Sriharsha Basavapatna wrote:
> > On Wed, Nov 20, 2019 at 9:07 PM Eli Britstein <el...@mellanox.com> wrote:
> >> Signed-off-by: Eli Britstein <el...@mellanox.com>
> >> Reviewed-by: Oz Shlomo <o...@mellanox.com>
> >> ---
> >>   NEWS                           |  2 +
> >>   lib/netdev-offload-dpdk-flow.c | 87 
> >> ++++++++++++++++++++++++++++++++++++++++--
> >>   2 files changed, 85 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/NEWS b/NEWS
> >> index 88b818948..ca9c2b230 100644
> >> --- a/NEWS
> >> +++ b/NEWS
> >> @@ -10,6 +10,8 @@ Post-v2.12.0
> >>          if supported by libbpf.
> >>        * Add option to enable, disable and query TCP sequence checking in
> >>          conntrack.
> >> +   - DPDK:
> >> +     * Add hardware offload support for output actions.
> >>
> >>   v2.12.0 - 03 Sep 2019
> >>   ---------------------
> >> diff --git a/lib/netdev-offload-dpdk-flow.c 
> >> b/lib/netdev-offload-dpdk-flow.c
> >> index dbbc45e99..6e7efb315 100644
> >> --- a/lib/netdev-offload-dpdk-flow.c
> >> +++ b/lib/netdev-offload-dpdk-flow.c
> >> @@ -274,6 +274,28 @@ ds_put_flow_action(struct ds *s, const struct 
> >> rte_flow_action *actions)
> >>           } else {
> >>               ds_put_cstr(s, "  RSS = null\n");
> >>           }
> >> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
> >> +        const struct rte_flow_action_count *count = actions->conf;
> >> +
> >> +        ds_put_cstr(s, "rte flow count action:\n");
> >> +        if (count) {
> >> +            ds_put_format(s,
> >> +                          "  Count: shared=%d, id=%d\n",
> >> +                          count->shared, count->id);
> >> +        } else {
> >> +            ds_put_cstr(s, "  Count = null\n");
> >> +        }
> >> +    } else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
> >> +        const struct rte_flow_action_port_id *port_id = actions->conf;
> >> +
> >> +        ds_put_cstr(s, "rte flow port-id action:\n");
> >> +        if (port_id) {
> >> +            ds_put_format(s,
> >> +                          "  Port-id: original=%d, id=%d\n",
> >> +                          port_id->original, port_id->id);
> >> +        } else {
> >> +            ds_put_cstr(s, "  Port-id = null\n");
> >> +        }
> >>       } else {
> >>           ds_put_format(s, "unknown rte flow action (%d)\n", 
> >> actions->type);
> >>       }
> >> @@ -531,19 +553,76 @@ netdev_dpdk_flow_patterns_add(struct flow_patterns 
> >> *patterns,
> >>       return 0;
> >>   }
> >>
> >> +static void
> >> +netdev_dpdk_flow_add_count_action(struct flow_actions *actions)
> >> +{
> >> +    struct rte_flow_action_count *count = xzalloc(sizeof *count);
> >> +
> >> +    count->shared = 0;
> >> +    /* Each flow has a single count action, so no need of id */
> >> +    count->id = 0;
> >> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
> >> +}
> >> +
> >> +static void
> >> +netdev_dpdk_flow_add_port_id_action(struct flow_actions *actions,
> >> +                                    struct netdev *outdev)
> >> +{
> >> +    struct rte_flow_action_port_id *port_id = xzalloc(sizeof *port_id);
> >> +
> >> +    port_id->id = netdev_dpdk_get_port_id(outdev);
> >> +    add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
> >> +}
> >> +
> >> +static int
> >> +netdev_dpdk_flow_add_output_action(struct flow_actions *actions,
> >> +                                   const struct nlattr *nla,
> >> +                                   struct offload_info *info)
> >> +{
> >> +    struct netdev *outdev;
> >> +    odp_port_t port;
> >> +
> >> +    port = nl_attr_get_odp_port(nla);
> >> +    outdev = netdev_ports_get(port, info->dpif_class);
> >> +    if (outdev == NULL) {
> >> +        VLOG_DBG_RL(&error_rl,
> >> +                    "Cannot find netdev for odp port %d", port);
> >> +        return -1;
> >> +    }
> >> +    if (!netdev_dpdk_flow_api_supported(outdev)) {
> >> +        VLOG_DBG_RL(&error_rl,
> >> +                    "Output to %s cannot be offloaded",
> >> +                    netdev_get_name(outdev));
> >> +        return -1;
> >> +    }
> >> +
> >> +    netdev_dpdk_flow_add_count_action(actions);
> >> +    netdev_dpdk_flow_add_port_id_action(actions, outdev);
> >> +    netdev_close(outdev);
> >> +
> >> +    return 0;
> >> +}
> >> +
> >>   int
> >>   netdev_dpdk_flow_actions_add(struct flow_actions *actions,
> >>                                struct nlattr *nl_actions,
> >>                                size_t nl_actions_len,
> >> -                             struct offload_info *info OVS_UNUSED)
> >> +                             struct offload_info *info)
> >>   {
> >>       struct nlattr *nla;
> >>       size_t left;
> >>
> >>       NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
> >> -        VLOG_DBG_RL(&error_rl,
> >> -                    "Unsupported action type %d", nl_attr_type(nla));
> >> -        return -1;
> >> +        if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT &&
> >> +            left <= NLA_ALIGN(nla->nla_len)) {
> >> +            if (netdev_dpdk_flow_add_output_action(actions, nla, info )) {
> >> +                return -1;
> >> +            }
> > The check in netdev_dpdk_flow_add_output_action() is insufficient to
> > decide whether the device is capable of full-offload, since it assumes
> > that the output action is supported if netdev_dpdk type is
> > DPDK_DEV_ETH.
> >
> > There are a couple of issues with this approach:
> >
> > 1. For a device that is not capable of full-offload, it results in 2
> > calls to the PMD for every flow-offload request. The first attempt for
> > full-offload fails, followed by another call for partial-offload.
> We cannot avoid this, as we want to do full offload, and only fallback
> to partial if it doesn't work.
> >
> > 2. Even for devices that support full-offload, the offload might fail
> > if the in-port and out-port are not in the same switch-domain. For
> > example, if only partial-offload is configured (vhost-user ports) in
> > the vSwitch, then the PMD should check if the in-port and out-port are
> > on the same switch (domain-id must match) and fail the request
> > otherwise. This check is important to alert the user of
> > misconfigurations. If OVS doesn't do this check, all pmd drivers will
> > have to do this check and that duplication is not desirable.
>
> I don't think it's in the OVS scope to do such checks, but it should be
> in the driver's level (PMD), the same way it is in the kernel offloading
> using TC.
>

IMO, it is not out-of-scope for OVS to add these checks to make better
offload decisions:
- This layer of OVS (netdev-offload-dpdk) is already offload-aware and
it is using a set of offload APIs (rte_flow)
- Full offload may not be the preferred configuration for a customer
(since it requires additional config like SRIOV). That is, the code
should not make the distinction that full-offload is always preferred
and partial-offload is only a fallback option.
- Without such checks, offload rate of partial-offload configuration
could be potentially impacted with a large number of flows.
- Similar improvement (checking/logging in OVS as opposed to
individual drivers) could be made in TC kernel offloads too

> I agree that the argument of logging is important, but this is something
> that I think out of scope of this series. All prints in DPDK go to
> /dev/null, and not logged into OVS' log.
>
> BTW, in kernel, there is a similar issue with drivers that logs using
> extack. Those messages are not dumped to dmesg, and OVS also doesn't
> read them to its log. Also out of scope of this series...
>
> >
> > Here's how this could be addressed, by identifying whether the device
> > is capable of full-offload or partial-offload.
> >
> > 1. Check if the target device (in-port) is full-offload capable. This
> > could be done by checking if rte_flow api is supported, along with a
> > valid switch-domain-id. We could even improve this further, similar to
> > the kernel switchdev framework in which the device mode is
> > configurable (legacy/eswitch modes) and available to consumers, but
> > this might require some more DPDK framework changes.
> I think we can enhance to check if both are ETH. However, As I wrote
> above, I think checking other HW properties should not be in OVS scope,
> but in driver's domain.
> >
> > 2. If the device is full-offload capable, and if output action is
> > specified,  check if in-port and out-port are on the same switch
> > (switch-domain-id match mentioned above).
> >
> > If both 1 and 2 succeed, then the device is capable of full-offload
> > and we can proceed with full-offload item/action setup, followed by
> > rte_flow_create().
> >
> > Otherwise, we fallback to partial-offload code path. Here, we can
> > handle the case where a vhost-user interface is either the in-port or
> > out-port of the flow being offloaded. Depending on whether the
> > vhost-user interface is the in-port or out-port, if the other port
> > associated with the flow is offload capable (DPDK_DEV_ETH), then we
> > offload the flow to that device. This would help us build further
> > extensions to the existing partial-offload framework by adding more
> > actions such as VLAN and Tunnel actions (apart from MARK/RSS), except
> > the output action that still needs to be processed in software.
> >
> > Thanks,
> > -Harsha
> >
> >> +        } else {
> >> +            VLOG_DBG_RL(&error_rl,
> >> +                        "Unsupported action type %d", nl_attr_type(nla));
> >> +            return -1;
> >> +        }
> >>       }
> >>
> >>       add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
> >> --
> >> 2.14.5
> >>
> >> _______________________________________________
> >> dev mailing list
> >> d...@openvswitch.org
> >> https://eur03.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&amp;data=02%7C01%7Celibr%40mellanox.com%7C3397e2af98f74b1db43808d778043f5d%7Ca652971c7d2e4d9ba6a4d149256f461b%7C0%7C0%7C637109831952593096&amp;sdata=NvEUyLaCDekjbTOFO1vRRteSUBkSkFrruVLMJas28OY%3D&amp;reserved=0
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to