On 25 December 2016 at 03:39, Paul Blakey <pa...@mellanox.com> wrote:
> While dumping flows, dump flows that were offloaded to
> netdev and parse them back to dpif flow.
>
> Signed-off-by: Paul Blakey <pa...@mellanox.com>
> Reviewed-by: Roi Dayan <r...@mellanox.com>
> ---
>  lib/dpif-netlink.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 179 insertions(+)
>
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 36f2888..3d8940e 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -38,6 +38,7 @@
>  #include "flow.h"
>  #include "fat-rwlock.h"
>  #include "netdev.h"
> +#include "netdev-provider.h"
>  #include "netdev-linux.h"
>  #include "netdev-vport.h"
>  #include "netlink-conntrack.h"
> @@ -55,6 +56,7 @@
>  #include "unaligned.h"
>  #include "util.h"
>  #include "openvswitch/vlog.h"
> +#include "openvswitch/match.h"
>
>  VLOG_DEFINE_THIS_MODULE(dpif_netlink);
>  #ifdef _WIN32
> @@ -68,6 +70,8 @@ enum { MAX_PORTS = USHRT_MAX };
>   * missing if we have old headers. */
>  #define ETH_FLAG_LRO (1 << 15)    /* LRO is enabled */
>
> +#define FLOW_DUMP_MAX_BATCH 50
> +
>  struct dpif_netlink_dp {
>      /* Generic Netlink header. */
>      uint8_t cmd;
> @@ -1355,6 +1359,10 @@ struct dpif_netlink_flow_dump {
>      struct dpif_flow_dump up;
>      struct nl_dump nl_dump;
>      atomic_int status;
> +    struct netdev_flow_dump **netdev_dumps;
> +    int netdev_num;
> +    int netdev_given;
> +    struct ovs_mutex netdev_lock;

Could you add a brief comment above these variables that describes their
use? (It's also common in OVS code to mention, e.g., that netdev_lock
protects the following elements.) If there's a more descriptive name than
"netdev_num", like netdev_max_dumps or something, then please use that
instead. At a glance, "given" and "num" don't provide much context about
how they relate to each other or to the dump.

> };
>
>  static struct dpif_netlink_flow_dump *
> @@ -1363,6 +1371,34 @@ dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
>      return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
>  }
>
> +static void start_netdev_dump(const struct dpif *dpif_,
> +                              struct dpif_netlink_flow_dump *dump) {
> +
> +    if (!netdev_flow_api_enabled) {
> +        dump->netdev_num = 0;
> +        return;
> +    }

Typically for style we still place all variable declarations at the top of
a function, in "christmas tree" order (longest lines to shortest), before
functional code like this.

> +
> +    struct netdev_list_element *element;
> +    struct ovs_list port_list;
> +    int ports = netdev_hmap_port_get_list(dpif_->dpif_class, &port_list);
> +    int i = 0;
> +
> +    dump->netdev_dumps =
> +        ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0;

Can this be sizeof(dump->netdev_dumps)?

> +    dump->netdev_num = ports;
> +    dump->netdev_given = 0;
> +
> +    LIST_FOR_EACH(element, node, &port_list) {
> +        dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev);
> +        dump->netdev_dumps[i]->port = element->port_no;
> +        i++;
> +    }

As a matter of style, it's easier to see that this loop is bounded by
'ports' (and that number is correct) if it's structured as

    for (i = 0; i < ports; i++) {
        element = get_next_node;
        ...
    }

Also, it seems that even if the netdev doesn't support flow_dump, we
allocate a netdev_flow_dump and add it to the netdev_dumps here; perhaps
we could/should skip it for these netdevs instead?

> +    netdev_port_list_del(&port_list);
> +
> +    ovs_mutex_init(&dump->netdev_lock);

I don't see a corresponding ovs_mutex_destroy() call for this.

> +}
> +
>  static struct dpif_flow_dump *
>  dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>  {
> @@ -1387,6 +1423,8 @@ dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>      atomic_init(&dump->status, 0);
>      dump->up.terse = terse;
>
> +    start_netdev_dump(dpif_, dump);
> +
>      return &dump->up;
>  }
>
> @@ -1397,6 +1435,16 @@ dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
>      unsigned int nl_status = nl_dump_done(&dump->nl_dump);
>      int dump_status;
>
> +    if (netdev_flow_api_enabled) {
> +        for (int i = 0; i < dump->netdev_num; i++) {
> +            int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
> +            if (err != 0 && err != EOPNOTSUPP) {
> +                VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
> +            }
> +        }
> +        free(dump->netdev_dumps);
> +    }

You don't really need to check for netdev_flow_api_enabled here;
netdev_num will be 0 if it is disabled, so that for loop turns into a
no-op. You could then initialize dump->netdev_dumps to NULL in that case
and unconditionally free it. It's a bit simpler to read the code if you
don't have to think about whether or not hardware offloads are enabled.
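
Putting that together with the ovs_mutex_destroy() comment above, the
middle of the destroy path could end up looking something like this
(untested sketch, just to illustrate; it assumes start_netdev_dump()
leaves netdev_dumps NULL and netdev_num 0 when offloads are disabled):

    /* In dpif_netlink_flow_dump_destroy(): */
    for (int i = 0; i < dump->netdev_num; i++) {
        int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);

        if (err != 0 && err != EOPNOTSUPP) {
            VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
        }
    }
    free(dump->netdev_dumps);
    ovs_mutex_destroy(&dump->netdev_lock);

Then the rest of the function doesn't need to care whether offloads were
ever enabled.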

> +
>      /* No other thread has access to 'dump' at this point. */
>      atomic_read_relaxed(&dump->status, &dump_status);
>      free(dump);
> @@ -1410,6 +1458,11 @@ struct dpif_netlink_flow_dump_thread {
>      struct dpif_flow_dump_thread up;
>      struct dpif_netlink_flow_dump *dump;
>      struct dpif_flow_stats stats;
>      struct ofpbuf nl_flows;     /* Always used to store flows. */
>      struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
> +    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
> +    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
> +    struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
> +    int netdev_cur_dump;
> +    bool netdev_done;

I wonder if it's worthwhile to reuse 'nl_flows' to store all of these
netlink-formatted key/mask/acts instead of having these keybufs? It seems
that it is currently unused for the first half of the
dpif_netlink_flow_dump, while the flows are being dumped from the netdev.

Regardless of the above question, I also question whether
FLOW_DUMP_MAX_BATCH is too big for dumping from the kernel. How many tc
flows will we really get from the kernel at once?

> };
>
>  static struct dpif_netlink_flow_dump_thread *
> @@ -1429,6 +1482,8 @@ dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
>      thread->dump = dump;
>      ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
>      thread->nl_actions = NULL;
> +    thread->netdev_cur_dump = 0;
> +    thread->netdev_done = !(thread->netdev_cur_dump < dump->netdev_num);
>
>      return &thread->up;
>  }
>
> @@ -1466,6 +1521,90 @@ dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
>      dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
>  }
>
> +static void
> +dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
> +{
> +    struct dpif_netlink_flow_dump *dump = thread->dump;
> +
> +    ovs_mutex_lock(&dump->netdev_lock);
> +    /* if we haven't finished (dumped everything) */
> +    if (dump->netdev_given < dump->netdev_num) {
> +        /* if we are the first to find that given dump is finished
> +         * (for race condition, e.g 3 finish dump 0 at the same time) */

Why is there a race condition here if this is executed under netdev_lock?

> +        if (thread->netdev_cur_dump == dump->netdev_given) {
> +            thread->netdev_cur_dump = ++dump->netdev_given;
> +            /* did we just finish the last dump? done. */
> +            if (dump->netdev_given == dump->netdev_num) {
> +                thread->netdev_done = true;
> +            }
> +        } else {
> +            /* otherwise, we are behind, catch up */
> +            thread->netdev_cur_dump = dump->netdev_given;
> +        }
> +    } else {
> +        /* some other thread finished */
> +        thread->netdev_done = true;
> +    }
> +    ovs_mutex_unlock(&dump->netdev_lock);
> +}
> +
> +static struct odp_support netdev_flow_support = {
> +    .max_mpls_depth = SIZE_MAX,
> +    .recirc = false,
> +    .ct_state = false,
> +    .ct_zone = false,
> +    .ct_mark = false,
> +    .ct_label = false,
> +};
> +
> +static int
> +dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
> +                                       struct ofpbuf *key_buf,
> +                                       struct ofpbuf *mask_buf,
> +                                       struct nlattr *actions,
> +                                       struct dpif_flow_stats *stats,
> +                                       ovs_u128 *ufid,
> +                                       struct dpif_flow *flow,
> +                                       bool terse OVS_UNUSED)
> +{
> +
> +    struct odp_flow_key_parms odp_parms = {
> +        .flow = &match->flow,
> +        .mask = &match->wc.masks,
> +        .support = netdev_flow_support,

There's also a 'key_buf' field in parms that may be needed.
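
If so, one option (sketch only, assuming the intent matches the
odp_parms.key_buf assignment that appears further down in this function)
would be to set it in the initializer so it isn't forgotten:

    struct odp_flow_key_parms odp_parms = {
        .flow = &match->flow,
        .mask = &match->wc.masks,
        .support = netdev_flow_support,
        .key_buf = key_buf,    /* Lets odp_flow_key_from_mask() refer to
                                * the serialized key. */
    };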

> +    };
> +    size_t offset;
> +
> +    memset(flow, 0, sizeof *flow);
> +
> +    /* Key */
> +    offset = key_buf->size;
> +    flow->key = ofpbuf_tail(key_buf);
> +    odp_flow_key_from_flow(&odp_parms, key_buf);
> +    flow->key_len = key_buf->size - offset;
> +
> +    /* Mask */
> +    offset = mask_buf->size;
> +    flow->mask = ofpbuf_tail(mask_buf);
> +    odp_parms.key_buf = key_buf;
> +    odp_flow_key_from_mask(&odp_parms, mask_buf);
> +    flow->mask_len = mask_buf->size - offset;
> +
> +    /* Actions */
> +    flow->actions = nl_attr_get(actions);
> +    flow->actions_len = nl_attr_get_size(actions);
> +
> +    /* Stats */
> +    memcpy(&flow->stats, stats, sizeof *stats);
> +
> +    /* UFID */
> +    flow->ufid_present = true;
> +    flow->ufid = *ufid;
> +
> +    flow->pmd_id = PMD_ID_NULL;
> +    return 0;
> +}
> +
>  static int
>  dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
>                              struct dpif_flow *flows, int max_flows)
> @@ -1475,11 +1614,51 @@ dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
>      struct dpif_netlink_flow_dump *dump = thread->dump;
>      struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
>      int n_flows;
> +    int i = 0;
>
>      ofpbuf_delete(thread->nl_actions);
>      thread->nl_actions = NULL;
>
>      n_flows = 0;
> +
> +    while (!thread->netdev_done && n_flows < max_flows
> +           && i < FLOW_DUMP_MAX_BATCH) {
> +        struct odputil_keybuf *maskbuf = &thread->maskbuf[i];
> +        struct odputil_keybuf *keybuf = &thread->keybuf[i];
> +        struct odputil_keybuf *actbuf = &thread->actbuf[i];
> +        struct ofpbuf key, mask, act;
> +        struct dpif_flow *f = &flows[n_flows];
> +        int cur = thread->netdev_cur_dump;
> +        struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
> +        struct match match;
> +        struct nlattr *actions;
> +        struct dpif_flow_stats stats;
> +        ovs_u128 ufid;
> +        bool has_next;
> +
> +        ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
> +        ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
> +        ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
> +        has_next = netdev_flow_dump_next(netdev_dump, &match,
> +                                         &actions, &stats,
> +                                         &ufid,
> +                                         &thread->nl_flows,
> +                                         &act);
> +        if (has_next) {
> +            dpif_netlink_netdev_match_to_dpif_flow(&match,
> +                                                   &key, &mask,
> +                                                   actions,
> +                                                   &stats,
> +                                                   &ufid,
> +                                                   f,
> +                                                   dump->up.terse);
> +            n_flows++;
> +            i++;

Seems like 'i' and 'n_flows' are trying to achieve the same objective. Can
we just drop 'i'?

> +        } else {
> +            dpif_netlink_advance_netdev_dump(thread);
> +        }
> +    }
> +
>      while (!n_flows
>             || (n_flows < max_flows && thread->nl_flows.size)) {
>          struct dpif_netlink_flow datapath_flow;
> --
> 1.8.3.1
>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev