On 25 December 2016 at 03:39, Paul Blakey <pa...@mellanox.com> wrote:
> While dumping flows, dump flows that were offloaded to
> netdev and parse them back to dpif flow.
>
> Signed-off-by: Paul Blakey <pa...@mellanox.com>
> Reviewed-by: Roi Dayan <r...@mellanox.com>
> ---
>  lib/dpif-netlink.c | 179 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 179 insertions(+)
>
> diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
> index 36f2888..3d8940e 100644
> --- a/lib/dpif-netlink.c
> +++ b/lib/dpif-netlink.c
> @@ -38,6 +38,7 @@
>  #include "flow.h"
>  #include "fat-rwlock.h"
>  #include "netdev.h"
> +#include "netdev-provider.h"
>  #include "netdev-linux.h"
>  #include "netdev-vport.h"
>  #include "netlink-conntrack.h"
> @@ -55,6 +56,7 @@
>  #include "unaligned.h"
>  #include "util.h"
>  #include "openvswitch/vlog.h"
> +#include "openvswitch/match.h"
>
>  VLOG_DEFINE_THIS_MODULE(dpif_netlink);
>  #ifdef _WIN32
> @@ -68,6 +70,8 @@ enum { MAX_PORTS = USHRT_MAX };
>   * missing if we have old headers. */
>  #define ETH_FLAG_LRO (1 << 15)    /* LRO is enabled */
>
> +#define FLOW_DUMP_MAX_BATCH 50
> +
>  struct dpif_netlink_dp {
>      /* Generic Netlink header. */
>      uint8_t cmd;
> @@ -1355,6 +1359,10 @@ struct dpif_netlink_flow_dump {
>      struct dpif_flow_dump up;
>      struct nl_dump nl_dump;
>      atomic_int status;
> +    struct netdev_flow_dump **netdev_dumps;
> +    int netdev_num;
> +    int netdev_given;
> +    struct ovs_mutex netdev_lock;

Could you add a brief comment above these variables that describes their
use? (It's also common in OVS code to mention, e.g., that netdev_lock
protects the following elements.) If there's a more descriptive name than
"netdev_num", like netdev_max_dumps or something, then please use that
instead. At a glance, "given" and "num" don't provide much context about
how they relate to each other or to the dump.

> };
>
>  static struct dpif_netlink_flow_dump *
> @@ -1363,6 +1371,34 @@ dpif_netlink_flow_dump_cast(struct dpif_flow_dump *dump)
>      return CONTAINER_OF(dump, struct dpif_netlink_flow_dump, up);
>  }
>
> +static void start_netdev_dump(const struct dpif *dpif_,
> +                              struct dpif_netlink_flow_dump *dump) {
> +
> +    if (!netdev_flow_api_enabled) {
> +        dump->netdev_num = 0;
> +        return;
> +    }

Typically for style we still place all variable declarations at the top of
a function, in "christmas tree" order (longest lines to shortest), before
functional code like this.

> +
> +    struct netdev_list_element *element;
> +    struct ovs_list port_list;
> +    int ports = netdev_hmap_port_get_list(dpif_->dpif_class, &port_list);
> +    int i = 0;
> +
> +    dump->netdev_dumps =
> +        ports ? xzalloc(sizeof(struct netdev_flow_dump *) * ports) : 0;

Can this be sizeof(dump->netdev_dumps)?

> +    dump->netdev_num = ports;
> +    dump->netdev_given = 0;
> +
> +    LIST_FOR_EACH(element, node, &port_list) {
> +        dump->netdev_dumps[i] = netdev_flow_dump_create(element->netdev);
> +        dump->netdev_dumps[i]->port = element->port_no;
> +        i++;
> +    }

As a matter of style, it's easier to see that this loop is bounded by
'ports' (and that number is correct) if it's structured as

    for (i = 0; i < ports; i++) {
        element = get_next_node;
        ...
    }

Also, it seems that even if the netdev doesn't support flow_dump, we
allocate a netdev_flow_dump and add it to the netdev_dumps here; perhaps
we could/should skip it for these netdevs instead?

> +    netdev_port_list_del(&port_list);
> +
> +    ovs_mutex_init(&dump->netdev_lock);

I don't see a corresponding ovs_mutex_destroy() call for this.

> +}
> +
>  static struct dpif_flow_dump *
>  dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>  {
> @@ -1387,6 +1423,8 @@ dpif_netlink_flow_dump_create(const struct dpif *dpif_, bool terse)
>      atomic_init(&dump->status, 0);
>      dump->up.terse = terse;
>
> +    start_netdev_dump(dpif_, dump);
> +
>      return &dump->up;
>  }
>
> @@ -1397,6 +1435,16 @@ dpif_netlink_flow_dump_destroy(struct dpif_flow_dump *dump_)
>      unsigned int nl_status = nl_dump_done(&dump->nl_dump);
>      int dump_status;
>
> +    if (netdev_flow_api_enabled) {
> +        for (int i = 0; i < dump->netdev_num; i++) {
> +            int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);
> +            if (err != 0 && err != EOPNOTSUPP) {
> +                VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
> +            }
> +        }
> +        free(dump->netdev_dumps);
> +    }

You don't really need to check for netdev_flow_api_enabled here;
netdev_num will be 0 if it is disabled, so that for loop turns into a
no-op. You could then initialize dump->netdev_dumps to NULL in that case
and unconditionally free it. It's a bit simpler to read the code if you
don't have to think about whether or not hardware offloads are enabled.
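
Putting that together with the ovs_mutex_destroy() comment above, the
middle of the destroy path could end up looking something like this
(untested sketch, just to illustrate; it assumes start_netdev_dump()
leaves netdev_dumps NULL and netdev_num 0 when offloads are disabled):

    /* In dpif_netlink_flow_dump_destroy(): */
    for (int i = 0; i < dump->netdev_num; i++) {
        int err = netdev_flow_dump_destroy(dump->netdev_dumps[i]);

        if (err != 0 && err != EOPNOTSUPP) {
            VLOG_ERR("failed dumping netdev: %s", ovs_strerror(err));
        }
    }
    free(dump->netdev_dumps);
    ovs_mutex_destroy(&dump->netdev_lock);

Then the rest of the function doesn't need to care whether offloads were
ever enabled.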

> +
>      /* No other thread has access to 'dump' at this point. */
>      atomic_read_relaxed(&dump->status, &dump_status);
>      free(dump);
> @@ -1410,6 +1458,11 @@ struct dpif_netlink_flow_dump_thread {
>      struct dpif_flow_dump_thread up;
>      struct dpif_netlink_flow_dump *dump;
>      struct dpif_flow_stats stats;
>      struct ofpbuf nl_flows;     /* Always used to store flows. */
>      struct ofpbuf *nl_actions;  /* Used if kernel does not supply actions. */
> +    struct odputil_keybuf keybuf[FLOW_DUMP_MAX_BATCH];
> +    struct odputil_keybuf maskbuf[FLOW_DUMP_MAX_BATCH];
> +    struct odputil_keybuf actbuf[FLOW_DUMP_MAX_BATCH];
> +    int netdev_cur_dump;
> +    bool netdev_done;

I wonder if it's worthwhile to reuse 'nl_flows' to store all of these
netlink-formatted key/mask/acts instead of having these keybufs? It seems
that it is currently unused for the first half of the
dpif_netlink_flow_dump, while the flows are being dumped from the netdev.

Regardless of the above question, I also question whether
FLOW_DUMP_MAX_BATCH is too big for dumping from the kernel. How many tc
flows will we really get from the kernel at once?

> };
>
>  static struct dpif_netlink_flow_dump_thread *
> @@ -1429,6 +1482,8 @@ dpif_netlink_flow_dump_thread_create(struct dpif_flow_dump *dump_)
>      thread->dump = dump;
>      ofpbuf_init(&thread->nl_flows, NL_DUMP_BUFSIZE);
>      thread->nl_actions = NULL;
> +    thread->netdev_cur_dump = 0;
> +    thread->netdev_done = !(thread->netdev_cur_dump < dump->netdev_num);
>
>      return &thread->up;
>  }
>
> @@ -1466,6 +1521,90 @@ dpif_netlink_flow_to_dpif_flow(struct dpif *dpif, struct dpif_flow *dpif_flow,
>      dpif_netlink_flow_get_stats(datapath_flow, &dpif_flow->stats);
>  }
>
> +static void
> +dpif_netlink_advance_netdev_dump(struct dpif_netlink_flow_dump_thread *thread)
> +{
> +    struct dpif_netlink_flow_dump *dump = thread->dump;
> +
> +    ovs_mutex_lock(&dump->netdev_lock);
> +    /* if we haven't finished (dumped everything) */
> +    if (dump->netdev_given < dump->netdev_num) {
> +        /* if we are the first to find that given dump is finished
> +         * (for race condition, e.g 3 finish dump 0 at the same time) */

Why is there a race condition here if this is executed under netdev_lock?

> +        if (thread->netdev_cur_dump == dump->netdev_given) {
> +            thread->netdev_cur_dump = ++dump->netdev_given;
> +            /* did we just finish the last dump? done. */
> +            if (dump->netdev_given == dump->netdev_num) {
> +                thread->netdev_done = true;
> +            }
> +        } else {
> +            /* otherwise, we are behind, catch up */
> +            thread->netdev_cur_dump = dump->netdev_given;
> +        }
> +    } else {
> +        /* some other thread finished */
> +        thread->netdev_done = true;
> +    }
> +    ovs_mutex_unlock(&dump->netdev_lock);
> +}
> +
> +static struct odp_support netdev_flow_support = {
> +    .max_mpls_depth = SIZE_MAX,
> +    .recirc = false,
> +    .ct_state = false,
> +    .ct_zone = false,
> +    .ct_mark = false,
> +    .ct_label = false,
> +};
> +
> +static int
> +dpif_netlink_netdev_match_to_dpif_flow(struct match *match,
> +                                       struct ofpbuf *key_buf,
> +                                       struct ofpbuf *mask_buf,
> +                                       struct nlattr *actions,
> +                                       struct dpif_flow_stats *stats,
> +                                       ovs_u128 *ufid,
> +                                       struct dpif_flow *flow,
> +                                       bool terse OVS_UNUSED)
> +{
> +
> +    struct odp_flow_key_parms odp_parms = {
> +        .flow = &match->flow,
> +        .mask = &match->wc.masks,
> +        .support = netdev_flow_support,

There's also a 'key_buf' field in parms that may be needed.
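
If so, one option (sketch only, assuming the intent matches the
odp_parms.key_buf assignment that appears further down in this function)
would be to set it in the initializer so it isn't forgotten:

    struct odp_flow_key_parms odp_parms = {
        .flow = &match->flow,
        .mask = &match->wc.masks,
        .support = netdev_flow_support,
        .key_buf = key_buf,    /* Lets odp_flow_key_from_mask() refer to
                                * the serialized key. */
    };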

> +    };
> +    size_t offset;
> +
> +    memset(flow, 0, sizeof *flow);
> +
> +    /* Key */
> +    offset = key_buf->size;
> +    flow->key = ofpbuf_tail(key_buf);
> +    odp_flow_key_from_flow(&odp_parms, key_buf);
> +    flow->key_len = key_buf->size - offset;
> +
> +    /* Mask */
> +    offset = mask_buf->size;
> +    flow->mask = ofpbuf_tail(mask_buf);
> +    odp_parms.key_buf = key_buf;
> +    odp_flow_key_from_mask(&odp_parms, mask_buf);
> +    flow->mask_len = mask_buf->size - offset;
> +
> +    /* Actions */
> +    flow->actions = nl_attr_get(actions);
> +    flow->actions_len = nl_attr_get_size(actions);
> +
> +    /* Stats */
> +    memcpy(&flow->stats, stats, sizeof *stats);
> +
> +    /* UFID */
> +    flow->ufid_present = true;
> +    flow->ufid = *ufid;
> +
> +    flow->pmd_id = PMD_ID_NULL;
> +    return 0;
> +}
> +
>  static int
>  dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
>                              struct dpif_flow *flows, int max_flows)
> @@ -1475,11 +1614,51 @@ dpif_netlink_flow_dump_next(struct dpif_flow_dump_thread *thread_,
>      struct dpif_netlink_flow_dump *dump = thread->dump;
>      struct dpif_netlink *dpif = dpif_netlink_cast(thread->up.dpif);
>      int n_flows;
> +    int i = 0;
>
>      ofpbuf_delete(thread->nl_actions);
>      thread->nl_actions = NULL;
>
>      n_flows = 0;
> +
> +    while (!thread->netdev_done && n_flows < max_flows
> +           && i < FLOW_DUMP_MAX_BATCH) {
> +        struct odputil_keybuf *maskbuf = &thread->maskbuf[i];
> +        struct odputil_keybuf *keybuf = &thread->keybuf[i];
> +        struct odputil_keybuf *actbuf = &thread->actbuf[i];
> +        struct ofpbuf key, mask, act;
> +        struct dpif_flow *f = &flows[n_flows];
> +        int cur = thread->netdev_cur_dump;
> +        struct netdev_flow_dump *netdev_dump = dump->netdev_dumps[cur];
> +        struct match match;
> +        struct nlattr *actions;
> +        struct dpif_flow_stats stats;
> +        ovs_u128 ufid;
> +        bool has_next;
> +
> +        ofpbuf_use_stack(&key, keybuf, sizeof *keybuf);
> +        ofpbuf_use_stack(&act, actbuf, sizeof *actbuf);
> +        ofpbuf_use_stack(&mask, maskbuf, sizeof *maskbuf);
> +        has_next = netdev_flow_dump_next(netdev_dump, &match,
> +                                         &actions, &stats,
> +                                         &ufid,
> +                                         &thread->nl_flows,
> +                                         &act);
> +        if (has_next) {
> +            dpif_netlink_netdev_match_to_dpif_flow(&match,
> +                                                   &key, &mask,
> +                                                   actions,
> +                                                   &stats,
> +                                                   &ufid,
> +                                                   f,
> +                                                   dump->up.terse);
> +            n_flows++;
> +            i++;

Seems like 'i' and 'n_flows' are trying to achieve the same objective. Can
we just drop 'i'?

> +        } else {
> +            dpif_netlink_advance_netdev_dump(thread);
> +        }
> +    }
> +
>      while (!n_flows
>             || (n_flows < max_flows && thread->nl_flows.size)) {
>          struct dpif_netlink_flow datapath_flow;
> --
> 1.8.3.1
>
_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev